370 files changed, 82458 insertions, 0 deletions
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
new file mode 100644
index 000000000..2938934c6
--- /dev/null
+++ b/arch/s390/Kbuild
@@ -0,0 +1,8 @@
+obj-y				+= kernel/
+obj-y				+= mm/
+obj-$(CONFIG_KVM)		+= kvm/
+obj-$(CONFIG_CRYPTO_HW)		+= crypto/
+obj-$(CONFIG_S390_HYPFS_FS)	+= hypfs/
+obj-$(CONFIG_APPLDATA_BASE)	+= appldata/
+obj-y				+= net/
+obj-$(CONFIG_PCI)		+= pci/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
new file mode 100644
index 000000000..b06dc3839
--- /dev/null
+++ b/arch/s390/Kconfig
@@ -0,0 +1,798 @@
+config MMU
+	def_bool y
+
+config ZONE_DMA
+	def_bool y
+
+config LOCKDEP_SUPPORT
+	def_bool y
+
+config STACKTRACE_SUPPORT
+	def_bool y
+
+config HAVE_LATENCYTOP_SUPPORT
+	def_bool y
+
+config RWSEM_GENERIC_SPINLOCK
+	bool
+
+config RWSEM_XCHGADD_ALGORITHM
+	def_bool y
+
+config ARCH_HAS_ILOG2_U32
+	def_bool n
+
+config ARCH_HAS_ILOG2_U64
+	def_bool n
+
+config GENERIC_HWEIGHT
+	def_bool y
+
+config GENERIC_BUG
+	def_bool y if BUG
+
+config GENERIC_BUG_RELATIVE_POINTERS
+	def_bool y
+
+config ARCH_DMA_ADDR_T_64BIT
+	def_bool y
+
+config GENERIC_LOCKBREAK
+	def_bool y if SMP && PREEMPT
+
+config PGSTE
+	def_bool y if KVM
+
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	def_bool y
+
+config KEXEC
+	def_bool y
+
+config AUDIT_ARCH
+	def_bool y
+
+config NO_IOPORT_MAP
+	def_bool y
+
+config PCI_QUIRKS
+	def_bool n
+
+config ARCH_SUPPORTS_UPROBES
+	def_bool y
+
+config S390
+	def_bool y
+	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_HAS_SG_CHAIN
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select ARCH_INLINE_READ_LOCK
+	select ARCH_INLINE_READ_LOCK_BH
+	select ARCH_INLINE_READ_LOCK_IRQ
+	select ARCH_INLINE_READ_LOCK_IRQSAVE
+	select ARCH_INLINE_READ_TRYLOCK
+	select ARCH_INLINE_READ_UNLOCK
+	select ARCH_INLINE_READ_UNLOCK_BH
+	select ARCH_INLINE_READ_UNLOCK_IRQ
+	select ARCH_INLINE_READ_UNLOCK_IRQRESTORE
+	select ARCH_INLINE_SPIN_LOCK
+	select ARCH_INLINE_SPIN_LOCK_BH
+	select ARCH_INLINE_SPIN_LOCK_IRQ
+	select ARCH_INLINE_SPIN_LOCK_IRQSAVE
+	select ARCH_INLINE_SPIN_TRYLOCK
+	select ARCH_INLINE_SPIN_TRYLOCK_BH
+	select ARCH_INLINE_SPIN_UNLOCK
+	select ARCH_INLINE_SPIN_UNLOCK_BH
+	select ARCH_INLINE_SPIN_UNLOCK_IRQ
+	select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE
+	select ARCH_INLINE_WRITE_LOCK
+	select ARCH_INLINE_WRITE_LOCK_BH
+	select ARCH_INLINE_WRITE_LOCK_IRQ
+	select ARCH_INLINE_WRITE_LOCK_IRQSAVE
+	select ARCH_INLINE_WRITE_TRYLOCK
+	select ARCH_INLINE_WRITE_UNLOCK
+	select ARCH_INLINE_WRITE_UNLOCK_BH
+	select ARCH_INLINE_WRITE_UNLOCK_IRQ
+	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+	select ARCH_SUPPORTS_ATOMIC_RMW
+	select ARCH_USE_CMPXCHG_LOCKREF
+	select ARCH_WANT_IPC_PARSE_VERSION
+	select BUILDTIME_EXTABLE_SORT
+	select CLONE_BACKWARDS2
+	select DYNAMIC_FTRACE if FUNCTION_TRACER
+	select GENERIC_CLOCKEVENTS
+	select GENERIC_CPU_DEVICES if !SMP
+	select GENERIC_FIND_FIRST_BIT
+	select GENERIC_SMP_IDLE_THREAD
+	select GENERIC_TIME_VSYSCALL
+	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
+	select HAVE_CMPXCHG_DOUBLE
+	select HAVE_CMPXCHG_LOCAL
+	select HAVE_DEBUG_KMEMLEAK
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_TRACER
+	select HAVE_FUTEX_CMPXCHG if FUTEX
+	select HAVE_KERNEL_BZIP2
+	select HAVE_KERNEL_GZIP
+	select HAVE_KERNEL_LZ4
+	select HAVE_KERNEL_LZMA
+	select HAVE_KERNEL_LZO
+	select HAVE_KERNEL_XZ
+	select HAVE_KPROBES
+	select HAVE_KRETPROBES
+	select HAVE_KVM
+	select HAVE_LIVEPATCH
+	select HAVE_MEMBLOCK
+	select HAVE_MEMBLOCK_NODE_MAP
+	select HAVE_MEMBLOCK_PHYS_MAP
+	select HAVE_MOD_ARCH_SPECIFIC
+	select HAVE_OPROFILE
+	select HAVE_PERF_EVENTS
+	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_VIRT_CPU_ACCOUNTING
+	select MODULES_USE_ELF_RELA
+	select NO_BOOTMEM
+	select OLD_SIGACTION
+	select OLD_SIGSUSPEND3
+	select SYSCTL_EXCEPTION_TRACE
+	select TTY
+	select VIRT_CPU_ACCOUNTING
+	select VIRT_TO_BUS
+
+config SCHED_OMIT_FRAME_POINTER
+	def_bool y
+
+config PGTABLE_LEVELS
+	int
+	default 4 if 64BIT
+	default 2
+
+source "init/Kconfig"
+
+source "kernel/Kconfig.freezer"
+
+source "kernel/livepatch/Kconfig"
+
+menu "Processor type and features"
+
+config HAVE_MARCH_Z900_FEATURES
+	def_bool n
+
+config HAVE_MARCH_Z990_FEATURES
+	def_bool n
+	select HAVE_MARCH_Z900_FEATURES
+
+config HAVE_MARCH_Z9_109_FEATURES
+	def_bool n
+	select HAVE_MARCH_Z990_FEATURES
+
+config HAVE_MARCH_Z10_FEATURES
+	def_bool n
+	select HAVE_MARCH_Z9_109_FEATURES
+
+config HAVE_MARCH_Z196_FEATURES
+	def_bool n
+	select HAVE_MARCH_Z10_FEATURES
+
+config HAVE_MARCH_ZEC12_FEATURES
+	def_bool n
+	select HAVE_MARCH_Z196_FEATURES
+
+config HAVE_MARCH_Z13_FEATURES
+	def_bool n
+	select HAVE_MARCH_ZEC12_FEATURES
+
+choice
+	prompt "Processor type"
+	default MARCH_Z900
+
+config MARCH_Z900
+	bool "IBM zSeries model z800 and z900"
+	select HAVE_MARCH_Z900_FEATURES
+	help
+	  Select this to enable optimizations for model z800/z900 (2064 and
+	  2066 series). This will enable some optimizations that are not
+	  available on older ESA/390 (31 Bit) only CPUs.
+
+config MARCH_Z990
+	bool "IBM zSeries model z890 and z990"
+	select HAVE_MARCH_Z990_FEATURES
+	help
+	  Select this to enable optimizations for model z890/z990 (2084 and
+	  2086 series). The kernel will be slightly faster but will not work
+	  on older machines.
+
+config MARCH_Z9_109
+	bool "IBM System z9"
+	select HAVE_MARCH_Z9_109_FEATURES
+	help
+	  Select this to enable optimizations for IBM System z9 (2094 and
+	  2096 series). The kernel will be slightly faster but will not work
+	  on older machines.
+
+config MARCH_Z10
+	bool "IBM System z10"
+	select HAVE_MARCH_Z10_FEATURES
+	help
+	  Select this to enable optimizations for IBM System z10 (2097 and
+	  2098 series). The kernel will be slightly faster but will not work
+	  on older machines.
+
+config MARCH_Z196
+	bool "IBM zEnterprise 114 and 196"
+	select HAVE_MARCH_Z196_FEATURES
+	help
+	  Select this to enable optimizations for IBM zEnterprise 114 and 196
+	  (2818 and 2817 series). The kernel will be slightly faster but will
+	  not work on older machines.
+
+config MARCH_ZEC12
+	bool "IBM zBC12 and zEC12"
+	select HAVE_MARCH_ZEC12_FEATURES
+	help
+	  Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and
+	  2827 series). The kernel will be slightly faster but will not work on
+	  older machines.
+
+config MARCH_Z13
+	bool "IBM z13"
+	select HAVE_MARCH_Z13_FEATURES
+	help
+	  Select this to enable optimizations for IBM z13 (2964 series).
+	  The kernel will be slightly faster but will not work on older
+	  machines.
+
+endchoice
+
+config MARCH_Z900_TUNE
+	def_bool TUNE_Z900 || MARCH_Z900 && TUNE_DEFAULT
+
+config MARCH_Z990_TUNE
+	def_bool TUNE_Z990 || MARCH_Z990 && TUNE_DEFAULT
+
+config MARCH_Z9_109_TUNE
+	def_bool TUNE_Z9_109 || MARCH_Z9_109 && TUNE_DEFAULT
+
+config MARCH_Z10_TUNE
+	def_bool TUNE_Z10 || MARCH_Z10 && TUNE_DEFAULT
+
+config MARCH_Z196_TUNE
+	def_bool TUNE_Z196 || MARCH_Z196 && TUNE_DEFAULT
+
+config MARCH_ZEC12_TUNE
+	def_bool TUNE_ZEC12 || MARCH_ZEC12 && TUNE_DEFAULT
+
+config MARCH_Z13_TUNE
+	def_bool TUNE_Z13 || MARCH_Z13 && TUNE_DEFAULT
+
+choice
+	prompt "Tune code generation"
+	default TUNE_DEFAULT
+	help
+	  Cause the compiler to tune (-mtune) the generated code for a machine.
+	  This will make the code run faster on the selected machine but
+	  somewhat slower on other machines.
+	  This option only changes how the compiler emits instructions, not the
+	  selection of instructions itself, so the resulting kernel will run on
+	  all other machines.
+
+config TUNE_DEFAULT
+	bool "Default"
+	help
+	  Tune the generated code for the target processor for which the kernel
+	  will be compiled.
+
+config TUNE_Z900
+	bool "IBM zSeries model z800 and z900"
+
+config TUNE_Z990
+	bool "IBM zSeries model z890 and z990"
+
+config TUNE_Z9_109
+	bool "IBM System z9"
+
+config TUNE_Z10
+	bool "IBM System z10"
+
+config TUNE_Z196
+	bool "IBM zEnterprise 114 and 196"
+
+config TUNE_ZEC12
+	bool "IBM zBC12 and zEC12"
+
+config TUNE_Z13
+	bool "IBM z13"
+
+endchoice
+
+config 64BIT
+	def_bool y
+
+config COMPAT
+	def_bool y
+	prompt "Kernel support for 31 bit emulation"
+	select COMPAT_BINFMT_ELF if BINFMT_ELF
+	select ARCH_WANT_OLD_COMPAT_IPC
+	select COMPAT_OLD_SIGACTION
+	depends on MULTIUSER
+	help
+	  Select this option if you want to enable your system kernel to
+	  handle system-calls from ELF binaries for 31 bit ESA.  This option
+	  (and some other stuff like libraries and such) is needed for
+	  executing 31 bit applications.  It is safe to say "Y".
+
+config SYSVIPC_COMPAT
+	def_bool y if COMPAT && SYSVIPC
+
+config KEYS_COMPAT
+	def_bool y if COMPAT && KEYS
+
+config SMP
+	def_bool y
+	prompt "Symmetric multi-processing support"
+	---help---
+	  This enables support for systems with more than one CPU. If you have
+	  a system with only one CPU, like most personal computers, say N. If
+	  you have a system with more than one CPU, say Y.
+
+	  If you say N here, the kernel will run on uni- and multiprocessor
+	  machines, but will use only one CPU of a multiprocessor machine. If
+	  you say Y here, the kernel will run on many, but not all,
+	  uniprocessor machines. On a uniprocessor machine, the kernel
+	  will run faster if you say N here.
+
+	  See also the SMP-HOWTO available at
+	  <http://www.tldp.org/docs.html#howto>.
+
+	  Even if you don't know what to do here, say Y.
+
+config NR_CPUS
+	int "Maximum number of CPUs (2-512)"
+	range 2 512
+	depends on SMP
+	default "64"
+	help
+	  This allows you to specify the maximum number of CPUs which this
+	  kernel will support. The maximum supported value is 512 and the
+	  minimum value which makes sense is 2.
+
+	  This is purely to save memory - each supported CPU adds
+	  approximately sixteen kilobytes to the kernel image.
+
+config HOTPLUG_CPU
+	def_bool y
+	prompt "Support for hot-pluggable CPUs"
+	depends on SMP
+	help
+	  Say Y here to be able to turn CPUs off and on. CPUs
+	  can be controlled through /sys/devices/system/cpu/cpu#.
+	  Say N if you want to disable CPU hotplug.
+
+config SCHED_SMT
+	def_bool n
+
+config SCHED_MC
+	def_bool n
+
+config SCHED_BOOK
+	def_bool n
+
+config SCHED_TOPOLOGY
+	def_bool y
+	prompt "Topology scheduler support"
+	depends on SMP
+	select SCHED_SMT
+	select SCHED_MC
+	select SCHED_BOOK
+	help
+	  Topology scheduler support improves the CPU scheduler's decision
+	  making when dealing with machines that have multi-threading,
+	  multiple cores or multiple books.
+
+source kernel/Kconfig.preempt
+
+source kernel/Kconfig.hz
+
+endmenu
+
+menu "Memory setup"
+
+config ARCH_SPARSEMEM_ENABLE
+	def_bool y
+	select SPARSEMEM_VMEMMAP_ENABLE
+	select SPARSEMEM_VMEMMAP
+
+config ARCH_SPARSEMEM_DEFAULT
+	def_bool y
+
+config ARCH_SELECT_MEMORY_MODEL
+	def_bool y
+
+config ARCH_ENABLE_MEMORY_HOTPLUG
+	def_bool y if SPARSEMEM
+
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+	def_bool y
+
+config ARCH_ENABLE_SPLIT_PMD_PTLOCK
+	def_bool y
+
+config FORCE_MAX_ZONEORDER
+	int
+	default "9"
+
+source "mm/Kconfig"
+
+config PACK_STACK
+	def_bool y
+	prompt "Pack kernel stack"
+	help
+	  This option enables the compiler option -mkernel-backchain if it
+	  is available. If the option is available the compiler supports
+	  the new stack layout which dramatically reduces the minimum stack
+	  frame size. With an old compiler a non-leaf function needs a
+	  minimum of 96 bytes on 31 bit and 160 bytes on 64 bit. With
+	  -mkernel-backchain the minimum size drops to 16 byte on 31 bit
+	  and 24 byte on 64 bit.
+
+	  Say Y if you are unsure.
+
+config CHECK_STACK
+	def_bool y
+	prompt "Detect kernel stack overflow"
+	help
+	  This option enables the compiler option -mstack-guard and
+	  -mstack-size if they are available. If the compiler supports them
+	  it will emit additional code to each function prolog to trigger
+	  an illegal operation if the kernel stack is about to overflow.
+
+	  Say N if you are unsure.
+
+config STACK_GUARD
+	int "Size of the guard area (128-1024)"
+	range 128 1024
+	depends on CHECK_STACK
+	default "256"
+	help
+	  This allows you to specify the size of the guard area at the lower
+	  end of the kernel stack. If the kernel stack points into the guard
+	  area on function entry an illegal operation is triggered. The size
+	  needs to be a power of 2. Please keep in mind that the size of an
+	  interrupt frame is 184 bytes for 31 bit and 328 bytes on 64 bit.
+	  The minimum size for the stack guard should be 256 for 31 bit and
+	  512 for 64 bit.
+
+config WARN_DYNAMIC_STACK
+	def_bool n
+	prompt "Emit compiler warnings for function with dynamic stack usage"
+	help
+	  This option enables the compiler option -mwarn-dynamicstack. If the
+	  compiler supports this option, it generates warnings for functions
+	  that dynamically allocate stack space using alloca.
+
+	  Say N if you are unsure.
+
+endmenu
+
+menu "I/O subsystem"
+
+config QDIO
+	def_tristate y
+	prompt "QDIO support"
+	---help---
+	  This driver provides the Queued Direct I/O base support for
+	  IBM System z.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called qdio.
+
+	  If unsure, say Y.
+
+menuconfig PCI
+	bool "PCI support"
+	select HAVE_DMA_ATTRS
+	select PCI_MSI
+	help
+	  Enable PCI support.
+
+if PCI
+
+config PCI_NR_FUNCTIONS
+	int "Maximum number of PCI functions (1-4096)"
+	range 1 4096
+	default "64"
+	help
+	  This allows you to specify the maximum number of PCI functions which
+	  this kernel will support.
+
+config PCI_NR_MSI
+	int "Maximum number of MSI interrupts (64-32768)"
+	range 64 32768
+	default "256"
+	help
+	  This defines the number of virtual interrupts the kernel will
+	  provide for MSI interrupts. If you configure your system to have
+	  too few, drivers will fail to allocate MSI interrupts for all
+	  PCI devices.
+
+source "drivers/pci/Kconfig"
+source "drivers/pci/pcie/Kconfig"
+source "drivers/pci/hotplug/Kconfig"
+
+endif	# PCI
+
+config PCI_DOMAINS
+	def_bool PCI
+
+config HAS_IOMEM
+	def_bool PCI
+
+config IOMMU_HELPER
+	def_bool PCI
+
+config HAS_DMA
+	def_bool PCI
+	select HAVE_DMA_API_DEBUG
+
+config NEED_SG_DMA_LENGTH
+	def_bool PCI
+
+config NEED_DMA_MAP_STATE
+	def_bool PCI
+
+config CHSC_SCH
+	def_tristate m
+	prompt "Support for CHSC subchannels"
+	help
+	  This driver allows usage of CHSC subchannels. A CHSC subchannel
+	  is usually present on LPAR only.
+	  The driver creates a device /dev/chsc, which may be used to
+	  obtain I/O configuration information about the machine and
+	  to issue asynchronous chsc commands (DANGEROUS).
+	  You will usually only want to use this interface on a special
+	  LPAR designated for system management.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called chsc_sch.
+
+	  If unsure, say N.
+
+config SCM_BUS
+	def_bool y
+	prompt "SCM bus driver"
+	help
+	  Bus driver for Storage Class Memory.
+
+config EADM_SCH
+	def_tristate m
+	prompt "Support for EADM subchannels"
+	depends on SCM_BUS
+	help
+	  This driver allows usage of EADM subchannels. EADM subchannels act
+	  as a communication vehicle for SCM increments.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called eadm_sch.
+
+endmenu
+
+menu "Dump support"
+
+config CRASH_DUMP
+	bool "kernel crash dumps"
+	depends on SMP
+	select KEXEC
+	help
+	  Generate crash dump after being started by kexec.
+	  Crash dump kernels are loaded in the main kernel with kexec-tools
+	  into a specially reserved region and then later executed after
+	  a crash by kdump/kexec.
+	  Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
+	  This option also enables s390 zfcpdump.
+	  See also <file:Documentation/s390/zfcpdump.txt>
+
+endmenu
+
+menu "Executable file formats / Emulations"
+
+source "fs/Kconfig.binfmt"
+
+config SECCOMP
+	def_bool y
+	prompt "Enable seccomp to safely compute untrusted bytecode"
+	depends on PROC_FS
+	help
+	  This kernel feature is useful for number crunching applications
+	  that may need to compute untrusted bytecode during their
+	  execution. By using pipes or other transports made available to
+	  the process as file descriptors supporting the read/write
+	  syscalls, it's possible to isolate those applications in
+	  their own address space using seccomp. Once seccomp is
+	  enabled via /proc/<pid>/seccomp, it cannot be disabled
+	  and the task is only allowed to execute a few safe syscalls
+	  defined by each seccomp mode.
+
+	  If unsure, say Y.
+
+endmenu
+
+menu "Power Management"
+
+config ARCH_HIBERNATION_POSSIBLE
+	def_bool y
+
+source "kernel/power/Kconfig"
+
+endmenu
+
+source "net/Kconfig"
+
+config PCMCIA
+	def_bool n
+
+config CCW
+	def_bool y
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/s390/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
+
+menu "Virtualization"
+
+config PFAULT
+	def_bool y
+	prompt "Pseudo page fault support"
+	help
+	  Select this option, if you want to use PFAULT pseudo page fault
+	  handling under VM. If running native or in LPAR, this option
+	  has no effect. If your VM does not support PFAULT, PAGEEX
+	  pseudo page fault handling will be used.
+	  Note that VM 4.2 supports PFAULT but has a bug in its
+	  implementation that causes some problems.
+	  Everybody who wants to run Linux under VM != VM4.2 should select
+	  this option.
+
+config SHARED_KERNEL
+	bool "VM shared kernel support"
+	depends on !JUMP_LABEL
+	help
+	  Select this option, if you want to share the text segment of the
+	  Linux kernel between different VM guests. This reduces memory
+	  usage with lots of guests but greatly increases kernel size.
+	  Also if a kernel was IPL'ed from a shared segment the kexec system
+	  call will not work.
+	  You should only select this option if you know what you are
+	  doing and want to exploit this feature.
+
+config CMM
+	def_tristate n
+	prompt "Cooperative memory management"
+	help
+	  Select this option, if you want to enable the kernel interface
+	  to reduce the memory size of the system. This is accomplished
+	  by allocating pages of memory and putting them "on hold". This only
+	  makes sense for a system running under VM where the unused pages
+	  will be reused by VM for other guest systems. The interface
+	  allows an external monitor to balance memory of many systems.
+	  Everybody who wants to run Linux under VM should select this
+	  option.
+
+config CMM_IUCV
+	def_bool y
+	prompt "IUCV special message interface to cooperative memory management"
+	depends on CMM && (SMSGIUCV=y || CMM=SMSGIUCV)
+	help
+	  Select this option to enable the special message interface to
+	  the cooperative memory management.
+
+config APPLDATA_BASE
+	def_bool n
+	prompt "Linux - VM Monitor Stream, base infrastructure"
+	depends on PROC_FS
+	help
+	  This provides a kernel interface for creating and updating z/VM APPLDATA
+	  monitor records. The monitor records are updated at certain time
+	  intervals, once the timer is started.
+	  Writing 1 or 0 to /proc/appldata/timer starts(1) or stops(0) the timer,
+	  i.e. enables or disables monitoring on the Linux side.
+	  A custom interval value (in seconds) can be written to
+	  /proc/appldata/interval.
+
+	  Defaults are 60 seconds interval and timer off.
+	  The /proc entries can also be read from, showing the current settings.
+
+config APPLDATA_MEM
+	def_tristate m
+	prompt "Monitor memory management statistics"
+	depends on APPLDATA_BASE && VM_EVENT_COUNTERS
+	help
+	  This provides memory management related data to the Linux - VM Monitor
+	  Stream, like paging/swapping rate, memory utilisation, etc.
+	  Writing 1 or 0 to /proc/appldata/memory creates(1) or removes(0) a z/VM
+	  APPLDATA monitor record, i.e. enables or disables monitoring this record
+	  on the z/VM side.
+
+	  Default is disabled.
+	  The /proc entry can also be read from, showing the current settings.
+
+	  This can also be compiled as a module, which will be called
+	  appldata_mem.o.
+
+config APPLDATA_OS
+	def_tristate m
+	prompt "Monitor OS statistics"
+	depends on APPLDATA_BASE
+	help
+	  This provides OS related data to the Linux - VM Monitor Stream, like
+	  CPU utilisation, etc.
+	  Writing 1 or 0 to /proc/appldata/os creates(1) or removes(0) a z/VM
+	  APPLDATA monitor record, i.e. enables or disables monitoring this record
+	  on the z/VM side.
+
+	  Default is disabled.
+	  This can also be compiled as a module, which will be called
+	  appldata_os.o.
+
+config APPLDATA_NET_SUM
+	def_tristate m
+	prompt "Monitor overall network statistics"
+	depends on APPLDATA_BASE && NET
+	help
+	  This provides network related data to the Linux - VM Monitor Stream,
+	  currently there is only a total sum of network I/O statistics, no
+	  per-interface data.
+	  Writing 1 or 0 to /proc/appldata/net_sum creates(1) or removes(0) a z/VM
+	  APPLDATA monitor record, i.e. enables or disables monitoring this record
+	  on the z/VM side.
+
+	  Default is disabled.
+	  This can also be compiled as a module, which will be called
+	  appldata_net_sum.o.
+
+config S390_HYPFS_FS
+	def_bool y
+	prompt "s390 hypervisor file system support"
+	select SYS_HYPERVISOR
+	help
+	  This is a virtual file system intended to provide accounting
+	  information in an s390 hypervisor environment.
+
+source "arch/s390/kvm/Kconfig"
+
+config S390_GUEST
+	def_bool y
+	prompt "s390 support for virtio devices"
+	select TTY
+	select VIRTUALIZATION
+	select VIRTIO
+	select VIRTIO_CONSOLE
+	help
+	  Enabling this option adds support for virtio based paravirtual device
+	  drivers on s390.
+
+	  Select this option if you want to run the kernel as a guest under
+	  the KVM hypervisor.
+
+endmenu
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
new file mode 100644
index 000000000..c56878e12
--- /dev/null
+++ b/arch/s390/Kconfig.debug
@@ -0,0 +1,35 @@
+menu "Kernel hacking"
+
+config TRACE_IRQFLAGS_SUPPORT
+	def_bool y
+
+source "lib/Kconfig.debug"
+
+config STRICT_DEVMEM
+	def_bool y
+	prompt "Filter access to /dev/mem"
+	---help---
+	  This option restricts access to /dev/mem.  If this option is
+	  disabled, you allow userspace access to all memory, including
+	  kernel and userspace memory. Accidental memory access is likely
+	  to be disastrous.
+	  Memory access is required for experts who want to debug the kernel.
+
+	  If you are unsure, say Y.
+
+config S390_PTDUMP
+	bool "Export kernel pagetable layout to userspace via debugfs"
+	depends on DEBUG_KERNEL
+	select DEBUG_FS
+	---help---
+	  Say Y here if you want to show the kernel pagetable layout in a
+	  debugfs file. This information is only useful for kernel developers
+	  who are working in architecture specific areas of the kernel.
+	  It is probably not a good idea to enable this feature in a production
+	  kernel.
+	  If in doubt, say "N"
+
+config DEBUG_SET_MODULE_RONX
+	def_bool y
+	depends on MODULES
+endmenu
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
new file mode 100644
index 000000000..667b1bca5
--- /dev/null
+++ b/arch/s390/Makefile
@@ -0,0 +1,130 @@
+#
+# s390/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to have actions
+# for "archclean" and "archdep" for cleaning up and making dependencies for
+# this architecture
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+#
+
+LD_BFD		:= elf64-s390
+LDFLAGS		:= -m elf64_s390
+KBUILD_AFLAGS_MODULE += -fPIC
+KBUILD_CFLAGS_MODULE += -fPIC
+KBUILD_CFLAGS	+= -m64
+KBUILD_AFLAGS	+= -m64
+UTS_MACHINE	:= s390x
+STACK_SIZE	:= 16384
+CHECKFLAGS	+= -D__s390__ -D__s390x__
+
+export LD_BFD
+
+mflags-$(CONFIG_MARCH_Z900)   := -march=z900
+mflags-$(CONFIG_MARCH_Z990)   := -march=z990
+mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109
+mflags-$(CONFIG_MARCH_Z10)    := -march=z10
+mflags-$(CONFIG_MARCH_Z196)   := -march=z196
+mflags-$(CONFIG_MARCH_ZEC12)  := -march=zEC12
+mflags-$(CONFIG_MARCH_Z13)   := -march=z13
+
+aflags-y += $(mflags-y)
+cflags-y += $(mflags-y)
+
+cflags-$(CONFIG_MARCH_Z900_TUNE)	+= -mtune=z900
+cflags-$(CONFIG_MARCH_Z990_TUNE)	+= -mtune=z990
+cflags-$(CONFIG_MARCH_Z9_109_TUNE)	+= -mtune=z9-109
+cflags-$(CONFIG_MARCH_Z10_TUNE)		+= -mtune=z10
+cflags-$(CONFIG_MARCH_Z196_TUNE)	+= -mtune=z196
+cflags-$(CONFIG_MARCH_ZEC12_TUNE)	+= -mtune=zEC12
+cflags-$(CONFIG_MARCH_Z13_TUNE)	+= -mtune=z13
+
+#KBUILD_IMAGE is necessary for make rpm
+KBUILD_IMAGE	:=arch/s390/boot/image
+
+#
+# Prevent tail-call optimizations, to get clearer backtraces:
+#
+cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
+
+# old style option for packed stacks
+ifeq ($(call cc-option-yn,-mkernel-backchain),y)
+cflags-$(CONFIG_PACK_STACK)  += -mkernel-backchain -D__PACK_STACK
+aflags-$(CONFIG_PACK_STACK)  += -D__PACK_STACK
+endif
+
+# new style option for packed stacks
+ifeq ($(call cc-option-yn,-mpacked-stack),y)
+cflags-$(CONFIG_PACK_STACK)  += -mpacked-stack -D__PACK_STACK
+aflags-$(CONFIG_PACK_STACK)  += -D__PACK_STACK
+endif
+
+ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y)
+cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE)
+ifneq ($(call cc-option-yn,-mstack-size=8192),y)
+cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD)
+endif
+endif
+
+ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y)
+cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack
+endif
+
+ifdef CONFIG_FUNCTION_TRACER
+# make use of hotpatch feature if the compiler supports it
+cc_hotpatch	:= -mhotpatch=0,3
+ifeq ($(call cc-option-yn,$(cc_hotpatch)),y)
+CC_FLAGS_FTRACE := $(cc_hotpatch)
+KBUILD_AFLAGS	+= -DCC_USING_HOTPATCH
+KBUILD_CFLAGS	+= -DCC_USING_HOTPATCH
+endif
+endif
+
+KBUILD_CFLAGS	+= -mbackchain -msoft-float $(cflags-y)
+KBUILD_CFLAGS	+= -pipe -fno-strength-reduce -Wno-sign-compare
+KBUILD_AFLAGS	+= $(aflags-y)
+
+OBJCOPYFLAGS	:= -O binary
+
+head-y		:= arch/s390/kernel/head.o
+head-y		+= arch/s390/kernel/head64.o
+
+# See arch/s390/Kbuild for content of core part of the kernel
+core-y		+= arch/s390/
+
+libs-y		+= arch/s390/lib/
+drivers-y	+= drivers/s390/
+
+# must be linked after kernel
+drivers-$(CONFIG_OPROFILE)	+= arch/s390/oprofile/
+
+boot		:= arch/s390/boot
+
+all: image bzImage
+
+install: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $@
+
+image bzImage: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+zfcpdump:
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+vdso_install:
+	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
+	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@
+
+archclean:
+	$(Q)$(MAKE) $(clean)=$(boot)
+
+# Don't use tabs in echo arguments
+define archhelp
+  echo  '* image           - Kernel image for IPL ($(boot)/image)'
+  echo	'* bzImage         - Compressed kernel image for IPL ($(boot)/bzImage)'
+endef
diff --git a/arch/s390/appldata/Makefile b/arch/s390/appldata/Makefile
new file mode 100644
index 000000000..99f1cf071
--- /dev/null
+++ b/arch/s390/appldata/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the Linux - z/VM Monitor Stream.
+#
+
+obj-$(CONFIG_APPLDATA_BASE) += appldata_base.o
+obj-$(CONFIG_APPLDATA_MEM) += appldata_mem.o
+obj-$(CONFIG_APPLDATA_OS) += appldata_os.o
+obj-$(CONFIG_APPLDATA_NET_SUM) += appldata_net_sum.o
diff --git a/arch/s390/appldata/appldata.h b/arch/s390/appldata/appldata.h
new file mode 100644
index 000000000..4a67f2b5f
--- /dev/null
+++ b/arch/s390/appldata/appldata.h
@@ -0,0 +1,47 @@
+/*
+ * Definitions and interface for Linux - z/VM Monitor Stream.
+ *
+ * Copyright IBM Corp. 2003, 2008
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#define APPLDATA_MAX_REC_SIZE	  4024	/* Maximum size of the */
+					/* data buffer */
+#define APPLDATA_MAX_PROCS 100
+
+#define APPLDATA_PROC_NAME_LENGTH 16	/* Max. length of /proc name */
+
+#define APPLDATA_RECORD_MEM_ID		0x01	/* IDs to identify the */
+#define APPLDATA_RECORD_OS_ID		0x02	/* individual records, */
+#define APPLDATA_RECORD_NET_SUM_ID	0x03	/* must be < 256 !     */
+#define APPLDATA_RECORD_PROC_ID		0x04
+
+#define CTL_APPLDATA_TIMER 	2121	/* sysctl IDs, must be unique */
+#define CTL_APPLDATA_INTERVAL 	2122
+#define CTL_APPLDATA_MEM	2123
+#define CTL_APPLDATA_OS		2124
+#define CTL_APPLDATA_NET_SUM	2125
+#define CTL_APPLDATA_PROC	2126
+
+struct appldata_ops {
+	struct list_head list;			/* list linkage */
+	struct ctl_table_header *sysctl_header;	/* sysctl header pointer */
+	struct ctl_table *ctl_table;		/* sysctl table pointer */
+	int    active;				/* monitoring status */
+
+	/* fill in from here (fields above: presumably set by base code) */
+	char name[APPLDATA_PROC_NAME_LENGTH];	/* name of /proc fs node */
+	unsigned char record_nr;		/* Record Nr. for Product ID */
+	void (*callback)(void *data);		/* callback function, gets void * */
+	void *data;				/* record data */
+	unsigned int size;			/* size of record */
+	struct module *owner;			/* THIS_MODULE */
+	char mod_lvl[2];			/* modification level, EBCDIC */
+};
+
+extern int appldata_register_ops(struct appldata_ops *ops);
+extern void appldata_unregister_ops(struct appldata_ops *ops);
+extern int appldata_diag(char record_nr, u16 function, unsigned long buffer,
+			 u16 length, char *mod_lvl);
+
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
new file mode 100644
index 000000000..15c94246b
--- /dev/null
+++ b/arch/s390/appldata/appldata_base.c
@@ -0,0 +1,574 @@
+/*
+ * Base infrastructure for Linux-z/VM Monitor Stream, Stage 1.
+ * Exports appldata_register_ops() and appldata_unregister_ops() for the
+ * data gathering modules.
+ *
+ * Copyright IBM Corp. 2003, 2009
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT	"appldata"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/sysctl.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/workqueue.h>
+#include <linux/suspend.h>
+#include <linux/platform_device.h>
+#include <asm/appldata.h>
+#include <asm/vtimer.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+
+#include "appldata.h"
+
+
+#define APPLDATA_CPU_INTERVAL	10000		/* default (CPU) time for
+						   sampling interval in
+						   milliseconds */
+
+#define TOD_MICRO	0x01000			/* nr. of TOD clock units
+						   for 1 microsecond */
+
+static struct platform_device *appldata_pdev;
+
+/*
+ * /proc entries (sysctl)
+ */
+static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
+static int appldata_timer_handler(struct ctl_table *ctl, int write,
+				  void __user *buffer, size_t *lenp, loff_t *ppos);
+static int appldata_interval_handler(struct ctl_table *ctl, int write,
+					 void __user *buffer,
+					 size_t *lenp, loff_t *ppos);
+
+static struct ctl_table_header *appldata_sysctl_header;
+static struct ctl_table appldata_table[] = {
+	{
+		.procname	= "timer",
+		.mode		= S_IRUGO | S_IWUSR,
+		.proc_handler	= appldata_timer_handler,
+	},
+	{
+		.procname	= "interval",
+		.mode		= S_IRUGO | S_IWUSR,
+		.proc_handler	= appldata_interval_handler,
+	},
+	{ },
+};
+
+static struct ctl_table appldata_dir_table[] = {
+	{
+		.procname	= appldata_proc_name,
+		.maxlen		= 0,
+		.mode		= S_IRUGO | S_IXUGO,
+		.child		= appldata_table,
+	},
+	{ },
+};
+
+/*
+ * Timer
+ */
+static struct vtimer_list appldata_timer;
+
+static DEFINE_SPINLOCK(appldata_timer_lock);
+static int appldata_interval = APPLDATA_CPU_INTERVAL;
+static int appldata_timer_active;
+static int appldata_timer_suspended = 0;
+
+/*
+ * Work queue
+ */
+static struct workqueue_struct *appldata_wq;
+static void appldata_work_fn(struct work_struct *work);
+static DECLARE_WORK(appldata_work, appldata_work_fn);
+
+
+/*
+ * Ops list
+ */
+static DEFINE_MUTEX(appldata_ops_mutex);
+static LIST_HEAD(appldata_ops_list);
+
+
+/*************************** timer, work, DIAG *******************************/
+/*
+ * appldata_timer_function()
+ *
+ * queue the work item passed in data on appldata_wq (no rescheduling here)
+ */
+static void appldata_timer_function(unsigned long data)
+{
+	queue_work(appldata_wq, (struct work_struct *) data);
+}
+
+/*
+ * appldata_work_fn()
+ *
+ * Work queue function: with appldata_ops_mutex held, run the callback of
+ * every registered module whose monitoring is switched on (active == 1).
+ */
+static void appldata_work_fn(struct work_struct *work)
+{
+	struct appldata_ops *entry;
+
+	mutex_lock(&appldata_ops_mutex);
+	list_for_each_entry(entry, &appldata_ops_list, list) {
+		if (entry->active != 1)
+			continue;
+		/* hand the module its own record data */
+		entry->callback(entry->data);
+	}
+	mutex_unlock(&appldata_ops_mutex);
+}
+
+/*
+ * appldata_diag()
+ *
+ * set up the product ID, issue DIAG 0xDC and return appldata_asm()'s result
+ */
+int appldata_diag(char record_nr, u16 function, unsigned long buffer,
+			u16 length, char *mod_lvl)
+{
+	struct appldata_product_id id = {
+		.prod_nr    = {0xD3, 0xC9, 0xD5, 0xE4,
+			       0xE7, 0xD2, 0xD9},	/* "LINUXKR" */
+		.prod_fn    = 0xD5D3,			/* "NL" */
+		.version_nr = 0xF2F6,			/* "26" */
+		.release_nr = 0xF0F1,			/* "01" */
+	};
+
+	id.record_nr = record_nr;
+	id.mod_lvl = (mod_lvl[0]) << 8 | mod_lvl[1];	/* [0] is high byte */
+	return appldata_asm(&id, function, (void *) buffer, length);
+}
+/************************ timer, work, DIAG <END> ****************************/
+
+
+/****************************** /proc stuff **********************************/
+
+#define APPLDATA_ADD_TIMER	0
+#define APPLDATA_DEL_TIMER	1
+#define APPLDATA_MOD_TIMER	2
+
+/*
+ * __appldata_vtimer_setup()
+ *
+ * Add, delete or modify the periodic appldata virtual timer; the interval
+ * (ms) is converted to TOD clock units. Caller holds appldata_timer_lock.
+ */
+static void __appldata_vtimer_setup(int cmd)
+{
+	u64 timer_interval = (u64) appldata_interval * 1000 * TOD_MICRO;
+
+	switch (cmd) {
+	case APPLDATA_ADD_TIMER:
+		if (appldata_timer_active)
+			break;			/* already running */
+		appldata_timer.expires = timer_interval;
+		add_virt_timer_periodic(&appldata_timer);
+		appldata_timer_active = 1;
+		break;
+	case APPLDATA_DEL_TIMER:
+		del_virt_timer(&appldata_timer);	/* done unconditionally */
+		if (!appldata_timer_active)
+			break;
+		appldata_timer_active = 0;
+		break;
+	case APPLDATA_MOD_TIMER:
+		if (!appldata_timer_active)
+			break;			/* nothing to modify */
+		mod_virt_timer_periodic(&appldata_timer, timer_interval);
+	}
+}
+
+/*
+ * appldata_timer_handler()
+ *
+ * Write '1' to start or '0' to stop the timer, read "1\n" / "0\n" as status
+ */
+static int
+appldata_timer_handler(struct ctl_table *ctl, int write,
+			   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	unsigned int len;
+	char buf[2];
+
+	if (!*lenp || *ppos) {
+		*lenp = 0;
+		return 0;
+	}
+	if (write) {
+		len = *lenp;
+		if (copy_from_user(buf, buffer,
+				   len < sizeof(buf) ? len : sizeof(buf)))
+			return -EFAULT;
+		spin_lock(&appldata_timer_lock);
+		if (buf[0] == '0')
+			__appldata_vtimer_setup(APPLDATA_DEL_TIMER);
+		else if (buf[0] == '1')
+			__appldata_vtimer_setup(APPLDATA_ADD_TIMER);
+		spin_unlock(&appldata_timer_lock);
+	} else {
+		buf[0] = appldata_timer_active ? '1' : '0';
+		buf[1] = '\n';
+		len = ARRAY_SIZE(buf);
+		if (len > *lenp)
+			len = *lenp;
+		if (copy_to_user(buffer, buf, len))
+			return -EFAULT;
+	}
+	*lenp = len;
+	*ppos += len;
+	return 0;
+}
+
+/*
+ * appldata_interval_handler() - show or set the (CPU) timer interval for
+ * data collection, in milliseconds. A write must parse to a value > 0 (else
+ * -EINVAL); at most sizeof(buf) - 1 bytes are parsed, NUL-terminated first
+ * because sscanf() needs a string. A non-zero *ppos yields an empty transfer.
+ */
+static int
+appldata_interval_handler(struct ctl_table *ctl, int write,
+			   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	unsigned int len, n;
+	int interval = 0;
+	char buf[16];
+
+	if (!*lenp || *ppos) {
+		*lenp = 0;
+		return 0;
+	}
+	if (!write) {
+		len = sprintf(buf, "%i\n", appldata_interval);
+		if (len > *lenp)
+			len = *lenp;
+		if (copy_to_user(buffer, buf, len))
+			return -EFAULT;
+		goto out;
+	}
+	len = *lenp;
+	n = len < sizeof(buf) ? len : sizeof(buf) - 1;
+	if (copy_from_user(buf, buffer, n))
+		return -EFAULT;
+	buf[n] = '\0';			/* user data is not terminated */
+	sscanf(buf, "%i", &interval);
+	if (interval <= 0)
+		return -EINVAL;
+	spin_lock(&appldata_timer_lock);
+	appldata_interval = interval;
+	__appldata_vtimer_setup(APPLDATA_MOD_TIMER);
+	spin_unlock(&appldata_timer_lock);
+out:
+	*lenp = len;
+	*ppos += len;
+	return 0;
+}
+
+/*
+ * appldata_generic_handler()
+ *
+ * Per-module sysctl handler: a read shows ops->active as "0\n" or "1\n", a
+ * write of '1' / '0' starts / stops the DIAG recording for that module.
+ */
+static int
+appldata_generic_handler(struct ctl_table *ctl, int write,
+			   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct appldata_ops *ops = NULL, *tmp_ops;
+	unsigned int len;
+	int rc, found;
+	char buf[2];
+	struct list_head *lh;
+
+	found = 0;
+	mutex_lock(&appldata_ops_mutex);
+	list_for_each(lh, &appldata_ops_list) {
+		tmp_ops = list_entry(lh, struct appldata_ops, list);
+		if (&tmp_ops->ctl_table[2] == ctl) {	/* on the ops list? */
+			found = 1;
+		}
+	}
+	if (!found) {
+		mutex_unlock(&appldata_ops_mutex);
+		return -ENODEV;
+	}
+	ops = ctl->data;
+	if (!try_module_get(ops->owner)) {	/* ref for this call */
+		mutex_unlock(&appldata_ops_mutex);
+		return -ENODEV;
+	}
+	mutex_unlock(&appldata_ops_mutex);
+
+	if (!*lenp || *ppos) {
+		*lenp = 0;
+		module_put(ops->owner);
+		return 0;
+	}
+	if (!write) {
+		strncpy(buf, ops->active ? "1\n" : "0\n", ARRAY_SIZE(buf));
+		len = strnlen(buf, ARRAY_SIZE(buf));
+		if (len > *lenp)
+			len = *lenp;
+		if (copy_to_user(buffer, buf, len)) {
+			module_put(ops->owner);
+			return -EFAULT;
+		}
+		goto out;
+	}
+	len = *lenp;
+	if (copy_from_user(buf, buffer,
+			   len > sizeof(buf) ? sizeof(buf) : len)) {
+		module_put(ops->owner);
+		return -EFAULT;
+	}
+
+	mutex_lock(&appldata_ops_mutex);
+	if ((buf[0] == '1') && (ops->active == 0)) {
+		/* second reference, kept for as long as ops is active */
+		if (!try_module_get(ops->owner)) {
+			mutex_unlock(&appldata_ops_mutex);
+			module_put(ops->owner);
+			return -ENODEV;
+		}
+		ops->callback(ops->data);	/* init record */
+		rc = appldata_diag(ops->record_nr,
+					APPLDATA_START_INTERVAL_REC,
+					(unsigned long) ops->data, ops->size,
+					ops->mod_lvl);
+		if (rc != 0) {
+			pr_err("Starting the data collection for %s "
+			       "failed with rc=%d\n", ops->name, rc);
+			module_put(ops->owner);	/* start failed, drop it */
+		} else
+			ops->active = 1;
+	} else if ((buf[0] == '0') && (ops->active == 1)) {
+		ops->active = 0;
+		rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC,
+				(unsigned long) ops->data, ops->size,
+				ops->mod_lvl);
+		if (rc != 0)
+			pr_err("Stopping the data collection for %s "
+			       "failed with rc=%d\n", ops->name, rc);
+		module_put(ops->owner);	/* reference taken on start */
+	}
+	mutex_unlock(&appldata_ops_mutex);
+out:
+	*lenp = len;
+	*ppos += len;
+	module_put(ops->owner);		/* reference for this call */
+	return 0;
+}
+
+/*************************** /proc stuff <END> *******************************/
+
+
+/************************* module-ops management *****************************/
+/*
+ * appldata_register_ops()
+ *
+ * update ops list, register /proc/sys entries
+ */
+int appldata_register_ops(struct appldata_ops *ops)
+{
+	if (ops->size > APPLDATA_MAX_REC_SIZE)
+		return -EINVAL;
+
+	ops->ctl_table = kzalloc(4 * sizeof(struct ctl_table), GFP_KERNEL);
+	if (!ops->ctl_table)
+		return -ENOMEM;
+
+	mutex_lock(&appldata_ops_mutex);
+	list_add(&ops->list, &appldata_ops_list);
+	mutex_unlock(&appldata_ops_mutex);
+
+	ops->ctl_table[0].procname = appldata_proc_name;
+	ops->ctl_table[0].maxlen   = 0;
+	ops->ctl_table[0].mode     = S_IRUGO | S_IXUGO;
+	ops->ctl_table[0].child    = &ops->ctl_table[2];
+
+	ops->ctl_table[2].procname = ops->name;
+	ops->ctl_table[2].mode     = S_IRUGO | S_IWUSR;
+	ops->ctl_table[2].proc_handler = appldata_generic_handler;
+	ops->ctl_table[2].data = ops;
+
+	ops->sysctl_header = register_sysctl_table(ops->ctl_table);
+	if (!ops->sysctl_header)
+		goto out;
+	return 0;
+out:
+	mutex_lock(&appldata_ops_mutex);
+	list_del(&ops->list);
+	mutex_unlock(&appldata_ops_mutex);
+	kfree(ops->ctl_table);
+	return -ENOMEM;
+}
+
+/*
+ * appldata_unregister_ops()
+ *
+ * update ops list, unregister /proc entries, free ctl_table (no DIAG stop)
+ */
+void appldata_unregister_ops(struct appldata_ops *ops)
+{
+	mutex_lock(&appldata_ops_mutex);
+	list_del(&ops->list);
+	mutex_unlock(&appldata_ops_mutex);
+	unregister_sysctl_table(ops->sysctl_header);
+	kfree(ops->ctl_table);
+}
+/********************** module-ops management <END> **************************/
+
+
+/**************************** suspend / resume *******************************/
+/* PM freeze: stop the timer if it runs and stop recording of all active ops */
+static int appldata_freeze(struct device *dev)
+{
+	struct appldata_ops *entry;
+	int rc;
+
+	spin_lock(&appldata_timer_lock);
+	if (appldata_timer_active) {
+		__appldata_vtimer_setup(APPLDATA_DEL_TIMER);
+		appldata_timer_suspended = 1;	/* remember it was running */
+	}
+	spin_unlock(&appldata_timer_lock);
+
+	mutex_lock(&appldata_ops_mutex);
+	list_for_each_entry(entry, &appldata_ops_list, list) {
+		if (entry->active != 1)
+			continue;
+		/* ->active stays set, only the recording is stopped */
+		rc = appldata_diag(entry->record_nr, APPLDATA_STOP_REC,
+				   (unsigned long) entry->data, entry->size,
+				   entry->mod_lvl);
+		if (rc)
+			pr_err("Stopping the data collection for %s "
+			       "failed with rc=%d\n", entry->name, rc);
+	}
+	mutex_unlock(&appldata_ops_mutex);
+	return 0;
+}
+
+static int appldata_restore(struct device *dev)
+{
+	struct appldata_ops *ops;
+	int rc;
+	struct list_head *lh;
+
+	spin_lock(&appldata_timer_lock);
+	if (appldata_timer_suspended) {
+		__appldata_vtimer_setup(APPLDATA_ADD_TIMER);
+		appldata_timer_suspended = 0;
+	}
+	spin_unlock(&appldata_timer_lock);
+
+	mutex_lock(&appldata_ops_mutex);
+	list_for_each(lh, &appldata_ops_list) {
+		ops = list_entry(lh, struct appldata_ops, list);
+		if (ops->active == 1) {
+			ops->callback(ops->data);	// init record
+			rc = appldata_diag(ops->record_nr,
+					APPLDATA_START_INTERVAL_REC,
+					(unsigned long) ops->data, ops->size,
+					ops->mod_lvl);
+			if (rc != 0) {
+				pr_err("Starting the data collection for %s "
+				       "failed with rc=%d\n", ops->name, rc);
+			}
+		}
+	}
+	mutex_unlock(&appldata_ops_mutex);
+	return 0;
+}
+
+static int appldata_thaw(struct device *dev)
+{
+	return appldata_restore(dev);
+}
+
+static const struct dev_pm_ops appldata_pm_ops = {
+	.freeze		= appldata_freeze,
+	.thaw		= appldata_thaw,
+	.restore	= appldata_restore,
+};
+
+static struct platform_driver appldata_pdrv = {
+	.driver = {
+		.name	= "appldata",
+		.pm	= &appldata_pm_ops,
+	},
+};
+/************************* suspend / resume <END> ****************************/
+
+
+/******************************* init / exit *********************************/
+
+/*
+ * appldata_init() - init timer, register platform driver and device, create
+ * the work queue, register /proc entries; on error undo the earlier steps
+ */
+static int __init appldata_init(void)
+{
+	int rc;
+
+	init_virt_timer(&appldata_timer);
+	appldata_timer.function = appldata_timer_function;
+	appldata_timer.data = (unsigned long) &appldata_work;
+
+	rc = platform_driver_register(&appldata_pdrv);
+	if (rc)
+		return rc;
+	appldata_pdev = platform_device_register_simple("appldata", -1, NULL,
+							0);
+	if (IS_ERR(appldata_pdev)) {
+		rc = PTR_ERR(appldata_pdev);
+		goto out_driver;
+	}
+	rc = -ENOMEM;		/* for both allocations below */
+	appldata_wq = create_singlethread_workqueue("appldata");
+	if (!appldata_wq)
+		goto out_device;
+	appldata_sysctl_header = register_sysctl_table(appldata_dir_table);
+	if (!appldata_sysctl_header)
+		goto out_wq;
+	return 0;
+
+out_wq:
+	destroy_workqueue(appldata_wq);
+out_device:
+	platform_device_unregister(appldata_pdev);
+out_driver:
+	platform_driver_unregister(&appldata_pdrv);
+	return rc;
+}
+
+__initcall(appldata_init);
+
+/**************************** init / exit <END> ******************************/
+
+EXPORT_SYMBOL_GPL(appldata_register_ops);
+EXPORT_SYMBOL_GPL(appldata_unregister_ops);
+EXPORT_SYMBOL_GPL(appldata_diag);
+
+#ifdef CONFIG_SWAP
+EXPORT_SYMBOL_GPL(si_swapinfo);
+#endif
+EXPORT_SYMBOL_GPL(nr_threads);
+EXPORT_SYMBOL_GPL(nr_running);
+EXPORT_SYMBOL_GPL(nr_iowait);
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
new file mode 100644
index 000000000..edcf2a706
--- /dev/null
+++ b/arch/s390/appldata/appldata_mem.c
@@ -0,0 +1,164 @@
+/*
+ * Data gathering module for Linux-VM Monitor Stream, Stage 1.
+ * Collects data related to memory management.
+ *
+ * Copyright IBM Corp. 2003, 2006
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/slab.h>
+#include <asm/io.h>
+
+#include "appldata.h"
+
+
+#define P2K(x) ((x) << (PAGE_SHIFT - 10))	/* Converts #Pages to KB */
+
+/*
+ * Memory data
+ *
+ * This is accessed as binary data by z/VM. If changes to it can't be avoided,
+ * the structure version (product ID, see appldata_base.c) needs to be changed
+ * as well and all documentation and z/VM applications using it must be
+ * updated.
+ *
+ * The record layout is documented in the Linux for zSeries Device Drivers
+ * book:
+ * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
+ */
+struct appldata_mem_data {
+	u64 timestamp;
+	u32 sync_count_1;       /* after VM collected the record data, */
+	u32 sync_count_2;	/* sync_count_1 and sync_count_2 should be the
+				   same. If not, the record has been updated on
+				   the Linux side while VM was collecting the
+				   (possibly corrupt) data */
+
+	u64 pgpgin;		/* data read from disk  */
+	u64 pgpgout;		/* data written to disk */
+	u64 pswpin;		/* pages swapped in  */
+	u64 pswpout;		/* pages swapped out */
+
+	u64 sharedram;		/* sharedram is currently set to 0 */
+
+	u64 totalram;		/* total main memory size */
+	u64 freeram;		/* free main memory size  */
+	u64 totalhigh;		/* total high memory size */
+	u64 freehigh;		/* free high memory size  */
+
+	u64 bufferram;		/* memory reserved for buffers, free cache */
+	u64 cached;		/* size of (used) cache, w/o buffers */
+	u64 totalswap;		/* total swap space size */
+	u64 freeswap;		/* free swap space */
+
+// New in 2.6 -->
+	u64 pgalloc;		/* page allocations */
+	u64 pgfault;		/* page faults (major+minor) */
+	u64 pgmajfault;		/* page faults (major only) */
+// <-- New in 2.6
+
+} __packed;
+
+
+/*
+ * appldata_get_mem_data()
+ *
+ * gather memory data
+ */
+static void appldata_get_mem_data(void *data)
+{
+	/*
+	 * don't put large structures on the stack, we are
+	 * serialized through the appldata_ops_mutex and can use static
+	 */
+	static struct sysinfo val;
+	unsigned long ev[NR_VM_EVENT_ITEMS];
+	struct appldata_mem_data *mem_data;
+
+	mem_data = data;
+	mem_data->sync_count_1++;
+
+	all_vm_events(ev);
+	mem_data->pgpgin     = ev[PGPGIN] >> 1;
+	mem_data->pgpgout    = ev[PGPGOUT] >> 1;
+	mem_data->pswpin     = ev[PSWPIN];
+	mem_data->pswpout    = ev[PSWPOUT];
+	mem_data->pgalloc    = ev[PGALLOC_NORMAL];
+	mem_data->pgalloc    += ev[PGALLOC_DMA];
+	mem_data->pgfault    = ev[PGFAULT];
+	mem_data->pgmajfault = ev[PGMAJFAULT];
+
+	si_meminfo(&val);
+	mem_data->sharedram = val.sharedram;
+	mem_data->totalram  = P2K(val.totalram);
+	mem_data->freeram   = P2K(val.freeram);
+	mem_data->totalhigh = P2K(val.totalhigh);
+	mem_data->freehigh  = P2K(val.freehigh);
+	mem_data->bufferram = P2K(val.bufferram);
+	mem_data->cached    = P2K(global_page_state(NR_FILE_PAGES)
+				- val.bufferram);
+
+	si_swapinfo(&val);
+	mem_data->totalswap = P2K(val.totalswap);
+	mem_data->freeswap  = P2K(val.freeswap);
+
+	mem_data->timestamp = get_tod_clock();
+	mem_data->sync_count_2++;
+}
+
+
+static struct appldata_ops ops = {
+	.name      = "mem",
+	.record_nr = APPLDATA_RECORD_MEM_ID,
+	.size	   = sizeof(struct appldata_mem_data),
+	.callback  = &appldata_get_mem_data,
+	.owner     = THIS_MODULE,
+	.mod_lvl   = {0xF0, 0xF0},		/* EBCDIC "00" */
+};
+
+
+/*
+ * appldata_mem_init()
+ *
+ * init_data, register ops
+ */
+static int __init appldata_mem_init(void)
+{
+	int ret;
+
+	ops.data = kzalloc(sizeof(struct appldata_mem_data), GFP_KERNEL);
+	if (!ops.data)
+		return -ENOMEM;
+
+	ret = appldata_register_ops(&ops);
+	if (ret)
+		kfree(ops.data);
+
+	return ret;
+}
+
+/*
+ * appldata_mem_exit()
+ *
+ * unregister ops
+ */
+static void __exit appldata_mem_exit(void)
+{
+	appldata_unregister_ops(&ops);
+	kfree(ops.data);
+}
+
+
+module_init(appldata_mem_init);
+module_exit(appldata_mem_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gerald Schaefer");
+MODULE_DESCRIPTION("Linux-VM Monitor Stream, MEMORY statistics");
diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
new file mode 100644
index 000000000..66037d262
--- /dev/null
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -0,0 +1,166 @@
+/*
+ * Data gathering module for Linux-VM Monitor Stream, Stage 1.
+ * Collects accumulated network statistics (Packets received/transmitted,
+ * dropped, errors, ...).
+ *
+ * Copyright IBM Corp. 2003, 2006
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/netdevice.h>
+#include <net/net_namespace.h>
+
+#include "appldata.h"
+
+
+/*
+ * Network data
+ *
+ * This is accessed as binary data by z/VM. If changes to it can't be avoided,
+ * the structure version (product ID, see appldata_base.c) needs to be changed
+ * as well and all documentation and z/VM applications using it must be updated.
+ *
+ * The record layout is documented in the Linux for zSeries Device Drivers
+ * book:
+ * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
+ */
+struct appldata_net_sum_data {
+	u64 timestamp;
+	u32 sync_count_1;	/* after VM collected the record data, */
+	u32 sync_count_2;	/* sync_count_1 and sync_count_2 should be the
+				   same. If not, the record has been updated on
+				   the Linux side while VM was collecting the
+				   (possibly corrupt) data */
+
+	u32 nr_interfaces;	/* nr. of network interfaces being monitored */
+
+	u32 padding;		/* next value is 64-bit aligned, so these */
+				/* 4 byte would be padded out by compiler */
+
+	u64 rx_packets;		/* total packets received        */
+	u64 tx_packets;		/* total packets transmitted     */
+	u64 rx_bytes;		/* total bytes received          */
+	u64 tx_bytes;		/* total bytes transmitted       */
+	u64 rx_errors;		/* bad packets received          */
+	u64 tx_errors;		/* packet transmit problems      */
+	u64 rx_dropped;		/* no space in linux buffers     */
+	u64 tx_dropped;		/* no space available in linux   */
+	u64 collisions;		/* collisions while transmitting */
+} __packed;
+
+
+/*
+ * appldata_get_net_sum_data()
+ *
+ * gather accumulated network statistics
+ */
+static void appldata_get_net_sum_data(void *data)
+{
+	int i;
+	struct appldata_net_sum_data *net_data;
+	struct net_device *dev;
+	unsigned long rx_packets, tx_packets, rx_bytes, tx_bytes, rx_errors,
+			tx_errors, rx_dropped, tx_dropped, collisions;
+
+	net_data = data;
+	net_data->sync_count_1++;
+
+	i = 0;
+	rx_packets = 0;
+	tx_packets = 0;
+	rx_bytes   = 0;
+	tx_bytes   = 0;
+	rx_errors  = 0;
+	tx_errors  = 0;
+	rx_dropped = 0;
+	tx_dropped = 0;
+	collisions = 0;
+
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, dev) {
+		const struct rtnl_link_stats64 *stats;
+		struct rtnl_link_stats64 temp;
+
+		stats = dev_get_stats(dev, &temp);
+		rx_packets += stats->rx_packets;
+		tx_packets += stats->tx_packets;
+		rx_bytes   += stats->rx_bytes;
+		tx_bytes   += stats->tx_bytes;
+		rx_errors  += stats->rx_errors;
+		tx_errors  += stats->tx_errors;
+		rx_dropped += stats->rx_dropped;
+		tx_dropped += stats->tx_dropped;
+		collisions += stats->collisions;
+		i++;
+	}
+	rcu_read_unlock();
+
+	net_data->nr_interfaces = i;
+	net_data->rx_packets = rx_packets;
+	net_data->tx_packets = tx_packets;
+	net_data->rx_bytes   = rx_bytes;
+	net_data->tx_bytes   = tx_bytes;
+	net_data->rx_errors  = rx_errors;
+	net_data->tx_errors  = tx_errors;
+	net_data->rx_dropped = rx_dropped;
+	net_data->tx_dropped = tx_dropped;
+	net_data->collisions = collisions;
+
+	net_data->timestamp = get_tod_clock();
+	net_data->sync_count_2++;
+}
+
+
+static struct appldata_ops ops = {
+	.name	   = "net_sum",
+	.record_nr = APPLDATA_RECORD_NET_SUM_ID,
+	.size	   = sizeof(struct appldata_net_sum_data),
+	.callback  = &appldata_get_net_sum_data,
+	.owner     = THIS_MODULE,
+	.mod_lvl   = {0xF0, 0xF0},		/* EBCDIC "00" */
+};
+
+
+/*
+ * appldata_net_init()
+ *
+ * init data, register ops
+ */
+static int __init appldata_net_init(void)
+{
+	int ret;
+
+	ops.data = kzalloc(sizeof(struct appldata_net_sum_data), GFP_KERNEL);
+	if (!ops.data)
+		return -ENOMEM;
+
+	ret = appldata_register_ops(&ops);
+	if (ret)
+		kfree(ops.data);
+
+	return ret;
+}
+
+/*
+ * appldata_net_exit()
+ *
+ * unregister ops
+ */
+static void __exit appldata_net_exit(void)
+{
+	appldata_unregister_ops(&ops);
+	kfree(ops.data);
+}
+
+
+module_init(appldata_net_init);
+module_exit(appldata_net_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gerald Schaefer");
+MODULE_DESCRIPTION("Linux-VM Monitor Stream, accumulated network statistics");
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
new file mode 100644
index 000000000..69b23b25a
--- /dev/null
+++ b/arch/s390/appldata/appldata_os.c
@@ -0,0 +1,218 @@
+/*
+ * Data gathering module for Linux-VM Monitor Stream, Stage 1.
+ * Collects misc. OS related data (CPU utilization, running processes).
+ *
+ * Copyright IBM Corp. 2003, 2006
+ *
+ * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT	"appldata"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/netdevice.h>
+#include <linux/sched.h>
+#include <asm/appldata.h>
+#include <asm/smp.h>
+
+#include "appldata.h"
+
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+/*
+ * OS data
+ *
+ * This is accessed as binary data by z/VM. If changes to it can't be avoided,
+ * the structure version (product ID, see appldata_base.c) needs to be changed
+ * as well and all documentation and z/VM applications using it must be
+ * updated.
+ *
+ * The record layout is documented in the Linux for zSeries Device Drivers
+ * book:
+ * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
+ */
+struct appldata_os_per_cpu {
+	u32 per_cpu_user;	/* timer ticks spent in user mode   */
+	u32 per_cpu_nice;	/* ... spent with modified priority */
+	u32 per_cpu_system;	/* ... spent in kernel mode         */
+	u32 per_cpu_idle;	/* ... spent in idle mode           */
+
+	/* New in 2.6 */
+	u32 per_cpu_irq;	/* ... spent in interrupts          */
+	u32 per_cpu_softirq;	/* ... spent in softirqs            */
+	u32 per_cpu_iowait;	/* ... spent while waiting for I/O  */
+
+	/* New in modification level 01 */
+	u32 per_cpu_steal;	/* ... stolen by hypervisor	    */
+	u32 cpu_id;		/* number of this CPU		    */
+} __attribute__((packed));
+
+struct appldata_os_data {
+	u64 timestamp;
+	u32 sync_count_1;	/* after VM collected the record data, */
+	u32 sync_count_2;	/* sync_count_1 and sync_count_2 should be the
+				   same. If not, the record has been updated on
+				   the Linux side while VM was collecting the
+				   (possibly corrupt) data */
+
+	u32 nr_cpus;		/* number of (virtual) CPUs        */
+	u32 per_cpu_size;	/* size of the per-cpu data struct */
+	u32 cpu_offset;		/* offset of the first per-cpu data struct */
+
+	u32 nr_running;		/* number of runnable threads      */
+	u32 nr_threads;		/* number of threads               */
+	u32 avenrun[3];		/* average nr. of running processes during */
+				/* the last 1, 5 and 15 minutes */
+
+	/* New in 2.6 */
+	u32 nr_iowait;		/* number of blocked threads
+				   (waiting for I/O)               */
+
+	/* per cpu data */
+	struct appldata_os_per_cpu os_cpu[0];
+} __attribute__((packed));
+
+static struct appldata_os_data *appldata_os_data;
+
+static struct appldata_ops ops = {
+	.name	   = "os",
+	.record_nr = APPLDATA_RECORD_OS_ID,
+	.owner	   = THIS_MODULE,
+	.mod_lvl   = {0xF0, 0xF1},		/* EBCDIC "01" */
+};
+
+
+/*
+ * appldata_get_os_data()
+ *
+ * gather OS data
+ */
+static void appldata_get_os_data(void *data)
+{
+	int i, j, rc;
+	struct appldata_os_data *os_data;
+	unsigned int new_size;
+
+	os_data = data;
+	os_data->sync_count_1++;
+
+	os_data->nr_threads = nr_threads;
+	os_data->nr_running = nr_running();
+	os_data->nr_iowait  = nr_iowait();
+	os_data->avenrun[0] = avenrun[0] + (FIXED_1/200);
+	os_data->avenrun[1] = avenrun[1] + (FIXED_1/200);
+	os_data->avenrun[2] = avenrun[2] + (FIXED_1/200);
+
+	j = 0;
+	for_each_online_cpu(i) {
+		os_data->os_cpu[j].per_cpu_user =
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
+		os_data->os_cpu[j].per_cpu_nice =
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
+		os_data->os_cpu[j].per_cpu_system =
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
+		os_data->os_cpu[j].per_cpu_idle =
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
+		os_data->os_cpu[j].per_cpu_irq =
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
+		os_data->os_cpu[j].per_cpu_softirq =
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
+		os_data->os_cpu[j].per_cpu_iowait =
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
+		os_data->os_cpu[j].per_cpu_steal =
+			cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
+		os_data->os_cpu[j].cpu_id = i;
+		j++;
+	}
+
+	os_data->nr_cpus = j;
+
+	new_size = sizeof(struct appldata_os_data) +
+		   (os_data->nr_cpus * sizeof(struct appldata_os_per_cpu));
+	if (ops.size != new_size) {
+		if (ops.active) {
+			rc = appldata_diag(APPLDATA_RECORD_OS_ID,
+					   APPLDATA_START_INTERVAL_REC,
+					   (unsigned long) ops.data, new_size,
+					   ops.mod_lvl);
+			if (rc != 0)
+				pr_err("Starting a new OS data collection "
+				       "failed with rc=%d\n", rc);
+
+			rc = appldata_diag(APPLDATA_RECORD_OS_ID,
+					   APPLDATA_STOP_REC,
+					   (unsigned long) ops.data, ops.size,
+					   ops.mod_lvl);
+			if (rc != 0)
+				pr_err("Stopping a faulty OS data "
+				       "collection failed with rc=%d\n", rc);
+		}
+		ops.size = new_size;
+	}
+	os_data->timestamp = get_tod_clock();
+	os_data->sync_count_2++;
+}
+
+
+/*
+ * appldata_os_init()
+ *
+ * init data, register ops
+ */
+static int __init appldata_os_init(void)
+{
+	int rc, max_size;
+
+	max_size = sizeof(struct appldata_os_data) +
+		   (num_possible_cpus() * sizeof(struct appldata_os_per_cpu));
+	if (max_size > APPLDATA_MAX_REC_SIZE) {
+		pr_err("Maximum OS record size %i exceeds the maximum "
+		       "record size %i\n", max_size, APPLDATA_MAX_REC_SIZE);
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	appldata_os_data = kzalloc(max_size, GFP_KERNEL | GFP_DMA);
+	if (appldata_os_data == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	appldata_os_data->per_cpu_size = sizeof(struct appldata_os_per_cpu);
+	appldata_os_data->cpu_offset   = offsetof(struct appldata_os_data,
+							os_cpu);
+
+	ops.data = appldata_os_data;
+	ops.callback  = &appldata_get_os_data;
+	rc = appldata_register_ops(&ops);
+	if (rc != 0)
+		kfree(appldata_os_data);
+out:
+	return rc;
+}
+
+/*
+ * appldata_os_exit()
+ *
+ * unregister ops
+ */
+static void __exit appldata_os_exit(void)
+{
+	appldata_unregister_ops(&ops);
+	kfree(appldata_os_data);
+}
+
+
+module_init(appldata_os_init);
+module_exit(appldata_os_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Gerald Schaefer");
+MODULE_DESCRIPTION("Linux-VM Monitor Stream, OS statistics");
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
new file mode 100644
index 000000000..017d5912a
--- /dev/null
+++ b/arch/s390/boot/.gitignore
@@ -0,0 +1,2 @@
+image
+bzImage
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
new file mode 100644
index 000000000..9a42ecec5
--- /dev/null
+++ b/arch/s390/boot/Makefile
@@ -0,0 +1,26 @@
+#
+# Makefile for the s390 kernel boot images (image and bzImage).
+#
+
+COMPILE_VERSION := __linux_compile_version_id__`hostname |  \
+			tr -c '[0-9A-Za-z]' '_'`__`date | \
+			tr -c '[0-9A-Za-z]' '_'`_t
+
+ccflags-y  := -DCOMPILE_VERSION=$(COMPILE_VERSION) -gstabs -I.
+
+targets := image
+targets += bzImage
+subdir- := compressed
+
+$(obj)/image: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/bzImage: $(obj)/compressed/vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/compressed/vmlinux: FORCE
+	$(Q)$(MAKE) $(build)=$(obj)/compressed $@
+
+install: $(CONFIGURE) $(obj)/bzImage
+	sh -x  $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
+	      System.map "$(INSTALL_PATH)"
diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore
new file mode 100644
index 000000000..ae06b9b4c
--- /dev/null
+++ b/arch/s390/boot/compressed/.gitignore
@@ -0,0 +1,3 @@
+sizes.h
+vmlinux
+vmlinux.lds
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
new file mode 100644
index 000000000..d4788111c
--- /dev/null
+++ b/arch/s390/boot/compressed/Makefile
@@ -0,0 +1,69 @@
+#
+# linux/arch/s390/boot/compressed/Makefile
+#
+# create a compressed vmlinux image from the original vmlinux
+#
+
+targets	:= vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
+targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
+targets += misc.o piggy.o sizes.h head.o
+
+KBUILD_CFLAGS := -m64 -D__KERNEL__ $(LINUX_INCLUDE) -O2
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks
+KBUILD_CFLAGS += $(call cc-option,-mpacked-stack)
+KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
+
+GCOV_PROFILE := n
+
+OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o sclp.o ebcdic.o)
+OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o
+
+LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS)
+	$(call if_changed,ld)
+	@:
+
+sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 0x\1/p'
+
+quiet_cmd_sizes = GEN $@
+      cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@
+
+$(obj)/sizes.h: vmlinux
+	$(call if_changed,sizes)
+
+AFLAGS_head.o += -I$(obj)
+$(obj)/head.o: $(obj)/sizes.h
+
+CFLAGS_misc.o += -I$(obj)
+$(obj)/misc.o: $(obj)/sizes.h
+
+OBJCOPYFLAGS_vmlinux.bin :=  -R .comment -S
+$(obj)/vmlinux.bin: vmlinux
+	$(call if_changed,objcopy)
+
+vmlinux.bin.all-y := $(obj)/vmlinux.bin
+
+suffix-$(CONFIG_KERNEL_GZIP)  := gz
+suffix-$(CONFIG_KERNEL_BZIP2) := bz2
+suffix-$(CONFIG_KERNEL_LZ4)  := lz4
+suffix-$(CONFIG_KERNEL_LZMA)  := lzma
+suffix-$(CONFIG_KERNEL_LZO)  := lzo
+suffix-$(CONFIG_KERNEL_XZ)  := xz
+
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y)
+	$(call if_changed,gzip)
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y)
+	$(call if_changed,bzip2)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y)
+	$(call if_changed,lz4)
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y)
+	$(call if_changed,lzma)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
+	$(call if_changed,lzo)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y)
+	$(call if_changed,xzkern)
+
+LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T
+$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y)
+	$(call if_changed,ld)
diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S
new file mode 100644
index 000000000..f86a4eef2
--- /dev/null
+++ b/arch/s390/boot/compressed/head.S
@@ -0,0 +1,48 @@
+/*
+ * Startup glue code to uncompress the kernel
+ *
+ * Copyright IBM Corp. 2010
+ *
+ *   Author(s):	Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include "sizes.h"
+
+__HEAD
+ENTRY(startup_continue)
+	basr	%r13,0			# get base
+.LPG1:
+	# setup stack
+	lg	%r15,.Lstack-.LPG1(%r13)
+	aghi	%r15,-160		# %r15 -= 160
+	brasl	%r14,decompress_kernel	# call the C decompressor in misc.c
+	# setup registers for memory mover & branch to target
+	lgr	%r4,%r2			# %r4 = value left in %r2 by the call
+	lg	%r2,.Loffset-.LPG1(%r13)	# %r2 = 0x11000
+	la	%r4,0(%r2,%r4)		# %r4 = %r4 + %r2
+	lg	%r3,.Lmvsize-.LPG1(%r13)	# %r3 = SZ__bss_start
+	lgr	%r5,%r3			# %r5 = %r3
+	# move the memory mover someplace safe
+	la	%r1,0x200		# %r1 = 0x200
+	mvc	0(mover_end-mover,%r1),mover-.LPG1(%r13)	# copy mover code to (%r1)
+	# the decompressed image is started at 0x11000
+	lgr	%r6,%r2			# %r6 = 0x11000, the final branch target
+	br	%r1			# continue in the copy of mover at 0x200
+mover:
+	mvcle	%r2,%r4,0		# move from (%r4) to (%r2), lengths in %r5 and %r3
+	jo	mover			# loop while mvcle has not finished
+	br	%r6			# branch to the address saved in %r6
+mover_end:
+
+	.align	8
+.Lstack:				# value loaded into %r15 above
+	.quad	0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER))
+.Loffset:				# value loaded into %r2 and kept in %r6
+	.quad	0x11000
+.Lmvsize:				# byte count handed to the mover
+	.quad	SZ__bss_start
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
new file mode 100644
index 000000000..42506b371
--- /dev/null
+++ b/arch/s390/boot/compressed/misc.c
@@ -0,0 +1,174 @@
+/*
+ * Definitions and wrapper functions for kernel decompressor
+ *
+ * Copyright IBM Corp. 2010
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <asm/uaccess.h>
+#include <asm/page.h>
+#include <asm/sclp.h>
+#include <asm/ipl.h>
+#include "sizes.h"
+
+/*
+ * gzip declarations
+ */
+#define STATIC static
+
+#undef memset
+#undef memcpy
+#undef memmove
+#define memmove memmove
+#define memzero(s, n) memset((s), 0, (n))
+
+/* Symbols defined by linker scripts */
+extern char input_data[];
+extern int input_len;
+extern char _text, _end;
+extern char _bss, _ebss;
+
+static void error(char *m);
+
+static unsigned long free_mem_ptr;	/* set to &_end in decompress_kernel() */
+static unsigned long free_mem_end_ptr;	/* free_mem_ptr + HEAP_SIZE */
+
+#ifdef CONFIG_HAVE_KERNEL_BZIP2	/* NOTE(review): HAVE_*, not CONFIG_KERNEL_BZIP2 -- intended? */
+#define HEAP_SIZE	0x400000	/* 4 MiB */
+#else
+#define HEAP_SIZE	0x10000		/* 64 KiB */
+#endif
+
+#ifdef CONFIG_KERNEL_GZIP
+#include "../../../../lib/decompress_inflate.c"
+#endif
+
+#ifdef CONFIG_KERNEL_BZIP2
+#include "../../../../lib/decompress_bunzip2.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZMA
+#include "../../../../lib/decompress_unlzma.c"
+#endif
+
+#ifdef CONFIG_KERNEL_LZO
+#include "../../../../lib/decompress_unlzo.c"
+#endif
+
+#ifdef CONFIG_KERNEL_XZ
+#include "../../../../lib/decompress_unxz.c"
+#endif
+
+static int puts(const char *s)	/* hand s to _sclp_print_early() */
+{
+	_sclp_print_early(s);
+	return 0;		/* unconditionally 0 */
+}
+
+void *memset(void *s, int c, size_t n)	/* set n bytes at s to c */
+{
+	char *xs;
+
+	xs = s;
+	while (n--)		/* n == 0 writes nothing */
+		*xs++ = c;	/* c is converted to char by the store */
+	return s;		/* the unmodified start pointer */
+}
+
+void *memcpy(void *dest, const void *src, size_t n)	/* copy n bytes */
+{
+	const char *s = src;
+	char *d = dest;
+
+	while (n--)		/* byte by byte, lowest address first */
+		*d++ = *s++;
+	return dest;		/* the unmodified destination pointer */
+}
+
+void *memmove(void *dest, const void *src, size_t n)	/* overlap-safe copy */
+{
+	const char *s = src;
+	char *d = dest;
+
+	if (d <= s) {		/* dest at or below src: copy ascending */
+		while (n--)
+			*d++ = *s++;
+	} else {		/* dest above src: copy descending */
+		d += n;		/* start one past the last byte ... */
+		s += n;
+		while (n--)
+			*--d = *--s;	/* ... and pre-decrement back down */
+	}
+	return dest;		/* the unmodified destination pointer */
+}
+
+static void error(char *x)	/* print message x, then load a fixed PSW */
+{
+	unsigned long long psw = 0x000a0000deadbeefULL;	/* loaded by lpsw below */
+
+	puts("\n\n");
+	puts(x);
+	puts("\n\n -- System halted");
+
+	asm volatile("lpsw %0" : : "Q" (psw));	/* NOTE(review): presumably a disabled wait -- confirm */
+}
+
+/*
+ * Safeguard the ipl parameter block against the memory area
+ * [start, start + size) that will be overwritten. The validity check for
+ * the ipl parameter block is complex (see cio_get_iplinfo and
+ * ipl_save_parameters) but if the page at ipl_parmblock_ptr intersects
+ * the passed memory area we can safely assume that we can read from it.
+ * In that case just copy PAGE_SIZE bytes to IPL_PARMBLOCK_ORIGIN and point
+ * ipl_parmblock_ptr there, even if there is no ipl parameter block.
+ */
+static void check_ipl_parmblock(void *start, unsigned long size)
+{
+	void *src, *dst;
+
+	src = (void *)(unsigned long) S390_lowcore.ipl_parmblock_ptr;
+	if (src + PAGE_SIZE <= start || src >= start + size)
+		return;		/* no overlap, nothing to do */
+	dst = (void *) IPL_PARMBLOCK_ORIGIN;
+	memmove(dst, src, PAGE_SIZE);
+	S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN;
+}
+
+unsigned long decompress_kernel(void)	/* called from startup_continue */
+{
+	unsigned long output_addr;	/* _end + HEAP_SIZE, rounded up to 4K */
+	unsigned char *output;		/* output_addr as a byte pointer */
+
+	output_addr = ((unsigned long) &_end + HEAP_SIZE + 4095UL) & -4096UL;
+	check_ipl_parmblock((void *) 0, output_addr + SZ__bss_start);
+	memset(&_bss, 0, &_ebss - &_bss);	/* zero _bss.._ebss */
+	free_mem_ptr = (unsigned long)&_end;	/* heap begins at _end */
+	free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
+	output = (unsigned char *) output_addr;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	/*
+	 * Move the initrd right behind the decompressed kernel image.
+	 * NOTE(review): the size passed below is an end address -- confirm.
+	 */
+	if (INITRD_START && INITRD_SIZE &&
+	    INITRD_START < (unsigned long) output + SZ__bss_start) {
+		check_ipl_parmblock(output + SZ__bss_start,
+				    INITRD_START + INITRD_SIZE);
+		memmove(output + SZ__bss_start,
+			(void *) INITRD_START, INITRD_SIZE);
+		INITRD_START = (unsigned long) output + SZ__bss_start;
+	}
+#endif
+
+	puts("Uncompressing Linux... ");
+	decompress(input_data, input_len, NULL, NULL, output, NULL, error);
+	puts("Ok, booting the kernel.\n");
+	return (unsigned long) output;	/* address the image was written to */
+}
+
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
new file mode 100644
index 000000000..747735f83
--- /dev/null
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -0,0 +1,50 @@
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+
+ENTRY(startup)
+
+SECTIONS
+{
+	/* Be careful parts of head_64.S assume startup_32 is at
+	 * address 0.
+	 */
+	. = 0;
+	.head.text : {
+		_head = . ;
+		HEAD_TEXT
+		_ehead = . ;
+	}
+	.rodata.compressed : {
+		*(.rodata.compressed)
+	}
+	.text :	{
+		_text = .;	/* Text */
+		*(.text)
+		*(.text.*)
+		_etext = . ;
+	}
+	.rodata : {
+		_rodata = . ;
+		*(.rodata)	 /* read-only data */
+		*(.rodata.*)
+		_erodata = . ;
+	}
+	.data :	{
+		_data = . ;
+		*(.data)
+		*(.data.*)
+		_edata = . ;
+	}
+	. = ALIGN(256);
+	.bss : {
+		_bss = . ;
+		*(.bss)
+		*(.bss.*)
+		*(COMMON)
+		. = ALIGN(8);	/* For convenience during zeroing */
+		_ebss = .;
+	}
+	_end = .;
+}
diff --git a/arch/s390/boot/compressed/vmlinux.scr b/arch/s390/boot/compressed/vmlinux.scr
new file mode 100644
index 000000000..f02382ae5
--- /dev/null
+++ b/arch/s390/boot/compressed/vmlinux.scr
@@ -0,0 +1,10 @@
+SECTIONS
+{
+  .rodata.compressed : {
+	input_len = .;
+	LONG(input_data_end - input_data) input_data = .;
+	*(.data)
+	output_len = . - 4;
+	input_data_end = .;
+	}
+}
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
new file mode 100644
index 000000000..aed306969
--- /dev/null
+++ b/arch/s390/boot/install.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+#
+# arch/s390/boot/install.sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+#
+# "make install" script for s390 architecture
+#
+# Arguments:
+#   $1 - kernel version
+#   $2 - kernel image file
+#   $3 - kernel map file
+#   $4 - default install path (blank if root directory)
+#
+
+# User may have a custom install script; if one is executable, hand over to it
+
+if [ -x ~/bin/"${INSTALLKERNEL}" ]; then exec ~/bin/"${INSTALLKERNEL}" "$@"; fi
+if [ -x /sbin/"${INSTALLKERNEL}" ]; then exec /sbin/"${INSTALLKERNEL}" "$@"; fi
+
+# Default install: keep the previous vmlinuz and System.map as backups first
+
+if [ -f "$4/vmlinuz" ]; then
+	mv "$4/vmlinuz" "$4/vmlinuz.old"
+fi
+
+if [ -f "$4/System.map" ]; then
+	mv "$4/System.map" "$4/System.old"
+fi
+
+cat "$2" > "$4/vmlinuz"
+cp "$3" "$4/System.map"
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
new file mode 100644
index 000000000..64707750c
--- /dev/null
+++ b/arch/s390/configs/default_defconfig
@@ -0,0 +1,678 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_BLK_CGROUP=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CFQ_GROUP_IOSCHED=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_MARCH_Z196=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=256
+CONFIG_PREEMPT=y
+CONFIG_HZ_100=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_PCI=y
+CONFIG_PCI_DEBUG=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_CHSC_SCH=y
+CONFIG_CRASH_DUMP=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CT_PROTO_DCCP=m
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_NAT_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
+CONFIG_NF_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NET_SCTPPROBE=m
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
+CONFIG_RDS_DEBUG=y
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_BPF_JIT=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_TCPPROBE=m
+CONFIG_DEVTMPFS=y
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_OSD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_XIP=y
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_ATA_OVER_ETH=m
+CONFIG_VIRTIO_BLK=y
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_ISCSI_TCP=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
+CONFIG_DM_VERITY=m
+CONFIG_DM_SWITCH=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+CONFIG_VHOST_NET=m
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_RAW_DRIVER=m
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_VIRTIO_BALLOON=m
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JBD_DEBUG=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_XFS_DEBUG=y
+CONFIG_GFS2_FS=m
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FANOTIFY=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_READABLE_ASM=y
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_SELFTEST=y
+CONFIG_DEBUG_OBJECTS_FREE=y
+CONFIG_DEBUG_OBJECTS_TIMERS=y
+CONFIG_DEBUG_OBJECTS_WORK=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
+CONFIG_SLUB_DEBUG_ON=y
+CONFIG_SLUB_STATS=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_VM_RB=y
+CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
+CONFIG_DEBUG_PER_CPU_MAPS=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCK_STAT=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_DEBUG_CREDENTIALS=y
+CONFIG_PROVE_RCU=y
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=300
+CONFIG_NOTIFIER_ERROR_INJECTION=m
+CONFIG_CPU_NOTIFIER_ERROR_INJECT=m
+CONFIG_PM_NOTIFIER_ERROR_INJECT=m
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAILSLAB=y
+CONFIG_FAIL_PAGE_ALLOC=y
+CONFIG_FAIL_MAKE_REQUEST=y
+CONFIG_FAIL_IO_TIMEOUT=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
+CONFIG_LATENCYTOP=y
+CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
+CONFIG_IRQSOFF_TRACER=y
+CONFIG_PREEMPT_TRACER=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_STACK_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_UPROBE_EVENT=y
+CONFIG_LKDTM=m
+CONFIG_TEST_LIST_SORT=y
+CONFIG_KPROBES_SANITY_TEST=y
+CONFIG_RBTREE_TEST=y
+CONFIG_INTERVAL_TREE_TEST=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_TEST_STRING_HELPERS=y
+CONFIG_TEST_KSTRTOX=y
+CONFIG_DMA_API_DEBUG=y
+CONFIG_TEST_BPF=m
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_S390_PTDUMP=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_IMA=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=y
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_ZCRYPT=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_ASYMMETRIC_KEY_TYPE=m
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
+CONFIG_X509_CERTIFICATE_PARSER=m
+CONFIG_CRC7=m
+CONFIG_CRC8=m
+CONFIG_CORDIC=m
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
new file mode 100644
index 000000000..5c3097272
--- /dev/null
+++ b/arch/s390/configs/gcov_defconfig
@@ -0,0 +1,625 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_CGROUP_PERF=y
+CONFIG_BLK_CGROUP=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_GCOV_KERNEL=y
+CONFIG_GCOV_PROFILE_ALL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CFQ_GROUP_IOSCHED=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_MARCH_Z196=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=256
+CONFIG_HZ_100=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_PCI=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_CHSC_SCH=y
+CONFIG_CRASH_DUMP=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CT_PROTO_DCCP=m
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_NAT_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
+CONFIG_NF_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NET_SCTPPROBE=m
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_BPF_JIT=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_TCPPROBE=m
+CONFIG_DEVTMPFS=y
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_OSD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_XIP=y
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_ATA_OVER_ETH=m
+CONFIG_VIRTIO_BLK=y
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_ISCSI_TCP=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
+CONFIG_DM_VERITY=m
+CONFIG_DM_SWITCH=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+CONFIG_VHOST_NET=m
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_RAW_DRIVER=m
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_VIRTIO_BALLOON=m
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JBD_DEBUG=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_GFS2_FS=m
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FANOTIFY=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_TIMER_STATS=y
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_NOTIFIER_ERROR_INJECTION=m
+CONFIG_CPU_NOTIFIER_ERROR_INJECT=m
+CONFIG_PM_NOTIFIER_ERROR_INJECT=m
+CONFIG_LATENCYTOP=y
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_KPROBE_EVENT is not set
+CONFIG_LKDTM=m
+CONFIG_RBTREE_TEST=m
+CONFIG_INTERVAL_TREE_TEST=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_S390_PTDUMP=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_IMA=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=y
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_ZCRYPT=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_ASYMMETRIC_KEY_TYPE=m
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
+CONFIG_X509_CERTIFICATE_PARSER=m
+CONFIG_CRC7=m
+CONFIG_CRC8=m
+CONFIG_CORDIC=m
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
new file mode 100644
index 000000000..bda70f1ff
--- /dev/null
+++ b/arch/s390/configs/performance_defconfig
@@ -0,0 +1,620 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_RESOURCE_COUNTERS=y
+CONFIG_CGROUP_PERF=y
+CONFIG_BLK_CGROUP=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CFQ_GROUP_IOSCHED=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_MARCH_Z196=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=256
+CONFIG_HZ_100=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_PCI=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_CHSC_SCH=y
+CONFIG_CRASH_DUMP=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CT_PROTO_DCCP=m
+CONFIG_NF_CT_PROTO_UDPLITE=m
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_EXTHDR=m
+CONFIG_NFT_META=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_RBTREE=m
+CONFIG_NFT_HASH=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
+CONFIG_NF_TABLES_IPV4=m
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_NAT_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=m
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
+CONFIG_NF_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NET_SCTPPROBE=m
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_BPF_JIT=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_TCPPROBE=m
+CONFIG_DEVTMPFS=y
+CONFIG_CONNECTOR=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_OSD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
+CONFIG_BLK_DEV_XIP=y
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_ATA_OVER_ETH=m
+CONFIG_VIRTIO_BLK=y
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_ISCSI_TCP=m
+CONFIG_SCSI_DEBUG=m
+CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_MULTIPATH_QL=m
+CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
+CONFIG_DM_VERITY=m
+CONFIG_DM_SWITCH=m
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_TUN=m
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+CONFIG_VHOST_NET=m
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_HW_RANDOM_VIRTIO=m
+CONFIG_RAW_DRIVER=m
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_VIRTIO_BALLOON=m
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT2_FS_XIP=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JBD_DEBUG=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_GFS2_FS=m
+CONFIG_OCFS2_FS=m
+CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FANOTIFY=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_TIMER_STATS=y
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_LATENCYTOP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_STACK_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_UPROBE_EVENT=y
+CONFIG_LKDTM=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_S390_PTDUMP=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEYS_DEBUG_PROC_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_IMA=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_ZLIB=y
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_ZCRYPT=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
+CONFIG_ASYMMETRIC_KEY_TYPE=m
+CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
+CONFIG_X509_CERTIFICATE_PARSER=m
+CONFIG_CRC7=m
+CONFIG_CRC8=m
+CONFIG_CORDIC=m
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
new file mode 100644
index 000000000..1b0184a0f
--- /dev/null
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -0,0 +1,82 @@
+# CONFIG_SWAP is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_MARCH_Z196=y
+CONFIG_TUNE_ZEC12=y
+# CONFIG_COMPAT is not set
+CONFIG_NR_CPUS=2
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_HZ_100=y
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+# CONFIG_CHECK_STACK is not set
+# CONFIG_CHSC_SCH is not set
+# CONFIG_SCM_BUS is not set
+CONFIG_CRASH_DUMP=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_SECCOMP is not set
+CONFIG_NET=y
+# CONFIG_IUCV is not set
+CONFIG_ATM=y
+CONFIG_ATM_LANE=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV_XPRAM is not set
+# CONFIG_DCSSBLK is not set
+# CONFIG_DASD is not set
+CONFIG_ENCLOSURE_SERVICES=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_ENCLOSURE=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SRP_ATTRS=y
+CONFIG_ZFCP=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_HVC_IUCV is not set
+CONFIG_RAW_DRIVER=y
+# CONFIG_SCLP_ASYNC is not set
+# CONFIG_HMC_DRV is not set
+# CONFIG_S390_TAPE is not set
+# CONFIG_VMCP is not set
+# CONFIG_MONWRITER is not set
+# CONFIG_S390_VMUR is not set
+# CONFIG_HID is not set
+CONFIG_MEMSTICK=y
+CONFIG_MEMSTICK_DEBUG=y
+CONFIG_MEMSTICK_UNSAFE_RESUME=y
+CONFIG_MSPRO_BLOCK=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+# CONFIG_INOTIFY_USER is not set
+CONFIG_CONFIGFS_FS=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_PANIC_ON_OOPS=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+# CONFIG_FTRACE is not set
+# CONFIG_STRICT_DEVMEM is not set
+# CONFIG_PFAULT is not set
+# CONFIG_S390_HYPFS_FS is not set
+# CONFIG_VIRTUALIZATION is not set
+# CONFIG_S390_GUEST is not set
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
new file mode 100644
index 000000000..7f0b7cda6
--- /dev/null
+++ b/arch/s390/crypto/Makefile
@@ -0,0 +1,11 @@
+#
+# Cryptographic API
+#
+
+obj-$(CONFIG_CRYPTO_SHA1_S390) += sha1_s390.o sha_common.o
+obj-$(CONFIG_CRYPTO_SHA256_S390) += sha256_s390.o sha_common.o
+obj-$(CONFIG_CRYPTO_SHA512_S390) += sha512_s390.o sha_common.o
+obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
+obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
+obj-$(CONFIG_S390_PRNG) += prng.o
+obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
new file mode 100644
index 000000000..5566ce80a
--- /dev/null
+++ b/arch/s390/crypto/aes_s390.c
@@ -0,0 +1,985 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the AES Cipher Algorithm.
+ *
+ * s390 Version:
+ *   Copyright IBM Corp. 2005, 2007
+ *   Author(s): Jan Glauber (jang@de.ibm.com)
+ *		Sebastian Siewior (sebastian@breakpoint.cc) SW-Fallback
+ *
+ * Derived from "crypto/aes_generic.c"
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#define KMSG_COMPONENT "aes_s390"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include "crypt_s390.h"
+
+#define AES_KEYLEN_128		1	/* keylen_flag bit: KM-AES-128 available */
+#define AES_KEYLEN_192		2	/* keylen_flag bit: KM-AES-192 available */
+#define AES_KEYLEN_256		4	/* keylen_flag bit: KM-AES-256 available */
+
+static u8 *ctrblk;	/* page of counter blocks, allocated in aes_s390_init() */
+static DEFINE_SPINLOCK(ctrblk_lock);	/* taken by ctr_aes_crypt() for ctrblk */
+static char keylen_flag;	/* AES_KEYLEN_* bits, probed in aes_s390_init() */
+
+struct s390_aes_ctx {	/* per-tfm state of the aes/ecb/cbc/ctr algorithms */
+	u8 key[AES_MAX_KEY_SIZE];	/* raw key, copied by aes_set_key() */
+	long enc;	/* function code for encryption, set by the *_set_key() */
+	long dec;	/* function code for decryption, set by the *_set_key() */
+	int key_len;	/* key length in bytes as passed to setkey */
+	union {		/* software fallback, one member per algorithm type */
+		struct crypto_blkcipher *blk;	/* used by ecb(aes), cbc(aes) */
+		struct crypto_cipher *cip;	/* used by plain "aes" */
+	} fallback;
+};
+
+struct pcc_param {	/* parameter block handed to crypt_s390_pcc() for XTS */
+	u8 key[32];	/* filled from s390_xts_ctx.pcc_key */
+	u8 tweak[16];	/* filled from the request IV (walk->iv) */
+	u8 block[16];	/* zeroed before the call */
+	u8 bit[16];	/* zeroed before the call */
+	u8 xts[16];	/* zeroed before, read back as initial XTS value */
+};
+
+struct s390_xts_ctx {	/* per-tfm state of xts(aes) */
+	u8 key[32];	/* first half of the XTS key, for crypt_s390_km() */
+	u8 pcc_key[32];	/* second half of the XTS key, for crypt_s390_pcc() */
+	long enc;	/* KM_XTS_* encrypt code, 0 for 48-byte keys */
+	long dec;	/* KM_XTS_* decrypt code, 0 for 48-byte keys */
+	int key_len;	/* total key length in bytes: 32, 48 or 64 */
+	struct crypto_blkcipher *fallback;	/* software xts(aes) */
+};
+
+/*
+ * Check if the key_len is supported by the HW.
+ * Returns 0 when the AES_KEYLEN_* bit for key_len is set in keylen_flag,
+ * 1 when it is clear (the software fallback is required) and -1 when
+ * key_len is not a valid AES key size (16, 24 or 32 bytes).
+ */
+static int need_fallback(unsigned int key_len)
+{
+	char hw_bit;
+
+	switch (key_len) {
+	case 16:
+		hw_bit = AES_KEYLEN_128;
+		break;
+	case 24:
+		hw_bit = AES_KEYLEN_192;
+		break;
+	case 32:
+		hw_bit = AES_KEYLEN_256;
+		break;
+	default:
+		return -1;
+	}
+
+	return (keylen_flag & hw_bit) ? 0 : 1;
+}
+
+/* Key the software cipher: request flags go down, result flags come up. */
+static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key,
+		unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	struct crypto_cipher *fb = sctx->fallback.cip;
+	int err;
+
+	fb->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+	fb->base.crt_flags |= tfm->crt_flags & CRYPTO_TFM_REQ_MASK;
+
+	err = crypto_cipher_setkey(fb, in_key, key_len);
+	if (!err)
+		return 0;
+	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+	tfm->crt_flags |= fb->base.crt_flags & CRYPTO_TFM_RES_MASK;
+	return err;
+}
+
+/*
+ * Keep the key for the hardware, or key the software cipher if the hardware
+ * lacks this key length; -EINVAL (BAD_KEY_LEN) if it is no AES key length.
+ */
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	int fallback = need_fallback(key_len);
+
+	if (fallback < 0) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	sctx->key_len = key_len;
+	if (fallback)
+		return setkey_fallback_cip(tfm, in_key, key_len);
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
+}
+
+/*
+ * Encrypt a single block: with crypt_s390_km() if the hardware supports the
+ * key length, through the software cipher otherwise.
+ */
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	long func;
+
+	if (unlikely(need_fallback(sctx->key_len))) {
+		crypto_cipher_encrypt_one(sctx->fallback.cip, out, in);
+		return;
+	}
+
+	if (sctx->key_len == 16)
+		func = KM_AES_128_ENCRYPT;
+	else if (sctx->key_len == 24)
+		func = KM_AES_192_ENCRYPT;
+	else if (sctx->key_len == 32)
+		func = KM_AES_256_ENCRYPT;
+	else
+		return;
+	crypt_s390_km(func, &sctx->key, out, in, AES_BLOCK_SIZE);
+}
+
+/*
+ * Decrypt a single block: with crypt_s390_km() if the hardware supports the
+ * key length, through the software cipher otherwise.
+ */
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	long func;
+
+	if (unlikely(need_fallback(sctx->key_len))) {
+		crypto_cipher_decrypt_one(sctx->fallback.cip, out, in);
+		return;
+	}
+
+	if (sctx->key_len == 16)
+		func = KM_AES_128_DECRYPT;
+	else if (sctx->key_len == 24)
+		func = KM_AES_192_DECRYPT;
+	else if (sctx->key_len == 32)
+		func = KM_AES_256_DECRYPT;
+	else
+		return;
+	crypt_s390_km(func, &sctx->key, out, in, AES_BLOCK_SIZE);
+}
+
+/* cra_init hook: allocate the software cipher of the same cra_name. */
+static int fallback_init_cip(struct crypto_tfm *tfm)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	const char *name = tfm->__crt_alg->cra_name;
+	struct crypto_cipher *fb;
+
+	fb = crypto_alloc_cipher(name, 0,
+			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	sctx->fallback.cip = fb;	/* stored even when it is an ERR_PTR */
+	if (!IS_ERR(fb))
+		return 0;
+
+	pr_err("Allocating AES fallback algorithm %s failed\n", name);
+	return PTR_ERR(fb);
+}
+
+static void fallback_exit_cip(struct crypto_tfm *tfm)	/* .cra_exit of "aes" */
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_cipher(sctx->fallback.cip);	/* from the init hook */
+	sctx->fallback.cip = NULL;	/* keep no pointer to the freed tfm */
+}
+
+/*
+ * Single-block "aes" cipher; NEED_FALLBACK because some key lengths may
+ * have to be served by the software cipher allocated in fallback_init_cip().
+ */
+static struct crypto_alg aes_alg = {
+	.cra_name = "aes",
+	.cra_driver_name = "aes-s390",
+	.cra_priority = CRYPT_S390_PRIORITY,
+	.cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_NEED_FALLBACK,
+	.cra_blocksize = AES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct s390_aes_ctx),
+	.cra_module = THIS_MODULE,
+	.cra_init = fallback_init_cip,
+	.cra_exit = fallback_exit_cip,
+	.cra_u.cipher = {
+		.cia_min_keysize = AES_MIN_KEY_SIZE,
+		.cia_max_keysize = AES_MAX_KEY_SIZE,
+		.cia_setkey = aes_set_key,
+		.cia_encrypt = aes_encrypt,
+		.cia_decrypt = aes_decrypt,
+	},
+};
+
+/* Key the software blkcipher; request flags go down, result flags come up. */
+static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
+		unsigned int len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	int ret;	/* signed, so negative error codes survive */
+
+	sctx->fallback.blk->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+	sctx->fallback.blk->base.crt_flags |= (tfm->crt_flags &
+			CRYPTO_TFM_REQ_MASK);
+	ret = crypto_blkcipher_setkey(sctx->fallback.blk, key, len);
+	if (ret) {
+		tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+		tfm->crt_flags |= (sctx->fallback.blk->base.crt_flags &
+				CRYPTO_TFM_RES_MASK);
+	}
+	return ret;
+}
+
+/* Decrypt via the software blkcipher; used when the hardware can't. */
+static int fallback_blk_dec(struct blkcipher_desc *desc,
+		struct scatterlist *dst, struct scatterlist *src,
+		unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_blkcipher *tfm = desc->tfm;
+	int ret;	/* signed, so negative error codes survive */
+
+	/* lend desc to the fallback transform, then hand it back */
+	desc->tfm = sctx->fallback.blk;
+	ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes);
+	desc->tfm = tfm;
+
+	return ret;
+}
+
+/* Encrypt via the software blkcipher; used when the hardware can't. */
+static int fallback_blk_enc(struct blkcipher_desc *desc,
+		struct scatterlist *dst, struct scatterlist *src,
+		unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_blkcipher *tfm = desc->tfm;
+	int ret;	/* signed, so negative error codes survive */
+
+	/* lend desc to the fallback transform, then hand it back */
+	desc->tfm = sctx->fallback.blk;
+	ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes);
+	desc->tfm = tfm;
+
+	return ret;
+}
+
+/*
+ * setkey for ecb(aes): key the software blkcipher when the hardware lacks
+ * this key length, otherwise pick the KM function codes and let
+ * aes_set_key() validate and store the key.
+ */
+static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	if (need_fallback(key_len) > 0) {
+		sctx->key_len = key_len;
+		return setkey_fallback_blk(tfm, in_key, key_len);
+	}
+
+	if (key_len == 16) {
+		sctx->enc = KM_AES_128_ENCRYPT;
+		sctx->dec = KM_AES_128_DECRYPT;
+	} else if (key_len == 24) {
+		sctx->enc = KM_AES_192_ENCRYPT;
+		sctx->dec = KM_AES_192_DECRYPT;
+	} else if (key_len == 32) {
+		sctx->enc = KM_AES_256_ENCRYPT;
+		sctx->dec = KM_AES_256_DECRYPT;
+	}
+
+	/* any other length leaves enc/dec alone and is rejected below */
+	return aes_set_key(tfm, in_key, key_len);
+}
+
+/*
+ * Walk the request and feed each chunk's complete AES blocks to
+ * crypt_s390_km(); returns -EIO if the instruction did not process them all.
+ */
+static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param,
+			 struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt(desc, walk);
+
+	while (walk->nbytes) {
+		unsigned int avail = walk->nbytes;
+		unsigned int full = avail & ~(AES_BLOCK_SIZE - 1);
+		unsigned int rest = avail & (AES_BLOCK_SIZE - 1);
+
+		ret = crypt_s390_km(func, param, walk->dst.virt.addr,
+				    walk->src.virt.addr, full);
+		if (ret < 0 || ret != full)
+			return -EIO;
+		ret = blkcipher_walk_done(desc, walk, rest);
+	}
+	return ret;
+}
+
+/* .encrypt of ecb(aes): hardware path, or software for unsupported keys. */
+static int ecb_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	if (unlikely(need_fallback(ctx->key_len)))
+		return fallback_blk_enc(desc, dst, src, nbytes);
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_aes_crypt(desc, ctx->enc, ctx->key, &walk);
+}
+
+/* .decrypt of ecb(aes): hardware path, or software for unsupported keys. */
+static int ecb_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	if (unlikely(need_fallback(ctx->key_len)))
+		return fallback_blk_dec(desc, dst, src, nbytes);
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_aes_crypt(desc, ctx->dec, ctx->key, &walk);
+}
+
+/* cra_init hook: allocate the software blkcipher of the same cra_name. */
+static int fallback_init_blk(struct crypto_tfm *tfm)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	const char *name = tfm->__crt_alg->cra_name;
+	struct crypto_blkcipher *fb;
+
+	fb = crypto_alloc_blkcipher(name, 0,
+			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	sctx->fallback.blk = fb;	/* stored even when it is an ERR_PTR */
+	if (!IS_ERR(fb))
+		return 0;
+
+	pr_err("Allocating AES fallback algorithm %s failed\n", name);
+	return PTR_ERR(fb);
+}
+
+static void fallback_exit_blk(struct crypto_tfm *tfm)	/* .cra_exit, ecb/cbc */
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_blkcipher(sctx->fallback.blk);	/* from init hook */
+	sctx->fallback.blk = NULL;	/* keep no pointer to the freed tfm */
+}
+
+/*
+ * "ecb(aes)" blkcipher; NEED_FALLBACK because some key lengths may have to
+ * be served by the software blkcipher allocated in fallback_init_blk().
+ */
+static struct crypto_alg ecb_aes_alg = {
+	.cra_name = "ecb(aes)",
+	.cra_driver_name = "ecb-aes-s390",
+	.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
+	.cra_blocksize = AES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct s390_aes_ctx),
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = fallback_init_blk,
+	.cra_exit = fallback_exit_blk,
+	.cra_u.blkcipher = {
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.setkey = ecb_aes_set_key,
+		.encrypt = ecb_aes_encrypt,
+		.decrypt = ecb_aes_decrypt,
+	},
+};
+
+/*
+ * setkey for cbc(aes): key the software blkcipher when the hardware lacks
+ * this key length, otherwise pick the KMC function codes and let
+ * aes_set_key() validate and store the key.
+ */
+static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	if (need_fallback(key_len) > 0) {
+		sctx->key_len = key_len;
+		return setkey_fallback_blk(tfm, in_key, key_len);
+	}
+
+	if (key_len == 16) {
+		sctx->enc = KMC_AES_128_ENCRYPT;
+		sctx->dec = KMC_AES_128_DECRYPT;
+	} else if (key_len == 24) {
+		sctx->enc = KMC_AES_192_ENCRYPT;
+		sctx->dec = KMC_AES_192_DECRYPT;
+	} else if (key_len == 32) {
+		sctx->enc = KMC_AES_256_ENCRYPT;
+		sctx->dec = KMC_AES_256_DECRYPT;
+	}
+
+	/* any other length leaves enc/dec alone and is rejected below */
+	return aes_set_key(tfm, in_key, key_len);
+}
+
+/*
+ * CBC en/decryption via crypt_s390_kmc(). The parameter block holds the
+ * chaining value followed by the key; whatever is left in its iv field
+ * after the last chunk is copied back to walk->iv. Returns -EIO if the
+ * instruction did not process all complete blocks of a chunk.
+ */
+static int cbc_aes_crypt(struct blkcipher_desc *desc, long func,
+			 struct blkcipher_walk *walk)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	int ret = blkcipher_walk_virt(desc, walk);
+	unsigned int nbytes, full;
+	struct {
+		u8 iv[AES_BLOCK_SIZE];
+		u8 key[AES_MAX_KEY_SIZE];
+	} param;
+
+	if (!walk->nbytes)
+		return ret;
+
+	memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+	memcpy(param.key, sctx->key, sctx->key_len);
+	while ((nbytes = walk->nbytes)) {
+		full = nbytes & ~(AES_BLOCK_SIZE - 1);
+		ret = crypt_s390_kmc(func, &param, walk->dst.virt.addr,
+				     walk->src.virt.addr, full);
+		if (ret < 0 || ret != full)
+			return -EIO;
+		ret = blkcipher_walk_done(desc, walk,
+					  nbytes & (AES_BLOCK_SIZE - 1));
+	}
+	memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
+	return ret;
+}
+
+/* .encrypt of cbc(aes): hardware path, or software for unsupported keys. */
+static int cbc_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	if (unlikely(need_fallback(ctx->key_len)))
+		return fallback_blk_enc(desc, dst, src, nbytes);
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_aes_crypt(desc, ctx->enc, &walk);
+}
+
+/* .decrypt of cbc(aes): hardware path, or software for unsupported keys. */
+static int cbc_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	if (unlikely(need_fallback(ctx->key_len)))
+		return fallback_blk_dec(desc, dst, src, nbytes);
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return cbc_aes_crypt(desc, ctx->dec, &walk);
+}
+
+/*
+ * "cbc(aes)" blkcipher; NEED_FALLBACK because some key lengths may have to
+ * be served by the software blkcipher allocated in fallback_init_blk().
+ */
+static struct crypto_alg cbc_aes_alg = {
+	.cra_name = "cbc(aes)",
+	.cra_driver_name = "cbc-aes-s390",
+	.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
+	.cra_blocksize = AES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct s390_aes_ctx),
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = fallback_init_blk,
+	.cra_exit = fallback_exit_blk,
+	.cra_u.blkcipher = {
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.setkey = cbc_aes_set_key,
+		.encrypt = cbc_aes_encrypt,
+		.decrypt = cbc_aes_decrypt,
+	},
+};
+
+/* Key the software xts(aes); request flags go down, result flags come up. */
+static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
+				   unsigned int len)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+	int ret;	/* signed, so negative error codes survive */
+
+	xts_ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+	xts_ctx->fallback->base.crt_flags |= (tfm->crt_flags &
+			CRYPTO_TFM_REQ_MASK);
+	ret = crypto_blkcipher_setkey(xts_ctx->fallback, key, len);
+	if (ret) {
+		tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+		tfm->crt_flags |= (xts_ctx->fallback->base.crt_flags &
+				CRYPTO_TFM_RES_MASK);
+	}
+	return ret;
+}
+
+/* Decrypt via the software xts(aes); used for 48-byte keys. */
+static int xts_fallback_decrypt(struct blkcipher_desc *desc,
+		struct scatterlist *dst, struct scatterlist *src,
+		unsigned int nbytes)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_blkcipher *tfm = desc->tfm;
+	int ret;	/* signed, so negative error codes survive */
+
+	/* lend desc to the fallback transform, then hand it back */
+	desc->tfm = xts_ctx->fallback;
+	ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes);
+	desc->tfm = tfm;
+
+	return ret;
+}
+
+/* Encrypt via the software xts(aes); used for 48-byte keys. */
+static int xts_fallback_encrypt(struct blkcipher_desc *desc,
+		struct scatterlist *dst, struct scatterlist *src,
+		unsigned int nbytes)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_blkcipher *tfm = desc->tfm;
+	int ret;	/* signed, so negative error codes survive */
+
+	/* lend desc to the fallback transform, then hand it back */
+	desc->tfm = xts_ctx->fallback;
+	ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes);
+	desc->tfm = tfm;
+
+	return ret;
+}
+
+/* setkey for xts(aes): 32/64-byte keys use hardware, 48-byte the fallback. */
+static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+
+	switch (key_len) {
+	case 32:
+		xts_ctx->enc = KM_XTS_128_ENCRYPT;
+		xts_ctx->dec = KM_XTS_128_DECRYPT;
+		memcpy(xts_ctx->key + 16, in_key, 16);
+		memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16);
+		break;
+	case 48:
+		/* software only: a failing fallback setkey must be reported */
+		xts_ctx->enc = xts_ctx->dec = 0;
+		xts_ctx->key_len = key_len;
+		return xts_fallback_setkey(tfm, in_key, key_len);
+	case 64:
+		xts_ctx->enc = KM_XTS_256_ENCRYPT;
+		xts_ctx->dec = KM_XTS_256_DECRYPT;
+		memcpy(xts_ctx->key, in_key, 32);
+		memcpy(xts_ctx->pcc_key, in_key + 32, 32);
+		break;
+	default:
+		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+	xts_ctx->key_len = key_len;
+	return 0;
+}
+
+/*
+ * XTS en/decryption. crypt_s390_pcc() is run once on the IV to obtain the
+ * initial XTS value, which then seeds the crypt_s390_km() parameter block.
+ * Both blocks are entered at 'offset': 16 for 32-byte keys, whose halves
+ * xts_aes_set_key() stores in the upper 16 bytes of the key fields, and 0
+ * for 64-byte keys. Returns -EIO when either instruction fails.
+ */
+static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
+			 struct s390_xts_ctx *xts_ctx,
+			 struct blkcipher_walk *walk)
+{
+	unsigned int offset = (xts_ctx->key_len >> 1) & 0x10;
+	int ret = blkcipher_walk_virt(desc, walk);
+	unsigned int nbytes, full;
+	struct pcc_param pcc_param;
+	struct {
+		u8 key[32];
+		u8 init[16];
+	} xts_param;
+
+	if (!walk->nbytes)
+		return ret;
+
+	memset(pcc_param.block, 0, sizeof(pcc_param.block));
+	memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
+	memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
+	memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+	memcpy(pcc_param.key, xts_ctx->pcc_key, 32);
+	if (crypt_s390_pcc(func, &pcc_param.key[offset]) < 0)
+		return -EIO;
+
+	memcpy(xts_param.key, xts_ctx->key, 32);
+	memcpy(xts_param.init, pcc_param.xts, 16);
+	while ((nbytes = walk->nbytes)) {
+		full = nbytes & ~(AES_BLOCK_SIZE - 1);
+		ret = crypt_s390_km(func, &xts_param.key[offset],
+				    walk->dst.virt.addr, walk->src.virt.addr,
+				    full);
+		if (ret < 0 || ret != full)
+			return -EIO;
+		ret = blkcipher_walk_done(desc, walk,
+					  nbytes & (AES_BLOCK_SIZE - 1));
+	}
+	return ret;
+}
+
+/* .encrypt of xts(aes): 48-byte keys go to software, the rest to hardware. */
+static int xts_aes_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	if (unlikely(ctx->key_len == 48))
+		return xts_fallback_encrypt(desc, dst, src, nbytes);
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return xts_aes_crypt(desc, ctx->enc, ctx, &walk);
+}
+
+/* .decrypt of xts(aes): 48-byte keys go to software, the rest to hardware. */
+static int xts_aes_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	if (unlikely(ctx->key_len == 48))
+		return xts_fallback_decrypt(desc, dst, src, nbytes);
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return xts_aes_crypt(desc, ctx->dec, ctx, &walk);
+}
+
+/* cra_init hook: allocate the software blkcipher of the same cra_name. */
+static int xts_fallback_init(struct crypto_tfm *tfm)
+{
+	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+	const char *name = tfm->__crt_alg->cra_name;
+	struct crypto_blkcipher *fb;
+
+	fb = crypto_alloc_blkcipher(name, 0,
+			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	xts_ctx->fallback = fb;	/* stored even when it is an ERR_PTR */
+	if (!IS_ERR(fb))
+		return 0;
+	pr_err("Allocating XTS fallback algorithm %s failed\n", name);
+	return PTR_ERR(fb);
+}
+
+static void xts_fallback_exit(struct crypto_tfm *tfm)	/* .cra_exit of xts */
+{
+	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_blkcipher(xts_ctx->fallback);	/* from init hook */
+	xts_ctx->fallback = NULL;	/* keep no pointer to the freed tfm */
+}
+
+/*
+ * "xts(aes)" blkcipher; NEED_FALLBACK because 48-byte keys are served by
+ * the software blkcipher allocated in xts_fallback_init().
+ */
+static struct crypto_alg xts_aes_alg = {
+	.cra_name = "xts(aes)",
+	.cra_driver_name = "xts-aes-s390",
+	.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
+	.cra_blocksize = AES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct s390_xts_ctx),
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = xts_fallback_init,
+	.cra_exit = xts_fallback_exit,
+	.cra_u.blkcipher = {
+		.min_keysize = 2 * AES_MIN_KEY_SIZE,
+		.max_keysize = 2 * AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.setkey = xts_aes_set_key,
+		.encrypt = xts_aes_encrypt,
+		.decrypt = xts_aes_decrypt,
+	},
+};
+
+static int xts_aes_alg_reg;	/* set to 1 once xts_aes_alg is registered */
+
+/*
+ * setkey for ctr(aes): pick the KMCTR function codes, defer to aes_set_key().
+ * NOTE(review): aes_set_key() may call setkey_fallback_cip(), yet ctr_aes_alg
+ * has no cra_init that allocates a fallback - confirm that path is dead here.
+ */
+static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+			   unsigned int key_len)
+{
+	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+
+	if (key_len == 16) {
+		sctx->enc = KMCTR_AES_128_ENCRYPT;
+		sctx->dec = KMCTR_AES_128_DECRYPT;
+	} else if (key_len == 24) {
+		sctx->enc = KMCTR_AES_192_ENCRYPT;
+		sctx->dec = KMCTR_AES_192_DECRYPT;
+	} else if (key_len == 32) {
+		sctx->enc = KMCTR_AES_256_ENCRYPT;
+		sctx->dec = KMCTR_AES_256_DECRYPT;
+	}
+	return aes_set_key(tfm, in_key, key_len);
+}
+
+/* Expand the counter block at ctrptr into a run of consecutive counters. */
+static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+{
+	unsigned int i, len = nbytes & ~(AES_BLOCK_SIZE - 1);
+
+	if (nbytes > PAGE_SIZE)
+		len = PAGE_SIZE;	/* at most PAGE_SIZE bytes per call */
+	for (i = AES_BLOCK_SIZE; i < len; i += AES_BLOCK_SIZE) {
+		memcpy(ctrptr + i, ctrptr + i - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+		crypto_inc(ctrptr + i, AES_BLOCK_SIZE);
+	}
+	return len;	/* bytes of counter data now valid in ctrptr */
+}
+
+/*
+ * CTR mode via crypt_s390_kmctr(): batched through the shared ctrblk page if
+ * ctrblk_lock is free, else one block at a time from the on-stack counter.
+ * NOTE(review): blkcipher_walk_done() runs under ctrblk_lock - may it sleep?
+ */
+static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
+			 struct s390_aes_ctx *sctx, struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
+	unsigned int n, nbytes;
+	u8 buf[AES_BLOCK_SIZE], ctrbuf[AES_BLOCK_SIZE];
+	u8 *out, *in, *ctrptr = ctrbuf;
+
+	if (!walk->nbytes)
+		return ret;
+
+	if (spin_trylock(&ctrblk_lock))
+		ctrptr = ctrblk;
+	memcpy(ctrptr, walk->iv, AES_BLOCK_SIZE);
+	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+		out = walk->dst.virt.addr;
+		in = walk->src.virt.addr;
+		while (nbytes >= AES_BLOCK_SIZE) {
+			if (ctrptr == ctrblk)
+				n = __ctrblk_init(ctrptr, nbytes);
+			else
+				n = AES_BLOCK_SIZE;
+			ret = crypt_s390_kmctr(func, sctx->key, out, in,
+					       n, ctrptr);
+			if (ret < 0 || ret != n) {
+				if (ctrptr == ctrblk)
+					spin_unlock(&ctrblk_lock);
+				return -EIO;
+			}
+			if (n > AES_BLOCK_SIZE)
+				memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE,
+				       AES_BLOCK_SIZE);
+			crypto_inc(ctrptr, AES_BLOCK_SIZE);
+			out += n;
+			in += n;
+			nbytes -= n;
+		}
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	}
+	if (ctrptr == ctrblk) {
+		if (nbytes)
+			memcpy(ctrbuf, ctrptr, AES_BLOCK_SIZE);
+		else
+			memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE);
+		spin_unlock(&ctrblk_lock);
+	} else if (!nbytes) {
+		memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE);
+	}
+	if (nbytes) {
+		/* short final block: pad it in buf so only nbytes are read */
+		memset(buf, 0, AES_BLOCK_SIZE);
+		memcpy(buf, walk->src.virt.addr, nbytes);
+		ret = crypt_s390_kmctr(func, sctx->key, buf, buf,
+				       AES_BLOCK_SIZE, ctrbuf);
+		if (ret < 0 || ret != AES_BLOCK_SIZE)
+			return -EIO;
+		memcpy(walk->dst.virt.addr, buf, nbytes);
+		crypto_inc(ctrbuf, AES_BLOCK_SIZE);
+		ret = blkcipher_walk_done(desc, walk, 0);
+		memcpy(walk->iv, ctrbuf, AES_BLOCK_SIZE);
+	}
+	return ret;
+}
+
+static int ctr_aes_encrypt(struct blkcipher_desc *desc,	/* ctr(aes) .encrypt */
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ctr_aes_crypt(desc, sctx->enc, sctx, &walk);	/* enc code */
+}
+
+static int ctr_aes_decrypt(struct blkcipher_desc *desc,	/* ctr(aes) .decrypt */
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ctr_aes_crypt(desc, sctx->dec, sctx, &walk);	/* dec code */
+}
+
+/* "ctr(aes)": registered only if all three KMCTR-AES functions are there. */
+static struct crypto_alg ctr_aes_alg = {
+	.cra_name = "ctr(aes)",
+	.cra_driver_name = "ctr-aes-s390",
+	.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	/* blocksize 1: ctr_aes_crypt() copes with a short final block */
+	.cra_blocksize = 1,
+	.cra_ctxsize = sizeof(struct s390_aes_ctx),
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_u.blkcipher = {
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.setkey = ctr_aes_set_key,
+		.encrypt = ctr_aes_encrypt,
+		.decrypt = ctr_aes_decrypt,
+	},
+};
+
+static int ctr_aes_alg_reg;	/* set to 1 once ctr_aes_alg is registered */
+
+/* Probe the hardware, register what it accelerates; 0 or a negative errno. */
+static int __init aes_s390_init(void)
+{
+	int ret;
+
+	if (crypt_s390_func_available(KM_AES_128_ENCRYPT, CRYPT_S390_MSA))
+		keylen_flag |= AES_KEYLEN_128;
+	if (crypt_s390_func_available(KM_AES_192_ENCRYPT, CRYPT_S390_MSA))
+		keylen_flag |= AES_KEYLEN_192;
+	if (crypt_s390_func_available(KM_AES_256_ENCRYPT, CRYPT_S390_MSA))
+		keylen_flag |= AES_KEYLEN_256;
+
+	if (!keylen_flag)
+		return -EOPNOTSUPP;
+
+	/* z9 109 and z9 BC/EC only support 128 bit key length */
+	if (keylen_flag == AES_KEYLEN_128)
+		pr_info("AES hardware acceleration is only available for 128-bit keys\n");
+
+	ret = crypto_register_alg(&aes_alg);
+	if (ret)
+		goto aes_err;
+
+	ret = crypto_register_alg(&ecb_aes_alg);
+	if (ret)
+		goto ecb_aes_err;
+
+	ret = crypto_register_alg(&cbc_aes_alg);
+	if (ret)
+		goto cbc_aes_err;
+
+	if (crypt_s390_func_available(KM_XTS_128_ENCRYPT,
+			CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+	    crypt_s390_func_available(KM_XTS_256_ENCRYPT,
+			CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+		ret = crypto_register_alg(&xts_aes_alg);
+		if (ret)
+			goto xts_aes_err;
+		xts_aes_alg_reg = 1;
+	}
+
+	if (crypt_s390_func_available(KMCTR_AES_128_ENCRYPT,
+				CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+	    crypt_s390_func_available(KMCTR_AES_192_ENCRYPT,
+				CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+	    crypt_s390_func_available(KMCTR_AES_256_ENCRYPT,
+				CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+		if (!ctrblk) {
+			ret = -ENOMEM;
+			goto ctr_aes_err;
+		}
+		ret = crypto_register_alg(&ctr_aes_alg);
+		if (ret) {
+			free_page((unsigned long) ctrblk);
+			goto ctr_aes_err;
+		}
+		ctr_aes_alg_reg = 1;
+	}
+
+	return 0;
+
+ctr_aes_err:
+	if (xts_aes_alg_reg)	/* xts(aes) is optional, see above */
+		crypto_unregister_alg(&xts_aes_alg);
+xts_aes_err:
+	crypto_unregister_alg(&cbc_aes_alg);
+cbc_aes_err:
+	crypto_unregister_alg(&ecb_aes_alg);
+ecb_aes_err:
+	crypto_unregister_alg(&aes_alg);
+aes_err:
+	return ret;
+}
+
+static void __exit aes_s390_fini(void)	/* module exit: undo aes_s390_init() */
+{
+	if (ctr_aes_alg_reg) {	/* ctr(aes) and ctrblk exist only if set */
+		crypto_unregister_alg(&ctr_aes_alg);
+		free_page((unsigned long) ctrblk);
+	}
+	if (xts_aes_alg_reg)	/* xts(aes) was registered conditionally */
+		crypto_unregister_alg(&xts_aes_alg);
+	crypto_unregister_alg(&cbc_aes_alg);	/* the rest: always registered */
+	crypto_unregister_alg(&ecb_aes_alg);
+	crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_s390_init);
+module_exit(aes_s390_fini);
+
+MODULE_ALIAS_CRYPTO("aes-all");
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
new file mode 100644
index 000000000..d9c4c313f
--- /dev/null
+++ b/arch/s390/crypto/crypt_s390.h
@@ -0,0 +1,493 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for s390 cryptographic instructions.
+ *
+ *   Copyright IBM Corp. 2003, 2015
+ *   Author(s): Thomas Spatzier
+ *		Jan Glauber (jan.glauber@de.ibm.com)
+ *		Harald Freudenberger (freude@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#ifndef _CRYPTO_ARCH_S390_CRYPT_S390_H
+#define _CRYPTO_ARCH_S390_CRYPT_S390_H
+
+#include <asm/errno.h>
+#include <asm/facility.h>
+
+/*
+ * A function code is an operation (high byte, see crypt_s390_operations)
+ * or'ed with the per-instruction function number (low byte).
+ */
+#define CRYPT_S390_OP_MASK 0xFF00
+#define CRYPT_S390_FUNC_MASK 0x00FF
+
+/* cra_priority values: plain ciphers vs. the mode (ecb/cbc/ctr) algorithms */
+#define CRYPT_S390_PRIORITY 300
+#define CRYPT_S390_COMPOSITE_PRIORITY 400
+
+/*
+ * facility_mask bits for crypt_s390_func_available(); each one requires
+ * a facility bit there: MSA -> 17, MSA3 -> 76, MSA4 -> 77, MSA5 -> 57.
+ */
+#define CRYPT_S390_MSA	0x1
+#define CRYPT_S390_MSA3	0x2
+#define CRYPT_S390_MSA4	0x4
+#define CRYPT_S390_MSA5	0x8
+
+/*
+ * s390 cryptographic operations.
+ *
+ * The operation occupies the high byte of a function code (the part
+ * selected by CRYPT_S390_OP_MASK).
+ */
+enum crypt_s390_operations {
+	CRYPT_S390_KM		= 0x01 << 8,
+	CRYPT_S390_KMC		= 0x02 << 8,
+	CRYPT_S390_KIMD		= 0x03 << 8,
+	CRYPT_S390_KLMD		= 0x04 << 8,
+	CRYPT_S390_KMAC		= 0x05 << 8,
+	CRYPT_S390_KMCTR	= 0x06 << 8,
+	CRYPT_S390_PPNO		= 0x07 << 8,
+};
+
+/*
+ * Function codes for the KM (CIPHER MESSAGE) instruction.
+ * A decrypt code is its encrypt code with the decipher modifier
+ * bit (0x80) set.
+ */
+enum crypt_s390_km_func {
+	KM_QUERY		= CRYPT_S390_KM | 0x00,
+	KM_DEA_ENCRYPT		= CRYPT_S390_KM | 0x01,
+	KM_DEA_DECRYPT		= KM_DEA_ENCRYPT | 0x80,
+	KM_TDEA_128_ENCRYPT	= CRYPT_S390_KM | 0x02,
+	KM_TDEA_128_DECRYPT	= KM_TDEA_128_ENCRYPT | 0x80,
+	KM_TDEA_192_ENCRYPT	= CRYPT_S390_KM | 0x03,
+	KM_TDEA_192_DECRYPT	= KM_TDEA_192_ENCRYPT | 0x80,
+	KM_AES_128_ENCRYPT	= CRYPT_S390_KM | 0x12,
+	KM_AES_128_DECRYPT	= KM_AES_128_ENCRYPT | 0x80,
+	KM_AES_192_ENCRYPT	= CRYPT_S390_KM | 0x13,
+	KM_AES_192_DECRYPT	= KM_AES_192_ENCRYPT | 0x80,
+	KM_AES_256_ENCRYPT	= CRYPT_S390_KM | 0x14,
+	KM_AES_256_DECRYPT	= KM_AES_256_ENCRYPT | 0x80,
+	KM_XTS_128_ENCRYPT	= CRYPT_S390_KM | 0x32,
+	KM_XTS_128_DECRYPT	= KM_XTS_128_ENCRYPT | 0x80,
+	KM_XTS_256_ENCRYPT	= CRYPT_S390_KM | 0x34,
+	KM_XTS_256_DECRYPT	= KM_XTS_256_ENCRYPT | 0x80,
+};
+
+/*
+ * Function codes for the KMC (CIPHER MESSAGE WITH CHAINING)
+ * instruction.  Decrypt codes are the encrypt codes with bit 0x80 set.
+ */
+enum crypt_s390_kmc_func {
+	KMC_QUERY		= CRYPT_S390_KMC | 0x00,
+	KMC_DEA_ENCRYPT		= CRYPT_S390_KMC | 0x01,
+	KMC_DEA_DECRYPT		= KMC_DEA_ENCRYPT | 0x80,
+	KMC_TDEA_128_ENCRYPT	= CRYPT_S390_KMC | 0x02,
+	KMC_TDEA_128_DECRYPT	= KMC_TDEA_128_ENCRYPT | 0x80,
+	KMC_TDEA_192_ENCRYPT	= CRYPT_S390_KMC | 0x03,
+	KMC_TDEA_192_DECRYPT	= KMC_TDEA_192_ENCRYPT | 0x80,
+	KMC_AES_128_ENCRYPT	= CRYPT_S390_KMC | 0x12,
+	KMC_AES_128_DECRYPT	= KMC_AES_128_ENCRYPT | 0x80,
+	KMC_AES_192_ENCRYPT	= CRYPT_S390_KMC | 0x13,
+	KMC_AES_192_DECRYPT	= KMC_AES_192_ENCRYPT | 0x80,
+	KMC_AES_256_ENCRYPT	= CRYPT_S390_KMC | 0x14,
+	KMC_AES_256_DECRYPT	= KMC_AES_256_ENCRYPT | 0x80,
+	KMC_PRNG		= CRYPT_S390_KMC | 0x43,
+};
+
+/*
+ * Function codes for the KMCTR (CIPHER MESSAGE WITH COUNTER)
+ * instruction.  Decrypt codes are the encrypt codes with bit 0x80 set.
+ */
+enum crypt_s390_kmctr_func {
+	KMCTR_QUERY		= CRYPT_S390_KMCTR | 0x00,
+	KMCTR_DEA_ENCRYPT	= CRYPT_S390_KMCTR | 0x01,
+	KMCTR_DEA_DECRYPT	= KMCTR_DEA_ENCRYPT | 0x80,
+	KMCTR_TDEA_128_ENCRYPT	= CRYPT_S390_KMCTR | 0x02,
+	KMCTR_TDEA_128_DECRYPT	= KMCTR_TDEA_128_ENCRYPT | 0x80,
+	KMCTR_TDEA_192_ENCRYPT	= CRYPT_S390_KMCTR | 0x03,
+	KMCTR_TDEA_192_DECRYPT	= KMCTR_TDEA_192_ENCRYPT | 0x80,
+	KMCTR_AES_128_ENCRYPT	= CRYPT_S390_KMCTR | 0x12,
+	KMCTR_AES_128_DECRYPT	= KMCTR_AES_128_ENCRYPT | 0x80,
+	KMCTR_AES_192_ENCRYPT	= CRYPT_S390_KMCTR | 0x13,
+	KMCTR_AES_192_DECRYPT	= KMCTR_AES_192_ENCRYPT | 0x80,
+	KMCTR_AES_256_ENCRYPT	= CRYPT_S390_KMCTR | 0x14,
+	KMCTR_AES_256_DECRYPT	= KMCTR_AES_256_ENCRYPT | 0x80,
+};
+
+/*
+ * Function codes for the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
+ * instruction.
+ */
+enum crypt_s390_kimd_func {
+	KIMD_QUERY	= CRYPT_S390_KIMD | 0x00,
+	KIMD_SHA_1	= CRYPT_S390_KIMD | 0x01,
+	KIMD_SHA_256	= CRYPT_S390_KIMD | 0x02,
+	KIMD_SHA_512	= CRYPT_S390_KIMD | 0x03,
+	KIMD_GHASH	= CRYPT_S390_KIMD | 0x41,
+};
+
+/*
+ * Function codes for the KLMD (COMPUTE LAST MESSAGE DIGEST)
+ * instruction.
+ */
+enum crypt_s390_klmd_func {
+	KLMD_QUERY	= CRYPT_S390_KLMD | 0x00,
+	KLMD_SHA_1	= CRYPT_S390_KLMD | 0x01,
+	KLMD_SHA_256	= CRYPT_S390_KLMD | 0x02,
+	KLMD_SHA_512	= CRYPT_S390_KLMD | 0x03,
+};
+
+/*
+ * Function codes for the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
+ * instruction.
+ */
+enum crypt_s390_kmac_func {
+	KMAC_QUERY	= CRYPT_S390_KMAC | 0x00,
+	KMAC_DEA	= CRYPT_S390_KMAC | 0x01,
+	KMAC_TDEA_128	= CRYPT_S390_KMAC | 0x02,
+	KMAC_TDEA_192	= CRYPT_S390_KMAC | 0x03,
+};
+
+/*
+ * Function codes for the PPNO (PERFORM PSEUDORANDOM NUMBER
+ * OPERATION) instruction.  The seed code is the generate code with
+ * bit 0x80 set.
+ */
+enum crypt_s390_ppno_func {
+	PPNO_QUERY		= CRYPT_S390_PPNO | 0x00,
+	PPNO_SHA512_DRNG_GEN	= CRYPT_S390_PPNO | 0x03,
+	PPNO_SHA512_DRNG_SEED	= PPNO_SHA512_DRNG_GEN | 0x80,
+};
+
+/**
+ * crypt_s390_km:
+ * @func: the function code passed to KM; see crypt_s390_km_func
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Executes the KM (CIPHER MESSAGE) operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for the query func, number of processed
+ * bytes for encryption/decryption funcs
+ */
+static inline int crypt_s390_km(long func, void *param,
+				u8 *dest, const u8 *src, long src_len)
+{
+	/* operands are pinned to the registers named in the asm("n") bindings */
+	register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
+	register void *__param asm("1") = param;
+	register const u8 *__src asm("2") = src;
+	register long __src_len asm("3") = src_len;
+	register u8 *__dest asm("4") = dest;
+	int ret;
+
+	/*
+	 * ret enters the asm as -1 (matching constraint "0") and is only
+	 * cleared by the "la" once the instruction has run to completion.
+	 * The EX_TABLE entries presumably redirect a fault at 0: or 1: to
+	 * 2:, skipping the "la" so that -1 is returned.
+	 */
+	asm volatile(
+		"0:	.insn	rre,0xb92e0000,%3,%1\n" /* KM opcode */
+		"1:	brc	1,0b\n" /* handle partial completion */
+		"	la	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest)
+		: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	/* function number 0 is the query; otherwise report the bytes consumed */
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
+}
+
+/**
+ * crypt_s390_kmc:
+ * @func: the function code passed to KMC; see crypt_s390_kmc_func
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Executes the KMC (CIPHER MESSAGE WITH CHAINING) operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for the query func, number of processed
+ * bytes for encryption/decryption funcs
+ */
+static inline int crypt_s390_kmc(long func, void *param,
+				 u8 *dest, const u8 *src, long src_len)
+{
+	/* operands are pinned to the registers named in the asm("n") bindings */
+	register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
+	register void *__param asm("1") = param;
+	register const u8 *__src asm("2") = src;
+	register long __src_len asm("3") = src_len;
+	register u8 *__dest asm("4") = dest;
+	int ret;
+
+	/*
+	 * ret starts as -1 (matching constraint "0"); only the "la" after a
+	 * completed instruction sets it to 0.  The EX_TABLE entries
+	 * presumably send a fault at 0: or 1: straight to 2:.
+	 */
+	asm volatile(
+		"0:	.insn	rre,0xb92f0000,%3,%1\n" /* KMC opcode */
+		"1:	brc	1,0b\n" /* handle partial completion */
+		"	la	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest)
+		: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	/* function number 0 is the query; otherwise report the bytes consumed */
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
+}
+
+/**
+ * crypt_s390_kimd:
+ * @func: the function code passed to KIMD; see crypt_s390_kimd_func
+ * @param: address of parameter block; see POP for details on each func
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Executes the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST) operation
+ * of the CPU.
+ *
+ * Returns -1 for failure, 0 for the query func, number of processed
+ * bytes for digest funcs
+ */
+static inline int crypt_s390_kimd(long func, void *param,
+				  const u8 *src, long src_len)
+{
+	/* operands are pinned to the registers named in the asm("n") bindings */
+	register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
+	register void *__param asm("1") = param;
+	register const u8 *__src asm("2") = src;
+	register long __src_len asm("3") = src_len;
+	int ret;
+
+	/*
+	 * ret starts as -1 (matching constraint "0"); only the "la" after a
+	 * completed instruction sets it to 0.  The EX_TABLE entries
+	 * presumably send a fault at 0: or 1: straight to 2:.
+	 */
+	asm volatile(
+		"0:	.insn	rre,0xb93e0000,%1,%1\n" /* KIMD opcode */
+		"1:	brc	1,0b\n" /* handle partial completion */
+		"	la	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len)
+		: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	/* function number 0 is the query; otherwise report the bytes consumed */
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
+}
+
+/**
+ * crypt_s390_klmd:
+ * @func: the function code passed to KLMD; see crypt_s390_klmd_func
+ * @param: address of parameter block; see POP for details on each func
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Executes the KLMD (COMPUTE LAST MESSAGE DIGEST) operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for the query func, number of processed
+ * bytes for digest funcs
+ */
+static inline int crypt_s390_klmd(long func, void *param,
+				  const u8 *src, long src_len)
+{
+	/* operands are pinned to the registers named in the asm("n") bindings */
+	register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
+	register void *__param asm("1") = param;
+	register const u8 *__src asm("2") = src;
+	register long __src_len asm("3") = src_len;
+	int ret;
+
+	/*
+	 * ret starts as -1 (matching constraint "0"); only the "la" after a
+	 * completed instruction sets it to 0.  The EX_TABLE entries
+	 * presumably send a fault at 0: or 1: straight to 2:.
+	 */
+	asm volatile(
+		"0:	.insn	rre,0xb93f0000,%1,%1\n" /* KLMD opcode */
+		"1:	brc	1,0b\n" /* handle partial completion */
+		"	la	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len)
+		: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	/* function number 0 is the query; otherwise report the bytes consumed */
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
+}
+
+/**
+ * crypt_s390_kmac:
+ * @func: the function code passed to KMAC; see crypt_s390_kmac_func
+ * @param: address of parameter block; see POP for details on each func
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ *
+ * Executes the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE) operation
+ * of the CPU.
+ *
+ * Returns -1 for failure, 0 for the query func, number of processed
+ * bytes for digest funcs
+ */
+static inline int crypt_s390_kmac(long func, void *param,
+				  const u8 *src, long src_len)
+{
+	/* operands are pinned to the registers named in the asm("n") bindings */
+	register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
+	register void *__param asm("1") = param;
+	register const u8 *__src asm("2") = src;
+	register long __src_len asm("3") = src_len;
+	int ret;
+
+	/*
+	 * ret starts as -1 (matching constraint "0"); only the "la" after a
+	 * completed instruction sets it to 0.  The EX_TABLE entries
+	 * presumably send a fault at 0: or 1: straight to 2:.
+	 */
+	asm volatile(
+		"0:	.insn	rre,0xb91e0000,%1,%1\n" /* KMAC opcode */
+		"1:	brc	1,0b\n" /* handle partial completion */
+		"	la	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len)
+		: "d" (__func), "a" (__param), "0" (-1) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	/* function number 0 is the query; otherwise report the bytes consumed */
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
+}
+
+/**
+ * crypt_s390_kmctr:
+ * @func: the function code passed to KMCTR; see crypt_s390_kmctr_func
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @src: address of source memory area
+ * @src_len: length of src operand in bytes
+ * @counter: address of counter value
+ *
+ * Executes the KMCTR (CIPHER MESSAGE WITH COUNTER) operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for the query func, number of processed
+ * bytes for encryption/decryption funcs
+ */
+static inline int crypt_s390_kmctr(long func, void *param, u8 *dest,
+				 const u8 *src, long src_len, u8 *counter)
+{
+	/* operands are pinned to the registers named in the asm("n") bindings */
+	register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
+	register void *__param asm("1") = param;
+	register const u8 *__src asm("2") = src;
+	register long __src_len asm("3") = src_len;
+	register u8 *__dest asm("4") = dest;
+	register u8 *__ctr asm("6") = counter;
+	/* stays -1 unless the "la" after a completed instruction clears it */
+	int ret = -1;
+
+	/*
+	 * The EX_TABLE entries presumably send a fault at 0: or 1: straight
+	 * to 2:, leaving ret at -1.
+	 */
+	asm volatile(
+		"0:	.insn	rrf,0xb92d0000,%3,%1,%4,0\n" /* KMCTR opcode */
+		"1:	brc	1,0b\n" /* handle partial completion */
+		"	la	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest),
+		  "+a" (__ctr)
+		: "d" (__func), "a" (__param) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	/* function number 0 is the query; otherwise report the bytes consumed */
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
+}
+
+/**
+ * crypt_s390_ppno:
+ * @func: the function code passed to PPNO; see crypt_s390_ppno_func
+ * @param: address of parameter block; see POP for details on each func
+ * @dest: address of destination memory area
+ * @dest_len: size of destination memory area in bytes
+ * @seed: address of seed data
+ * @seed_len: size of seed data in bytes
+ *
+ * Executes the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
+ * operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for the query func, number of random
+ * bytes stored in dest buffer for generate function
+ */
+static inline int crypt_s390_ppno(long func, void *param,
+				  u8 *dest, long dest_len,
+				  const u8 *seed, long seed_len)
+{
+	register long  __func	  asm("0") = func & CRYPT_S390_FUNC_MASK;
+	register void *__param	  asm("1") = param;    /* param block (240 bytes) */
+	register u8   *__dest	  asm("2") = dest;     /* buf for recv random bytes */
+	register long  __dest_len asm("3") = dest_len; /* requested random bytes */
+	register const u8 *__seed asm("4") = seed;     /* buf with seed data */
+	register long  __seed_len asm("5") = seed_len; /* bytes in seed buf */
+	/* stays -1 unless the "la" after a completed instruction clears it */
+	int ret = -1;
+
+	/*
+	 * The EX_TABLE entries presumably send a fault at 0: or 1: straight
+	 * to 2:, leaving ret at -1.
+	 */
+	asm volatile (
+		"0:	.insn	rre,0xb93c0000,%1,%5\n"	/* PPNO opcode */
+		"1:	brc	1,0b\n"	  /* handle partial completion */
+		"	la	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (ret), "+a"(__dest), "+d"(__dest_len)
+		: "d"(__func), "a"(__param), "a"(__seed), "d"(__seed_len)
+		: "cc", "memory");
+	if (ret < 0)
+		return ret;
+	/* query (function number 0) yields 0, otherwise the bytes written to dest */
+	return (func & CRYPT_S390_FUNC_MASK) ? dest_len - __dest_len : 0;
+}
+
+/**
+ * crypt_s390_func_available:
+ * @func: the function code of the specific function; 0 if op in general
+ * @facility_mask: CRYPT_S390_MSA* flags; every facility named here must be
+ *		   installed, otherwise the function counts as unavailable
+ *
+ * Tests if a specific crypto function is implemented on the machine.
+ *
+ * Returns 1 if func available; 0 if func or op in general not available
+ */
+static inline int crypt_s390_func_available(int func,
+					    unsigned int facility_mask)
+{
+	unsigned char query_bits[16];
+	int fc, rc;
+
+	/* each requested facility has to be present */
+	if ((facility_mask & CRYPT_S390_MSA) && !test_facility(17))
+		return 0;
+	if ((facility_mask & CRYPT_S390_MSA3) && !test_facility(76))
+		return 0;
+	if ((facility_mask & CRYPT_S390_MSA4) && !test_facility(77))
+		return 0;
+	if ((facility_mask & CRYPT_S390_MSA5) && !test_facility(57))
+		return 0;
+
+	/* run the query function of the instruction @func belongs to */
+	switch (func & CRYPT_S390_OP_MASK) {
+	case CRYPT_S390_KM:
+		rc = crypt_s390_km(KM_QUERY, &query_bits, NULL, NULL, 0);
+		break;
+	case CRYPT_S390_KMC:
+		rc = crypt_s390_kmc(KMC_QUERY, &query_bits, NULL, NULL, 0);
+		break;
+	case CRYPT_S390_KIMD:
+		rc = crypt_s390_kimd(KIMD_QUERY, &query_bits, NULL, 0);
+		break;
+	case CRYPT_S390_KLMD:
+		rc = crypt_s390_klmd(KLMD_QUERY, &query_bits, NULL, 0);
+		break;
+	case CRYPT_S390_KMAC:
+		rc = crypt_s390_kmac(KMAC_QUERY, &query_bits, NULL, 0);
+		break;
+	case CRYPT_S390_KMCTR:
+		rc = crypt_s390_kmctr(KMCTR_QUERY, &query_bits,
+				      NULL, NULL, 0, NULL);
+		break;
+	case CRYPT_S390_PPNO:
+		rc = crypt_s390_ppno(PPNO_QUERY, &query_bits,
+				     NULL, 0, NULL, 0);
+		break;
+	default:
+		return 0;
+	}
+	if (rc < 0)
+		return 0;
+
+	/* strip the operation byte and the 0x80 modifier bit */
+	fc = func & CRYPT_S390_FUNC_MASK & 0x7f;
+
+	/* one bit per function number, counted from the leftmost bit */
+	return (query_bits[fc >> 3] & (0x80 >> (fc & 7))) != 0;
+}
+
+/**
+ * crypt_s390_pcc:
+ * @func: the function code passed to PCC; only the low seven bits are
+ *	  used, so the 0x80 modifier bit of a crypt_s390_km_func code is
+ *	  dropped
+ * @param: address of parameter block; see POP for details on each func
+ *
+ * Executes the PCC (PERFORM CRYPTOGRAPHIC COMPUTATION) operation of the CPU.
+ *
+ * Returns -1 for failure, 0 for success.
+ */
+static inline int crypt_s390_pcc(long func, void *param)
+{
+	register long __func asm("0") = func & 0x7f; /* encrypt or decrypt */
+	register void *__param asm("1") = param;
+	/* stays -1 unless the "la" after a completed instruction clears it */
+	int ret = -1;
+
+	/*
+	 * The EX_TABLE entries presumably send a fault at 0: or 1: straight
+	 * to 2:, leaving ret at -1.
+	 */
+	asm volatile(
+		"0:	.insn	rre,0xb92c0000,0,0\n" /* PCC opcode */
+		"1:	brc	1,0b\n" /* handle partial completion */
+		"	la	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (ret)
+		: "d" (__func), "a" (__param) : "cc", "memory");
+	return ret;
+}
+
+#endif	/* _CRYPTO_ARCH_S390_CRYPT_S390_H */
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
new file mode 100644
index 000000000..9e05cc453
--- /dev/null
+++ b/arch/s390/crypto/des_s390.c
@@ -0,0 +1,626 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the DES Cipher Algorithm.
+ *
+ * Copyright IBM Corp. 2003, 2011
+ * Author(s): Thomas Spatzier
+ *	      Jan Glauber (jan.glauber@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/des.h>
+
+#include "crypt_s390.h"
+
+#define DES3_KEY_SIZE	(3 * DES_KEY_SIZE)
+
+/*
+ * One page of scratch space for precomputed CTR counter blocks, allocated
+ * in des_s390_init().  ctr_desall_crypt() only uses it while holding
+ * ctrblk_lock (taken with spin_trylock) and otherwise works from a
+ * single counter block on its own stack.
+ */
+static u8 *ctrblk;
+static DEFINE_SPINLOCK(ctrblk_lock);
+
+/* Per-transform context shared by all DES and 3DES algorithms in this file. */
+struct s390_des_ctx {
+	u8 iv[DES_BLOCK_SIZE];
+	/* sized for 3DES; des_setkey()/des3_setkey() copy key_len bytes in */
+	u8 key[DES3_KEY_SIZE];
+};
+
+/*
+ * Install a single-DES key.  The key is expanded with des_ekey() only to
+ * find out whether it is weak; a weak key is refused with -EINVAL when
+ * the caller set CRYPTO_TFM_REQ_WEAK_KEY.  Returns 0 otherwise.
+ */
+static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
+		      unsigned int key_len)
+{
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 expkey[DES_EXPKEY_WORDS];
+	int weak;
+
+	/* a zero return is treated as "weak key"; the expansion is not kept */
+	weak = !des_ekey(expkey, key);
+	if (weak && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, key_len);
+	return 0;
+}
+
+/* Encrypt one DES block from @in into @out with the key held in @tfm. */
+static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct s390_des_ctx *sctx = crypto_tfm_ctx(tfm);
+	u8 *key = sctx->key;
+
+	crypt_s390_km(KM_DEA_ENCRYPT, key, out, in, DES_BLOCK_SIZE);
+}
+
+/* Decrypt one DES block from @in into @out with the key held in @tfm. */
+static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	struct s390_des_ctx *sctx = crypto_tfm_ctx(tfm);
+	u8 *key = sctx->key;
+
+	crypt_s390_km(KM_DEA_DECRYPT, key, out, in, DES_BLOCK_SIZE);
+}
+
+/* "des": plain single-block DES cipher built on the KM-DEA functions. */
+static struct crypto_alg des_alg = {
+	.cra_name = "des",
+	.cra_driver_name = "des-s390",
+	.cra_priority = CRYPT_S390_PRIORITY,
+	.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize = DES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct s390_des_ctx),
+	.cra_module = THIS_MODULE,
+	.cra_u.cipher = {
+		.cia_min_keysize = DES_KEY_SIZE,
+		.cia_max_keysize = DES_KEY_SIZE,
+		.cia_setkey = des_setkey,
+		.cia_encrypt = des_encrypt,
+		.cia_decrypt = des_decrypt,
+	},
+};
+
+/*
+ * Common ECB worker for DES and 3DES: walk the request and feed all
+ * complete blocks of each chunk to KM with function code @func and @key.
+ * Returns -EIO if KM does not process exactly the requested length,
+ * otherwise the result of the last walk step.
+ */
+static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
+			    u8 *key, struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt(desc, walk);
+	unsigned int avail;
+
+	for (avail = walk->nbytes; avail; avail = walk->nbytes) {
+		/* split the chunk into whole blocks and the leftover */
+		unsigned int full = avail & ~(DES_BLOCK_SIZE - 1);
+		unsigned int tail = avail & (DES_BLOCK_SIZE - 1);
+		u8 *dst = walk->dst.virt.addr;
+		u8 *src = walk->src.virt.addr;
+
+		ret = crypt_s390_km(func, key, dst, src, full);
+		if (ret < 0 || ret != full)
+			return -EIO;
+
+		ret = blkcipher_walk_done(desc, walk, tail);
+	}
+
+	return ret;
+}
+
+/*
+ * Common CBC worker for DES and 3DES.  A local parameter block is built
+ * from the walk's IV and the context key, KMC is run over all complete
+ * blocks of each chunk, and param.iv is copied back into walk->iv at the
+ * end.  Returns -EIO if KMC does not process exactly the requested
+ * length, otherwise the result of the last walk step.
+ */
+static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
+			    struct blkcipher_walk *walk)
+{
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	int ret = blkcipher_walk_virt(desc, walk);
+	unsigned int avail = walk->nbytes;
+	struct {
+		u8 iv[DES_BLOCK_SIZE];
+		u8 key[DES3_KEY_SIZE];
+	} param;
+
+	if (!avail)
+		return ret;
+
+	memcpy(param.iv, walk->iv, DES_BLOCK_SIZE);
+	memcpy(param.key, ctx->key, DES3_KEY_SIZE);
+
+	while (avail) {
+		/* split the chunk into whole blocks and the leftover */
+		unsigned int full = avail & ~(DES_BLOCK_SIZE - 1);
+		unsigned int tail = avail & (DES_BLOCK_SIZE - 1);
+		u8 *dst = walk->dst.virt.addr;
+		u8 *src = walk->src.virt.addr;
+
+		ret = crypt_s390_kmc(func, &param, dst, src, full);
+		if (ret < 0 || ret != full)
+			return -EIO;
+
+		ret = blkcipher_walk_done(desc, walk, tail);
+		avail = walk->nbytes;
+	}
+
+	/* hand the (presumably updated) chaining value back to the caller */
+	memcpy(walk->iv, param.iv, DES_BLOCK_SIZE);
+	return ret;
+}
+
+/* ecb(des) encrypt entry point: set up the walk and run KM-DEA over it. */
+static int ecb_des_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct blkcipher_walk w;
+	struct s390_des_ctx *sctx;
+
+	blkcipher_walk_init(&w, dst, src, nbytes);
+	sctx = crypto_blkcipher_ctx(desc->tfm);
+
+	return ecb_desall_crypt(desc, KM_DEA_ENCRYPT, sctx->key, &w);
+}
+
+/* ecb(des) decrypt entry point: set up the walk and run KM-DEA over it. */
+static int ecb_des_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct blkcipher_walk w;
+	struct s390_des_ctx *sctx;
+
+	blkcipher_walk_init(&w, dst, src, nbytes);
+	sctx = crypto_blkcipher_ctx(desc->tfm);
+
+	return ecb_desall_crypt(desc, KM_DEA_DECRYPT, sctx->key, &w);
+}
+
+/* "ecb(des)": DES in ECB mode as a blkcipher. */
+static struct crypto_alg ecb_des_alg = {
+	.cra_name = "ecb(des)",
+	.cra_driver_name = "ecb-des-s390",
+	.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize = DES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct s390_des_ctx),
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_u.blkcipher = {
+		.min_keysize = DES_KEY_SIZE,
+		.max_keysize = DES_KEY_SIZE,
+		.setkey = des_setkey,
+		.encrypt = ecb_des_encrypt,
+		.decrypt = ecb_des_decrypt,
+	},
+};
+
+/* cbc(des) encrypt entry point: set up the walk and run KMC-DEA over it. */
+static int cbc_des_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct blkcipher_walk w;
+
+	blkcipher_walk_init(&w, dst, src, nbytes);
+
+	return cbc_desall_crypt(desc, KMC_DEA_ENCRYPT, &w);
+}
+
+/* cbc(des) decrypt entry point: set up the walk and run KMC-DEA over it. */
+static int cbc_des_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct blkcipher_walk w;
+
+	blkcipher_walk_init(&w, dst, src, nbytes);
+
+	return cbc_desall_crypt(desc, KMC_DEA_DECRYPT, &w);
+}
+
+/* "cbc(des)": DES in CBC mode as a blkcipher; the IV is one DES block. */
+static struct crypto_alg cbc_des_alg = {
+	.cra_name = "cbc(des)",
+	.cra_driver_name = "cbc-des-s390",
+	.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize = DES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct s390_des_ctx),
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_u.blkcipher = {
+		.min_keysize = DES_KEY_SIZE,
+		.max_keysize = DES_KEY_SIZE,
+		.ivsize = DES_BLOCK_SIZE,
+		.setkey = des_setkey,
+		.encrypt = cbc_des_encrypt,
+		.decrypt = cbc_des_decrypt,
+	},
+};
+
+/*
+ * RFC2451:
+ *
+ *   For DES-EDE3, there is no known need to reject weak or
+ *   complementation keys.  Any weakness is obviated by the use of
+ *   multiple keys.
+ *
+ *   However, if the first two or last two independent 64-bit keys are
+ *   equal (k1 == k2 or k2 == k3), then the DES3 operation is simply the
+ *   same as DES.  Implementers MUST reject keys that exhibit this
+ *   property.
+ *
+ * Such a key is only refused (-EINVAL, CRYPTO_TFM_RES_WEAK_KEY set) when
+ * the caller asked for it with CRYPTO_TFM_REQ_WEAK_KEY; otherwise the
+ * key is stored and 0 is returned.
+ */
+static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
+		       unsigned int key_len)
+{
+	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+	const u8 *k1 = key;
+	const u8 *k2 = key + DES_KEY_SIZE;
+	const u8 *k3 = key + 2 * DES_KEY_SIZE;
+
+	if ((!crypto_memneq(k1, k2, DES_KEY_SIZE) ||
+	     !crypto_memneq(k2, k3, DES_KEY_SIZE)) &&
+	    (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, key_len);
+	return 0;
+}
+
+/* Encrypt one block from @src into @dst with the 3DES key held in @tfm. */
+static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct s390_des_ctx *sctx = crypto_tfm_ctx(tfm);
+	u8 *key = sctx->key;
+
+	crypt_s390_km(KM_TDEA_192_ENCRYPT, key, dst, src, DES_BLOCK_SIZE);
+}
+
+/* Decrypt one block from @src into @dst with the 3DES key held in @tfm. */
+static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct s390_des_ctx *sctx = crypto_tfm_ctx(tfm);
+	u8 *key = sctx->key;
+
+	crypt_s390_km(KM_TDEA_192_DECRYPT, key, dst, src, DES_BLOCK_SIZE);
+}
+
+/* "des3_ede": plain single-block 3DES cipher built on KM-TDEA-192. */
+static struct crypto_alg des3_alg = {
+	.cra_name = "des3_ede",
+	.cra_driver_name = "des3_ede-s390",
+	.cra_priority = CRYPT_S390_PRIORITY,
+	.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize = DES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct s390_des_ctx),
+	.cra_module = THIS_MODULE,
+	.cra_u.cipher = {
+		.cia_min_keysize = DES3_KEY_SIZE,
+		.cia_max_keysize = DES3_KEY_SIZE,
+		.cia_setkey = des3_setkey,
+		.cia_encrypt = des3_encrypt,
+		.cia_decrypt = des3_decrypt,
+	},
+};
+
+/* ecb(des3_ede) encrypt entry point: walk the request with KM-TDEA-192. */
+static int ecb_des3_encrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
+{
+	struct blkcipher_walk w;
+	struct s390_des_ctx *sctx;
+
+	blkcipher_walk_init(&w, dst, src, nbytes);
+	sctx = crypto_blkcipher_ctx(desc->tfm);
+
+	return ecb_desall_crypt(desc, KM_TDEA_192_ENCRYPT, sctx->key, &w);
+}
+
+/* ecb(des3_ede) decrypt entry point: walk the request with KM-TDEA-192. */
+static int ecb_des3_decrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
+{
+	struct blkcipher_walk w;
+	struct s390_des_ctx *sctx;
+
+	blkcipher_walk_init(&w, dst, src, nbytes);
+	sctx = crypto_blkcipher_ctx(desc->tfm);
+
+	return ecb_desall_crypt(desc, KM_TDEA_192_DECRYPT, sctx->key, &w);
+}
+
+/* "ecb(des3_ede)": 3DES in ECB mode as a blkcipher. */
+static struct crypto_alg ecb_des3_alg = {
+	.cra_name = "ecb(des3_ede)",
+	.cra_driver_name = "ecb-des3_ede-s390",
+	.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize = DES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct s390_des_ctx),
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_u.blkcipher = {
+		.min_keysize = DES3_KEY_SIZE,
+		.max_keysize = DES3_KEY_SIZE,
+		.setkey = des3_setkey,
+		.encrypt = ecb_des3_encrypt,
+		.decrypt = ecb_des3_decrypt,
+	},
+};
+
+/* cbc(des3_ede) encrypt entry point: walk the request with KMC-TDEA-192. */
+static int cbc_des3_encrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
+{
+	struct blkcipher_walk w;
+
+	blkcipher_walk_init(&w, dst, src, nbytes);
+
+	return cbc_desall_crypt(desc, KMC_TDEA_192_ENCRYPT, &w);
+}
+
+/* cbc(des3_ede) decrypt entry point: walk the request with KMC-TDEA-192. */
+static int cbc_des3_decrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
+{
+	struct blkcipher_walk w;
+
+	blkcipher_walk_init(&w, dst, src, nbytes);
+
+	return cbc_desall_crypt(desc, KMC_TDEA_192_DECRYPT, &w);
+}
+
+/* "cbc(des3_ede)": 3DES in CBC mode as a blkcipher; the IV is one block. */
+static struct crypto_alg cbc_des3_alg = {
+	.cra_name = "cbc(des3_ede)",
+	.cra_driver_name = "cbc-des3_ede-s390",
+	.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize = DES_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct s390_des_ctx),
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_u.blkcipher = {
+		.min_keysize = DES3_KEY_SIZE,
+		.max_keysize = DES3_KEY_SIZE,
+		.ivsize = DES_BLOCK_SIZE,
+		.setkey = des3_setkey,
+		.encrypt = cbc_des3_encrypt,
+		.decrypt = cbc_des3_decrypt,
+	},
+};
+
+/*
+ * Fill @ctrptr with consecutive counter blocks.  The first block must
+ * already hold the starting counter; every following block is a copy of
+ * its predecessor incremented by one.  Returns the number of bytes the
+ * counters cover: PAGE_SIZE when @nbytes is larger than a page,
+ * otherwise @nbytes rounded down to a multiple of DES_BLOCK_SIZE.
+ */
+static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+{
+	unsigned int off, covered;
+
+	if (nbytes > PAGE_SIZE)
+		covered = PAGE_SIZE;
+	else
+		covered = nbytes & ~(DES_BLOCK_SIZE - 1);
+
+	for (off = DES_BLOCK_SIZE; off < covered; off += DES_BLOCK_SIZE) {
+		u8 *cur = ctrptr + off;
+
+		memcpy(cur, cur - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+		crypto_inc(cur, DES_BLOCK_SIZE);
+	}
+
+	return covered;
+}
+
+/*
+ * Common CTR worker for DES and 3DES.
+ *
+ * Counter blocks come either from the shared page ctrblk (when
+ * ctrblk_lock could be taken without waiting), which allows many blocks
+ * per KMCTR call, or from the on-stack ctrbuf, one block per call.
+ * Complete blocks are processed in the main loop; a trailing partial
+ * block is run through a bounce buffer.  walk->iv is updated with the
+ * next counter value.  Returns -EIO if KMCTR does not process exactly
+ * the requested length, otherwise the result of the last walk step.
+ */
+static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
+			    struct s390_des_ctx *ctx,
+			    struct blkcipher_walk *walk)
+{
+	int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
+	unsigned int n, nbytes;
+	u8 buf[DES_BLOCK_SIZE], ctrbuf[DES_BLOCK_SIZE];
+	u8 *out, *in, *ctrptr = ctrbuf;
+
+	if (!walk->nbytes)
+		return ret;
+
+	/* use the shared page only if nobody else holds it right now */
+	if (spin_trylock(&ctrblk_lock))
+		ctrptr = ctrblk;
+
+	memcpy(ctrptr, walk->iv, DES_BLOCK_SIZE);
+	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+		out = walk->dst.virt.addr;
+		in = walk->src.virt.addr;
+		while (nbytes >= DES_BLOCK_SIZE) {
+			/* shared page: precompute counters for up to a page */
+			if (ctrptr == ctrblk)
+				n = __ctrblk_init(ctrptr, nbytes);
+			else
+				n = DES_BLOCK_SIZE;
+			ret = crypt_s390_kmctr(func, ctx->key, out, in,
+					       n, ctrptr);
+			if (ret < 0 || ret != n) {
+				if (ctrptr == ctrblk)
+					spin_unlock(&ctrblk_lock);
+				return -EIO;
+			}
+			/* move the last counter used to the front, then step on */
+			if (n > DES_BLOCK_SIZE)
+				memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE,
+				       DES_BLOCK_SIZE);
+			crypto_inc(ctrptr, DES_BLOCK_SIZE);
+			out += n;
+			in += n;
+			nbytes -= n;
+		}
+		ret = blkcipher_walk_done(desc, walk, nbytes);
+	}
+	/*
+	 * Save the next counter before the shared page is released: into
+	 * ctrbuf if a partial block is still pending, else into walk->iv.
+	 */
+	if (ctrptr == ctrblk) {
+		if (nbytes)
+			memcpy(ctrbuf, ctrptr, DES_BLOCK_SIZE);
+		else
+			memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE);
+		spin_unlock(&ctrblk_lock);
+	} else {
+		if (!nbytes)
+			memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE);
+	}
+	/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
+	if (nbytes) {
+		out = walk->dst.virt.addr;
+		in = walk->src.virt.addr;
+		/* KMCTR runs over a full block; buf keeps the excess off @out */
+		ret = crypt_s390_kmctr(func, ctx->key, buf, in,
+				       DES_BLOCK_SIZE, ctrbuf);
+		if (ret < 0 || ret != DES_BLOCK_SIZE)
+			return -EIO;
+		memcpy(out, buf, nbytes);
+		crypto_inc(ctrbuf, DES_BLOCK_SIZE);
+		ret = blkcipher_walk_done(desc, walk, 0);
+		memcpy(walk->iv, ctrbuf, DES_BLOCK_SIZE);
+	}
+	return ret;
+}
+
+/* ctr(des) encrypt entry point: walk the request with KMCTR-DEA. */
+static int ctr_des_encrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct blkcipher_walk w;
+	struct s390_des_ctx *sctx;
+
+	blkcipher_walk_init(&w, dst, src, nbytes);
+	sctx = crypto_blkcipher_ctx(desc->tfm);
+
+	return ctr_desall_crypt(desc, KMCTR_DEA_ENCRYPT, sctx, &w);
+}
+
+/* ctr(des) decrypt entry point: walk the request with KMCTR-DEA. */
+static int ctr_des_decrypt(struct blkcipher_desc *desc,
+			   struct scatterlist *dst, struct scatterlist *src,
+			   unsigned int nbytes)
+{
+	struct blkcipher_walk w;
+	struct s390_des_ctx *sctx;
+
+	blkcipher_walk_init(&w, dst, src, nbytes);
+	sctx = crypto_blkcipher_ctx(desc->tfm);
+
+	return ctr_desall_crypt(desc, KMCTR_DEA_DECRYPT, sctx, &w);
+}
+
+/*
+ * "ctr(des)": DES in CTR mode as a blkcipher.  cra_blocksize is 1; the
+ * CTR worker copes with a trailing partial block itself.
+ */
+static struct crypto_alg ctr_des_alg = {
+	.cra_name = "ctr(des)",
+	.cra_driver_name = "ctr-des-s390",
+	.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize = 1,
+	.cra_ctxsize = sizeof(struct s390_des_ctx),
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_u.blkcipher = {
+		.min_keysize = DES_KEY_SIZE,
+		.max_keysize = DES_KEY_SIZE,
+		.ivsize = DES_BLOCK_SIZE,
+		.setkey = des_setkey,
+		.encrypt = ctr_des_encrypt,
+		.decrypt = ctr_des_decrypt,
+	},
+};
+
+/* ctr(des3_ede) encrypt entry point: walk the request with KMCTR-TDEA-192. */
+static int ctr_des3_encrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
+{
+	struct blkcipher_walk w;
+	struct s390_des_ctx *sctx;
+
+	blkcipher_walk_init(&w, dst, src, nbytes);
+	sctx = crypto_blkcipher_ctx(desc->tfm);
+
+	return ctr_desall_crypt(desc, KMCTR_TDEA_192_ENCRYPT, sctx, &w);
+}
+
+/* ctr(des3_ede) decrypt entry point: walk the request with KMCTR-TDEA-192. */
+static int ctr_des3_decrypt(struct blkcipher_desc *desc,
+			    struct scatterlist *dst, struct scatterlist *src,
+			    unsigned int nbytes)
+{
+	struct blkcipher_walk w;
+	struct s390_des_ctx *sctx;
+
+	blkcipher_walk_init(&w, dst, src, nbytes);
+	sctx = crypto_blkcipher_ctx(desc->tfm);
+
+	return ctr_desall_crypt(desc, KMCTR_TDEA_192_DECRYPT, sctx, &w);
+}
+
+/*
+ * "ctr(des3_ede)": 3DES in CTR mode as a blkcipher.  cra_blocksize is 1;
+ * the CTR worker copes with a trailing partial block itself.
+ */
+static struct crypto_alg ctr_des3_alg = {
+	.cra_name = "ctr(des3_ede)",
+	.cra_driver_name = "ctr-des3_ede-s390",
+	.cra_priority = CRYPT_S390_COMPOSITE_PRIORITY,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize = 1,
+	.cra_ctxsize = sizeof(struct s390_des_ctx),
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_u.blkcipher = {
+		.min_keysize = DES3_KEY_SIZE,
+		.max_keysize = DES3_KEY_SIZE,
+		.ivsize = DES_BLOCK_SIZE,
+		.setkey = des3_setkey,
+		.encrypt = ctr_des3_encrypt,
+		.decrypt = ctr_des3_decrypt,
+	},
+};
+
+/*
+ * Module init: register the DES/3DES algorithms the machine supports.
+ *
+ * Fails with -EOPNOTSUPP unless both KM-DEA and KM-TDEA-192 are
+ * available.  The two CTR algorithms are optional and only registered
+ * when KMCTR offers DEA and TDEA-192.  Their shared counter page
+ * (ctrblk) is allocated before they are registered, so that
+ * ctr_desall_crypt() can never run while ctrblk is still NULL.
+ *
+ * Returns 0 on success or a negative errno; on failure everything set
+ * up so far is undone again.
+ */
+static int __init des_s390_init(void)
+{
+	int ret;
+
+	if (!crypt_s390_func_available(KM_DEA_ENCRYPT, CRYPT_S390_MSA) ||
+	    !crypt_s390_func_available(KM_TDEA_192_ENCRYPT, CRYPT_S390_MSA))
+		return -EOPNOTSUPP;
+
+	ret = crypto_register_alg(&des_alg);
+	if (ret)
+		goto des_err;
+	ret = crypto_register_alg(&ecb_des_alg);
+	if (ret)
+		goto ecb_des_err;
+	ret = crypto_register_alg(&cbc_des_alg);
+	if (ret)
+		goto cbc_des_err;
+	ret = crypto_register_alg(&des3_alg);
+	if (ret)
+		goto des3_err;
+	ret = crypto_register_alg(&ecb_des3_alg);
+	if (ret)
+		goto ecb_des3_err;
+	ret = crypto_register_alg(&cbc_des3_alg);
+	if (ret)
+		goto cbc_des3_err;
+
+	if (crypt_s390_func_available(KMCTR_DEA_ENCRYPT,
+			CRYPT_S390_MSA | CRYPT_S390_MSA4) &&
+	    crypt_s390_func_available(KMCTR_TDEA_192_ENCRYPT,
+			CRYPT_S390_MSA | CRYPT_S390_MSA4)) {
+		/* the page must exist before the algorithms become usable */
+		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
+		if (!ctrblk) {
+			ret = -ENOMEM;
+			goto ctr_mem_err;
+		}
+		ret = crypto_register_alg(&ctr_des_alg);
+		if (ret)
+			goto ctr_des_err;
+		ret = crypto_register_alg(&ctr_des3_alg);
+		if (ret)
+			goto ctr_des3_err;
+	}
+out:
+	return ret;
+
+ctr_des3_err:
+	crypto_unregister_alg(&ctr_des_alg);
+ctr_des_err:
+	free_page((unsigned long) ctrblk);
+ctr_mem_err:
+	crypto_unregister_alg(&cbc_des3_alg);
+cbc_des3_err:
+	crypto_unregister_alg(&ecb_des3_alg);
+ecb_des3_err:
+	crypto_unregister_alg(&des3_alg);
+des3_err:
+	crypto_unregister_alg(&cbc_des_alg);
+cbc_des_err:
+	crypto_unregister_alg(&ecb_des_alg);
+ecb_des_err:
+	crypto_unregister_alg(&des_alg);
+des_err:
+	goto out;
+}
+
+/*
+ * Module exit: unregister everything des_s390_init() registered.  A
+ * non-NULL ctrblk doubles as the marker that the two CTR algorithms
+ * exist; the page is released once they are gone.
+ */
+static void __exit des_s390_exit(void)
+{
+	struct crypto_alg *base_algs[] = {
+		&cbc_des3_alg, &ecb_des3_alg, &des3_alg,
+		&cbc_des_alg, &ecb_des_alg, &des_alg,
+	};
+	unsigned int i;
+
+	if (ctrblk) {
+		crypto_unregister_alg(&ctr_des_alg);
+		crypto_unregister_alg(&ctr_des3_alg);
+		free_page((unsigned long)ctrblk);
+	}
+
+	for (i = 0; i < sizeof(base_algs) / sizeof(base_algs[0]); i++)
+		crypto_unregister_alg(base_algs[i]);
+}
+
+module_init(des_s390_init);
+module_exit(des_s390_exit);
+
+MODULE_ALIAS_CRYPTO("des");
+MODULE_ALIAS_CRYPTO("des3_ede");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms");
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
new file mode 100644
index 000000000..b258110da
--- /dev/null
+++ b/arch/s390/crypto/ghash_s390.c
@@ -0,0 +1,167 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the GHASH algorithm for GCM (Galois/Counter Mode).
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/module.h>
+
+#include "crypt_s390.h"
+
+#define GHASH_BLOCK_SIZE	16
+#define GHASH_DIGEST_SIZE	16
+
+/* Per-transform context: the key installed by ghash_setkey(). */
+struct ghash_ctx {
+	u8 key[GHASH_BLOCK_SIZE];
+};
+
+/*
+ * Per-request state. A pointer to this struct is handed to
+ * crypt_s390_kimd() as its parameter block.
+ * NOTE(review): presumably the instruction expects icv followed by key at
+ * the start of the block -- confirm before reordering fields.
+ */
+struct ghash_desc_ctx {
+	u8 icv[GHASH_BLOCK_SIZE];	/* running hash; copied out by ghash_final() */
+	u8 key[GHASH_BLOCK_SIZE];	/* copy of ghash_ctx.key made in ghash_init() */
+	u8 buffer[GHASH_BLOCK_SIZE];	/* partial block carried between updates */
+	u32 bytes;			/* bytes still missing to fill buffer, 0 = empty */
+};
+
+/* Start a new hash: wipe the request state and pull in the transform key. */
+static int ghash_init(struct shash_desc *desc)
+{
+	struct ghash_ctx *tfm_ctx = crypto_shash_ctx(desc->tfm);
+	struct ghash_desc_ctx *state = shash_desc_ctx(desc);
+
+	memset(state, 0, sizeof(struct ghash_desc_ctx));
+	memcpy(state->key, tfm_ctx->key, sizeof(state->key));
+
+	return 0;
+}
+
+/*
+ * Install the hash key. Only keys of exactly GHASH_BLOCK_SIZE bytes are
+ * accepted; anything else flags the transform and yields -EINVAL.
+ */
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *key, unsigned int keylen)
+{
+	struct ghash_ctx *tfm_ctx;
+
+	if (keylen == GHASH_BLOCK_SIZE) {
+		tfm_ctx = crypto_shash_ctx(tfm);
+		memcpy(tfm_ctx->key, key, GHASH_BLOCK_SIZE);
+		return 0;
+	}
+
+	crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	return -EINVAL;
+}
+
+/*
+ * Feed srclen bytes from src into the running hash.
+ *
+ * Only whole GHASH_BLOCK_SIZE blocks are passed to crypt_s390_kimd(); a
+ * trailing partial block is parked in dctx->buffer, and dctx->bytes records
+ * how many bytes are still needed to complete it.
+ *
+ * Returns 0 on success, or -EIO when crypt_s390_kimd() does not report the
+ * full requested length.
+ */
+static int ghash_update(struct shash_desc *desc,
+			 const u8 *src, unsigned int srclen)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	unsigned int n;
+	u8 *buf = dctx->buffer;
+	int ret;
+
+	/* first top up a partial block left over from an earlier call */
+	if (dctx->bytes) {
+		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+		n = min(srclen, dctx->bytes);
+		dctx->bytes -= n;
+		srclen -= n;
+
+		memcpy(pos, src, n);
+		src += n;
+
+		/* buffer is complete now: hash it */
+		if (!dctx->bytes) {
+			ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf,
+					      GHASH_BLOCK_SIZE);
+			if (ret != GHASH_BLOCK_SIZE)
+				return -EIO;
+		}
+	}
+
+	/* hash all remaining whole blocks straight from the caller's data */
+	n = srclen & ~(GHASH_BLOCK_SIZE - 1);
+	if (n) {
+		ret = crypt_s390_kimd(KIMD_GHASH, dctx, src, n);
+		if (ret != n)
+			return -EIO;
+		src += n;
+		srclen -= n;
+	}
+
+	/* keep the tail (less than one block) for the next update or final */
+	if (srclen) {
+		dctx->bytes = GHASH_BLOCK_SIZE - srclen;
+		memcpy(buf, src, srclen);
+	}
+
+	return 0;
+}
+
+/*
+ * Hash a pending partial block, if there is one, after padding it with
+ * zero bytes up to GHASH_BLOCK_SIZE. Returns 0 on success or -EIO.
+ */
+static int ghash_flush(struct ghash_desc_ctx *dctx)
+{
+	u32 missing = dctx->bytes;
+	int rc;
+
+	if (!missing)
+		return 0;
+
+	memset(dctx->buffer + (GHASH_BLOCK_SIZE - missing), 0, missing);
+
+	rc = crypt_s390_kimd(KIMD_GHASH, dctx, dctx->buffer, GHASH_BLOCK_SIZE);
+	if (rc != GHASH_BLOCK_SIZE)
+		return -EIO;
+
+	dctx->bytes = 0;
+	return 0;
+}
+
+/*
+ * Finish the hash: flush any buffered partial block and, if that worked,
+ * copy the resulting value to dst.
+ */
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+	struct ghash_desc_ctx *state = shash_desc_ctx(desc);
+	int rc = ghash_flush(state);
+
+	if (rc)
+		return rc;
+
+	memcpy(dst, state->icv, GHASH_BLOCK_SIZE);
+	return 0;
+}
+
+/*
+ * shash descriptor for "ghash": wires up the handlers above and sizes the
+ * per-request (descsize) and per-transform (cra_ctxsize) contexts.
+ */
+static struct shash_alg ghash_alg = {
+	.digestsize	= GHASH_DIGEST_SIZE,
+	.init		= ghash_init,
+	.update		= ghash_update,
+	.final		= ghash_final,
+	.setkey		= ghash_setkey,
+	.descsize	= sizeof(struct ghash_desc_ctx),
+	.base		= {
+		.cra_name		= "ghash",
+		.cra_driver_name	= "ghash-s390",
+		.cra_priority		= CRYPT_S390_PRIORITY,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= GHASH_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct ghash_ctx),
+		.cra_module		= THIS_MODULE,
+	},
+};
+
+/* Register the algorithm only if the KIMD-GHASH function is available. */
+static int __init ghash_mod_init(void)
+{
+	if (crypt_s390_func_available(KIMD_GHASH,
+				      CRYPT_S390_MSA | CRYPT_S390_MSA4))
+		return crypto_register_shash(&ghash_alg);
+
+	return -EOPNOTSUPP;
+}
+
+/* Module unload: undo the registration done in ghash_mod_init(). */
+static void __exit ghash_mod_exit(void)
+{
+	crypto_unregister_shash(&ghash_alg);
+}
+
+module_init(ghash_mod_init);
+module_exit(ghash_mod_exit);
+
+MODULE_ALIAS_CRYPTO("ghash");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation");
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
new file mode 100644
index 000000000..9d5192c94
--- /dev/null
+++ b/arch/s390/crypto/prng.c
@@ -0,0 +1,919 @@
+/*
+ * Copyright IBM Corp. 2006, 2015
+ * Author(s): Jan Glauber <jan.glauber@de.ibm.com>
+ *	      Harald Freudenberger <freude@de.ibm.com>
+ * Driver for the s390 pseudo random number generator
+ */
+
+#define KMSG_COMPONENT "prng"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/fs.h>
+#include <linux/fips.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <asm/debug.h>
+#include <asm/uaccess.h>
+#include <asm/timex.h>
+
+#include "crypt_s390.h"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("IBM Corporation");
+MODULE_DESCRIPTION("s390 PRNG interface");
+
+
+#define PRNG_MODE_AUTO	  0
+#define PRNG_MODE_TDES	  1
+#define PRNG_MODE_SHA512  2
+
+/*
+ * Selected PRNG mode. The parameter is exposed under the name "mode", so
+ * its description has to be registered under that same name for modinfo
+ * to associate the two.
+ */
+static unsigned int prng_mode = PRNG_MODE_AUTO;
+module_param_named(mode, prng_mode, int, 0);
+MODULE_PARM_DESC(mode, "PRNG mode: 0 - auto, 1 - TDES, 2 - SHA512");
+
+
+#define PRNG_CHUNKSIZE_TDES_MIN   8
+#define PRNG_CHUNKSIZE_TDES_MAX   (64*1024)
+#define PRNG_CHUNKSIZE_SHA512_MIN 64
+#define PRNG_CHUNKSIZE_SHA512_MAX (64*1024)
+
+/*
+ * Number of bytes generated per PRNG invocation. The parameter is exposed
+ * under the name "chunksize", so the description uses that name too.
+ */
+static unsigned int prng_chunk_size = 256;
+module_param_named(chunksize, prng_chunk_size, int, 0);
+MODULE_PARM_DESC(chunksize, "PRNG read chunk size in bytes");
+
+
+#define PRNG_RESEED_LIMIT_TDES		 4096
+#define PRNG_RESEED_LIMIT_TDES_LOWER	 4096
+#define PRNG_RESEED_LIMIT_SHA512       100000
+#define PRNG_RESEED_LIMIT_SHA512_LOWER	10000
+
+/*
+ * Reseed threshold; 0 means "use the mode's default" (see prng_init()).
+ * The parameter is exposed under the name "reseed_limit", so the
+ * description uses that name too.
+ */
+static unsigned int prng_reseed_limit;
+module_param_named(reseed_limit, prng_reseed_limit, int, 0);
+MODULE_PARM_DESC(reseed_limit, "PRNG reseed limit");
+
+
+/*
+ * Any one who considers arithmetical methods of producing random digits is,
+ * of course, in a state of sin. -- John von Neumann
+ */
+
+/*
+ * Records the most recent failure, using one of the codes below.
+ * prng_sha512_read() refuses to work (-EPIPE) while it is non-zero, and the
+ * value is visible through the "errorflag" sysfs attribute. Nothing in this
+ * file resets it back to zero.
+ */
+static int prng_errorflag;
+
+#define PRNG_GEN_ENTROPY_FAILED  1
+#define PRNG_SELFTEST_FAILED	 2
+#define PRNG_INSTANTIATE_FAILED  3
+#define PRNG_SEED_FAILED	 4
+#define PRNG_RESEED_FAILED	 5
+#define PRNG_GEN_FAILED		 6
+
+/* Working state for TDES mode. */
+struct prng_ws_s {
+	u8  parm_block[32];	/* parameter block passed to KMC_PRNG */
+	u32 reseed_counter;	/* bytes generated since the last prng_tdes_seed() */
+	u64 byte_counter;	/* total bytes generated; shown via sysfs */
+};
+
+/*
+ * Working state for SHA-512 mode; passed as the parameter block to
+ * crypt_s390_ppno(). No code in this file writes the fields directly.
+ * NOTE(review): presumably the layout is dictated by the PPNO instruction
+ * and the counters are maintained by it -- confirm before changing.
+ */
+struct ppno_ws_s {
+	u32 res;
+	u32 reseed_counter;	/* compared against prng_reseed_limit */
+	u64 stream_bytes;	/* reported as byte counter via sysfs */
+	u8  V[112];		/* working state V, verified by the self test */
+	u8  C[112];		/* working state C, verified by the self test */
+};
+
+/*
+ * Global generator state. It is allocated together with the read buffer,
+ * and in SHA-512 mode with FIPS enabled also with the "prev" block, both
+ * of which live directly behind this struct.
+ */
+struct prng_data_s {
+	struct mutex mutex;	/* serializes access to everything below */
+	union {
+		struct prng_ws_s prngws;	/* TDES mode */
+		struct ppno_ws_s ppnows;	/* SHA-512 mode */
+	};
+	u8 *buf;		/* chunk buffer of prng_chunk_size bytes */
+	u32 rest;		/* SHA-512 mode: unread bytes at the end of buf */
+	u8 *prev;		/* FIPS only: previous chunk for the repeat check */
+};
+
+static struct prng_data_s *prng_data;
+
+/* initial parameter block for tdes mode, copied from libica */
+static const u8 initial_parm_block[32] __initconst = {
+	0x0F, 0x2B, 0x8E, 0x63, 0x8C, 0x8E, 0xD2, 0x52,
+	0x64, 0xB7, 0xA0, 0x7B, 0x75, 0x28, 0xB8, 0xF4,
+	0x75, 0x5F, 0xD2, 0xA6, 0x8D, 0x97, 0x11, 0xFF,
+	0x49, 0xD8, 0x23, 0xF3, 0x7E, 0x21, 0xEC, 0xA0 };
+
+
+/*** helper functions ***/
+
+/*
+ * Fill ebuf with nbytes of seed material.
+ *
+ * For every up to 32 bytes of output, a scratch page is filled with
+ * get_random_bytes(), each 64-bit word is XORed with a fresh
+ * get_tod_clock_fast() value, and the page is run through KIMD_SHA_256.
+ *
+ * Returns the number of bytes written (nbytes) on success, -ENOMEM if the
+ * scratch page cannot be allocated, or -EIO if the hash operation fails.
+ * Every failure also sets prng_errorflag to PRNG_GEN_ENTROPY_FAILED.
+ */
+static int generate_entropy(u8 *ebuf, size_t nbytes)
+{
+	int n, ret = 0;
+	u8 *pg, *h, hash[32];
+
+	pg = (u8 *) __get_free_page(GFP_KERNEL);
+	if (!pg) {
+		prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
+		return -ENOMEM;
+	}
+
+	while (nbytes) {
+		/* fill page with urandom bytes */
+		get_random_bytes(pg, PAGE_SIZE);
+		/* exor page with stckf values */
+		for (n = 0; n < PAGE_SIZE / sizeof(u64); n++) {
+			u64 *p = ((u64 *)pg) + n;
+			*p ^= get_tod_clock_fast();
+		}
+		/*
+		 * A full 32 byte digest is written straight into ebuf; a
+		 * shorter final piece goes through the local hash buffer.
+		 */
+		n = (nbytes < sizeof(hash)) ? nbytes : sizeof(hash);
+		if (n < sizeof(hash))
+			h = hash;
+		else
+			h = ebuf;
+		/*
+		 * generate sha256 from this page
+		 * NOTE(review): h is not preset here, so the operation starts
+		 * from whatever the target buffer currently holds -- confirm
+		 * that this is intended.
+		 */
+		if (crypt_s390_kimd(KIMD_SHA_256, h,
+				    pg, PAGE_SIZE) != PAGE_SIZE) {
+			prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
+			ret = -EIO;
+			goto out;
+		}
+		if (n < sizeof(hash))
+			memcpy(ebuf, hash, n);
+		ret += n;
+		ebuf += n;
+		nbytes -= n;
+	}
+
+out:
+	free_page((unsigned long)pg);
+	return ret;
+}
+
+
+/*** tdes functions ***/
+
+/*
+ * Stir the TDES parameter block: run KMC_PRNG 16 times in place over a
+ * local 32 byte buffer and feed each result back as the new parameter
+ * block. Any failure of the instruction is treated as fatal (BUG).
+ */
+static void prng_tdes_add_entropy(void)
+{
+	/* NOTE(review): not initialised before its first use as input */
+	__u64 entropy[4];
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < 16; i++) {
+		ret = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block,
+				     (char *)entropy, (char *)entropy,
+				     sizeof(entropy));
+		BUG_ON(ret < 0 || ret != sizeof(entropy));
+		memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy));
+	}
+}
+
+
+/*
+ * (Re)seed the TDES generator with nbytes (at most 16) of random data.
+ *
+ * The data is XORed into the first 8 bytes of the parameter block in
+ * 8 byte steps, stirring after each step and once more at the end; any
+ * trailing bytes beyond a multiple of 8 are not mixed in. Finally the
+ * reseed counter is reset.
+ */
+static void prng_tdes_seed(int nbytes)
+{
+	char buf[16];
+	int i = 0;
+
+	BUG_ON(nbytes > sizeof(buf));
+
+	get_random_bytes(buf, nbytes);
+
+	/* Add the entropy */
+	while (nbytes >= 8) {
+		*((__u64 *)prng_data->prngws.parm_block) ^= *((__u64 *)(buf+i));
+		prng_tdes_add_entropy();
+		i += 8;
+		nbytes -= 8;
+	}
+	prng_tdes_add_entropy();
+	prng_data->prngws.reseed_counter = 0;
+}
+
+
+/*
+ * Set up the generator for TDES mode: allocate the state together with the
+ * chunk buffer, load the initial parameter block and seed it.
+ * Returns 0 on success or -ENOMEM.
+ */
+static int __init prng_tdes_instantiate(void)
+{
+	int alloc_len = sizeof(struct prng_data_s) + prng_chunk_size;
+
+	pr_debug("prng runs in TDES mode with "
+		 "chunksize=%d and reseed_limit=%u\n",
+		 prng_chunk_size, prng_reseed_limit);
+
+	prng_data = kzalloc(alloc_len, GFP_KERNEL);
+	if (!prng_data) {
+		prng_errorflag = PRNG_INSTANTIATE_FAILED;
+		return -ENOMEM;
+	}
+
+	/* the chunk buffer starts right behind the struct */
+	prng_data->buf = (u8 *)(prng_data + 1);
+	mutex_init(&prng_data->mutex);
+	memcpy(prng_data->prngws.parm_block, initial_parm_block,
+	       sizeof(initial_parm_block));
+
+	/* 16 bytes = 128 bits of initial entropy */
+	prng_tdes_seed(16);
+
+	return 0;
+}
+
+
+/* Tear down TDES mode: free the generator state via kzfree(). */
+static void prng_tdes_deinstantiate(void)
+{
+	pr_debug("The prng module stopped "
+		 "after running in triple DES mode\n");
+	kzfree(prng_data);
+}
+
+
+/*** sha512 functions ***/
+
+/*
+ * Known-answer test for the SHA-512 generator.
+ *
+ * Seeds a private working state with a fixed seed, checks the resulting
+ * V and C values, generates two blocks of output and compares the second
+ * one with the expected data.
+ *
+ * Returns 0 on success. On any mismatch or instruction failure it logs an
+ * error, sets prng_errorflag to PRNG_SELFTEST_FAILED and returns -EIO.
+ */
+static int __init prng_sha512_selftest(void)
+{
+	/* NIST DRBG testvector for Hash Drbg, Sha-512, Count #0 */
+	static const u8 seed[] __initconst = {
+		0x6b, 0x50, 0xa7, 0xd8, 0xf8, 0xa5, 0x5d, 0x7a,
+		0x3d, 0xf8, 0xbb, 0x40, 0xbc, 0xc3, 0xb7, 0x22,
+		0xd8, 0x70, 0x8d, 0xe6, 0x7f, 0xda, 0x01, 0x0b,
+		0x03, 0xc4, 0xc8, 0x4d, 0x72, 0x09, 0x6f, 0x8c,
+		0x3e, 0xc6, 0x49, 0xcc, 0x62, 0x56, 0xd9, 0xfa,
+		0x31, 0xdb, 0x7a, 0x29, 0x04, 0xaa, 0xf0, 0x25 };
+	/* expected working state V after seeding */
+	static const u8 V0[] __initconst = {
+		0x00, 0xad, 0xe3, 0x6f, 0x9a, 0x01, 0xc7, 0x76,
+		0x61, 0x34, 0x35, 0xf5, 0x4e, 0x24, 0x74, 0x22,
+		0x21, 0x9a, 0x29, 0x89, 0xc7, 0x93, 0x2e, 0x60,
+		0x1e, 0xe8, 0x14, 0x24, 0x8d, 0xd5, 0x03, 0xf1,
+		0x65, 0x5d, 0x08, 0x22, 0x72, 0xd5, 0xad, 0x95,
+		0xe1, 0x23, 0x1e, 0x8a, 0xa7, 0x13, 0xd9, 0x2b,
+		0x5e, 0xbc, 0xbb, 0x80, 0xab, 0x8d, 0xe5, 0x79,
+		0xab, 0x5b, 0x47, 0x4e, 0xdd, 0xee, 0x6b, 0x03,
+		0x8f, 0x0f, 0x5c, 0x5e, 0xa9, 0x1a, 0x83, 0xdd,
+		0xd3, 0x88, 0xb2, 0x75, 0x4b, 0xce, 0x83, 0x36,
+		0x57, 0x4b, 0xf1, 0x5c, 0xca, 0x7e, 0x09, 0xc0,
+		0xd3, 0x89, 0xc6, 0xe0, 0xda, 0xc4, 0x81, 0x7e,
+		0x5b, 0xf9, 0xe1, 0x01, 0xc1, 0x92, 0x05, 0xea,
+		0xf5, 0x2f, 0xc6, 0xc6, 0xc7, 0x8f, 0xbc, 0xf4 };
+	/* expected working state C after seeding */
+	static const u8 C0[] __initconst = {
+		0x00, 0xf4, 0xa3, 0xe5, 0xa0, 0x72, 0x63, 0x95,
+		0xc6, 0x4f, 0x48, 0xd0, 0x8b, 0x5b, 0x5f, 0x8e,
+		0x6b, 0x96, 0x1f, 0x16, 0xed, 0xbc, 0x66, 0x94,
+		0x45, 0x31, 0xd7, 0x47, 0x73, 0x22, 0xa5, 0x86,
+		0xce, 0xc0, 0x4c, 0xac, 0x63, 0xb8, 0x39, 0x50,
+		0xbf, 0xe6, 0x59, 0x6c, 0x38, 0x58, 0x99, 0x1f,
+		0x27, 0xa7, 0x9d, 0x71, 0x2a, 0xb3, 0x7b, 0xf9,
+		0xfb, 0x17, 0x86, 0xaa, 0x99, 0x81, 0xaa, 0x43,
+		0xe4, 0x37, 0xd3, 0x1e, 0x6e, 0xe5, 0xe6, 0xee,
+		0xc2, 0xed, 0x95, 0x4f, 0x53, 0x0e, 0x46, 0x8a,
+		0xcc, 0x45, 0xa5, 0xdb, 0x69, 0x0d, 0x81, 0xc9,
+		0x32, 0x92, 0xbc, 0x8f, 0x33, 0xe6, 0xf6, 0x09,
+		0x7c, 0x8e, 0x05, 0x19, 0x0d, 0xf1, 0xb6, 0xcc,
+		0xf3, 0x02, 0x21, 0x90, 0x25, 0xec, 0xed, 0x0e };
+	/* expected output of the second generate call */
+	static const u8 random[] __initconst = {
+		0x95, 0xb7, 0xf1, 0x7e, 0x98, 0x02, 0xd3, 0x57,
+		0x73, 0x92, 0xc6, 0xa9, 0xc0, 0x80, 0x83, 0xb6,
+		0x7d, 0xd1, 0x29, 0x22, 0x65, 0xb5, 0xf4, 0x2d,
+		0x23, 0x7f, 0x1c, 0x55, 0xbb, 0x9b, 0x10, 0xbf,
+		0xcf, 0xd8, 0x2c, 0x77, 0xa3, 0x78, 0xb8, 0x26,
+		0x6a, 0x00, 0x99, 0x14, 0x3b, 0x3c, 0x2d, 0x64,
+		0x61, 0x1e, 0xee, 0xb6, 0x9a, 0xcd, 0xc0, 0x55,
+		0x95, 0x7c, 0x13, 0x9e, 0x8b, 0x19, 0x0c, 0x7a,
+		0x06, 0x95, 0x5f, 0x2c, 0x79, 0x7c, 0x27, 0x78,
+		0xde, 0x94, 0x03, 0x96, 0xa5, 0x01, 0xf4, 0x0e,
+		0x91, 0x39, 0x6a, 0xcf, 0x8d, 0x7e, 0x45, 0xeb,
+		0xdb, 0xb5, 0x3b, 0xbf, 0x8c, 0x97, 0x52, 0x30,
+		0xd2, 0xf0, 0xff, 0x91, 0x06, 0xc7, 0x61, 0x19,
+		0xae, 0x49, 0x8e, 0x7f, 0xbc, 0x03, 0xd9, 0x0f,
+		0x8e, 0x4c, 0x51, 0x62, 0x7a, 0xed, 0x5c, 0x8d,
+		0x42, 0x63, 0xd5, 0xd2, 0xb9, 0x78, 0x87, 0x3a,
+		0x0d, 0xe5, 0x96, 0xee, 0x6d, 0xc7, 0xf7, 0xc2,
+		0x9e, 0x37, 0xee, 0xe8, 0xb3, 0x4c, 0x90, 0xdd,
+		0x1c, 0xf6, 0xa9, 0xdd, 0xb2, 0x2b, 0x4c, 0xbd,
+		0x08, 0x6b, 0x14, 0xb3, 0x5d, 0xe9, 0x3d, 0xa2,
+		0xd5, 0xcb, 0x18, 0x06, 0x69, 0x8c, 0xbd, 0x7b,
+		0xbb, 0x67, 0xbf, 0xe3, 0xd3, 0x1f, 0xd2, 0xd1,
+		0xdb, 0xd2, 0xa1, 0xe0, 0x58, 0xa3, 0xeb, 0x99,
+		0xd7, 0xe5, 0x1f, 0x1a, 0x93, 0x8e, 0xed, 0x5e,
+		0x1c, 0x1d, 0xe2, 0x3a, 0x6b, 0x43, 0x45, 0xd3,
+		0x19, 0x14, 0x09, 0xf9, 0x2f, 0x39, 0xb3, 0x67,
+		0x0d, 0x8d, 0xbf, 0xb6, 0x35, 0xd8, 0xe6, 0xa3,
+		0x69, 0x32, 0xd8, 0x10, 0x33, 0xd1, 0x44, 0x8d,
+		0x63, 0xb4, 0x03, 0xdd, 0xf8, 0x8e, 0x12, 0x1b,
+		0x6e, 0x81, 0x9a, 0xc3, 0x81, 0x22, 0x6c, 0x13,
+		0x21, 0xe4, 0xb0, 0x86, 0x44, 0xf6, 0x72, 0x7c,
+		0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 };
+
+	int ret = 0;
+	u8 buf[sizeof(random)];
+	struct ppno_ws_s ws;
+
+	/* use a private, zeroed working state; the live one is untouched */
+	memset(&ws, 0, sizeof(ws));
+
+	/* initial seed */
+	ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
+			      &ws, NULL, 0,
+			      seed, sizeof(seed));
+	if (ret < 0) {
+		pr_err("The prng self test seed operation for the "
+		       "SHA-512 mode failed with rc=%d\n", ret);
+		prng_errorflag = PRNG_SELFTEST_FAILED;
+		return -EIO;
+	}
+
+	/* check working states V and C */
+	if (memcmp(ws.V, V0, sizeof(V0)) != 0
+	    || memcmp(ws.C, C0, sizeof(C0)) != 0) {
+		pr_err("The prng self test state test "
+		       "for the SHA-512 mode failed\n");
+		prng_errorflag = PRNG_SELFTEST_FAILED;
+		return -EIO;
+	}
+
+	/*
+	 * generate random bytes
+	 * The first block is overwritten by the second call below; only the
+	 * second block is compared with the expected data.
+	 */
+	ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+			      &ws, buf, sizeof(buf),
+			      NULL, 0);
+	if (ret < 0) {
+		pr_err("The prng self test generate operation for "
+		       "the SHA-512 mode failed with rc=%d\n", ret);
+		prng_errorflag = PRNG_SELFTEST_FAILED;
+		return -EIO;
+	}
+	ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+			      &ws, buf, sizeof(buf),
+			      NULL, 0);
+	if (ret < 0) {
+		pr_err("The prng self test generate operation for "
+		       "the SHA-512 mode failed with rc=%d\n", ret);
+		prng_errorflag = PRNG_SELFTEST_FAILED;
+		return -EIO;
+	}
+
+	/* check against expected data */
+	if (memcmp(buf, random, sizeof(random)) != 0) {
+		pr_err("The prng self test data test "
+		       "for the SHA-512 mode failed\n");
+		prng_errorflag = PRNG_SELFTEST_FAILED;
+		return -EIO;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Set up the generator for SHA-512 mode.
+ *
+ * Allocates the state together with the chunk buffer (plus a second chunk
+ * for the FIPS repeat check when fips_enabled), runs the self test, seeds
+ * the DRNG with 48 bytes from generate_entropy() followed by a 16 byte
+ * clock value and, in FIPS mode, generates the first reference block.
+ *
+ * Returns 0 on success or a negative errno. On failure the state is wiped
+ * and released and prng_data is reset to NULL.
+ */
+static int __init prng_sha512_instantiate(void)
+{
+	int ret, datalen;
+	u8 seed[64];
+
+	pr_debug("prng runs in SHA-512 mode "
+		 "with chunksize=%d and reseed_limit=%u\n",
+		 prng_chunk_size, prng_reseed_limit);
+
+	/* memory allocation, prng_data struct init, mutex init */
+	datalen = sizeof(struct prng_data_s) + prng_chunk_size;
+	if (fips_enabled)
+		datalen += prng_chunk_size;
+	prng_data = kzalloc(datalen, GFP_KERNEL);
+	if (!prng_data) {
+		prng_errorflag = PRNG_INSTANTIATE_FAILED;
+		return -ENOMEM;
+	}
+	mutex_init(&prng_data->mutex);
+	prng_data->buf = ((u8 *)prng_data) + sizeof(struct prng_data_s);
+
+	/* selftest */
+	ret = prng_sha512_selftest();
+	if (ret)
+		goto outfree;
+
+	/* generate initial seed bytestring, first 48 bytes of entropy */
+	ret = generate_entropy(seed, 48);
+	if (ret != 48)
+		goto outfree;
+	/* followed by 16 bytes of unique nonce */
+	get_tod_clock_ext(seed + 48);
+
+	/* initial seed of the ppno drng */
+	ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
+			      &prng_data->ppnows, NULL, 0,
+			      seed, sizeof(seed));
+	if (ret < 0) {
+		prng_errorflag = PRNG_SEED_FAILED;
+		ret = -EIO;
+		goto outfree;
+	}
+
+	/* if fips mode is enabled, generate a first block of random
+	   bytes for the FIPS 140-2 Conditional Self Test */
+	if (fips_enabled) {
+		prng_data->prev = prng_data->buf + prng_chunk_size;
+		ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+				      &prng_data->ppnows,
+				      prng_data->prev,
+				      prng_chunk_size,
+				      NULL, 0);
+		if (ret < 0 || ret != prng_chunk_size) {
+			prng_errorflag = PRNG_GEN_FAILED;
+			ret = -EIO;
+			goto outfree;
+		}
+	}
+
+	return 0;
+
+outfree:
+	/*
+	 * The state may already hold seeded DRNG material, so wipe it on
+	 * release just like the regular teardown does, and do not leave a
+	 * stale pointer behind.
+	 */
+	kzfree(prng_data);
+	prng_data = NULL;
+	return ret;
+}
+
+
+/* Tear down SHA-512 mode: free the generator state via kzfree(). */
+static void prng_sha512_deinstantiate(void)
+{
+	pr_debug("The prng module stopped after running in SHA-512 mode\n");
+	kzfree(prng_data);
+}
+
+
+/*
+ * Reseed the SHA-512 DRNG with 32 bytes from generate_entropy().
+ *
+ * Returns 0 on success, the error from generate_entropy(), or -EIO (with
+ * prng_errorflag set to PRNG_RESEED_FAILED) if the seed operation fails.
+ * The callers in this file hold prng_data->mutex around this call.
+ */
+static int prng_sha512_reseed(void)
+{
+	int ret;
+	u8 seed[32];
+
+	/* generate 32 bytes of fresh entropy */
+	ret = generate_entropy(seed, sizeof(seed));
+	if (ret != sizeof(seed))
+		return ret;
+
+	/* do a reseed of the ppno drng with this bytestring */
+	ret = crypt_s390_ppno(PPNO_SHA512_DRNG_SEED,
+			      &prng_data->ppnows, NULL, 0,
+			      seed, sizeof(seed));
+	if (ret) {
+		prng_errorflag = PRNG_RESEED_FAILED;
+		return -EIO;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Generate nbytes of random data into buf, reseeding first when the
+ * working state's reseed counter exceeds prng_reseed_limit.
+ *
+ * With fips_enabled the new data is compared with the previously generated
+ * block and then saved as the new reference. The only caller in this file
+ * passes prng_chunk_size, which matches the size reserved for "prev".
+ *
+ * Returns the number of bytes generated, the error from the reseed, -EIO
+ * if generation fails, or -EILSEQ if the output repeats the previous block.
+ */
+static int prng_sha512_generate(u8 *buf, size_t nbytes)
+{
+	int ret;
+
+	/* reseed needed ? */
+	if (prng_data->ppnows.reseed_counter > prng_reseed_limit) {
+		ret = prng_sha512_reseed();
+		if (ret)
+			return ret;
+	}
+
+	/* PPNO generate */
+	ret = crypt_s390_ppno(PPNO_SHA512_DRNG_GEN,
+			      &prng_data->ppnows, buf, nbytes,
+			      NULL, 0);
+	if (ret < 0 || ret != nbytes) {
+		prng_errorflag = PRNG_GEN_FAILED;
+		return -EIO;
+	}
+
+	/* FIPS 140-2 Conditional Self Test */
+	if (fips_enabled) {
+		if (!memcmp(prng_data->prev, buf, nbytes)) {
+			prng_errorflag = PRNG_GEN_FAILED;
+			return -EILSEQ;
+		}
+		memcpy(prng_data->prev, buf, nbytes);
+	}
+
+	return ret;
+}
+
+
+/*** file io functions ***/
+
+/* open() handler shared by both modes; defers to nonseekable_open(). */
+static int prng_open(struct inode *inode, struct file *file)
+{
+	return nonseekable_open(inode, file);
+}
+
+
+/*
+ * read() handler for TDES mode.
+ *
+ * Produces the requested data in pieces of at most prng_chunk_size bytes
+ * while holding prng_data->mutex. The mutex is dropped around schedule()
+ * whenever a reschedule is due, and the generator is reseeded once the
+ * reseed counter exceeds prng_reseed_limit.
+ *
+ * Returns the number of bytes copied to user space, -ERESTARTSYS if
+ * interrupted before any data was copied, -EIO if the PRNG operation
+ * fails, or -EFAULT if the user buffer cannot be written.
+ */
+static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
+			      size_t nbytes, loff_t *ppos)
+{
+	int chunk, n, tmp, ret = 0;
+
+	/* lock prng_data struct */
+	if (mutex_lock_interruptible(&prng_data->mutex))
+		return -ERESTARTSYS;
+
+	while (nbytes) {
+		if (need_resched()) {
+			if (signal_pending(current)) {
+				if (ret == 0)
+					ret = -ERESTARTSYS;
+				break;
+			}
+			/* give mutex free before calling schedule() */
+			mutex_unlock(&prng_data->mutex);
+			schedule();
+			/* re-acquire the mutex; it is NOT held if this fails */
+			if (mutex_lock_interruptible(&prng_data->mutex)) {
+				if (ret == 0)
+					ret = -ERESTARTSYS;
+				return ret;
+			}
+		}
+
+		/*
+		 * we lose some random bytes if an attacker issues
+		 * reads < 8 bytes, but we don't care
+		 */
+		chunk = min_t(int, nbytes, prng_chunk_size);
+
+		/* PRNG only likes multiples of 8 bytes */
+		n = (chunk + 7) & -8;
+
+		if (prng_data->prngws.reseed_counter > prng_reseed_limit)
+			prng_tdes_seed(8);
+
+		/* if the CPU supports PRNG stckf is present too */
+		*((unsigned long long *)prng_data->buf) = get_tod_clock_fast();
+
+		/*
+		 * Beside the STCKF the input for the TDES-EDE is the output
+		 * of the last operation. We differ here from X9.17 since we
+		 * only store one timestamp into the buffer. Padding the whole
+		 * buffer with timestamps does not improve security, since
+		 * successive stckf have nearly constant offsets.
+		 * If an attacker knows the first timestamp it would be
+		 * trivial to guess the additional values. One timestamp
+		 * is therefore enough and still guarantees unique input values.
+		 *
+		 * Note: you can still get strict X9.17 conformity by setting
+		 * prng_chunk_size to 8 bytes.
+		*/
+		tmp = crypt_s390_kmc(KMC_PRNG, prng_data->prngws.parm_block,
+				     prng_data->buf, prng_data->buf, n);
+		if (tmp < 0 || tmp != n) {
+			ret = -EIO;
+			break;
+		}
+
+		prng_data->prngws.byte_counter += n;
+		prng_data->prngws.reseed_counter += n;
+
+		if (copy_to_user(ubuf, prng_data->buf, chunk)) {
+			/*
+			 * Leave through the common exit so the mutex is
+			 * released; returning here would keep it locked
+			 * and block every later reader.
+			 */
+			ret = -EFAULT;
+			break;
+		}
+
+		nbytes -= chunk;
+		ret += chunk;
+		ubuf += chunk;
+	}
+
+	/* unlock prng_data struct */
+	mutex_unlock(&prng_data->mutex);
+
+	return ret;
+}
+
+
+/*
+ * read() handler for SHA-512 mode.
+ *
+ * Data is always generated in whole chunks of prng_chunk_size bytes. If a
+ * reader takes less than a chunk, the unread tail stays in prng_data->buf
+ * (tracked by prng_data->rest) and is handed out first on the next pass.
+ *
+ * Returns the number of bytes copied to user space, -EPIPE if an earlier
+ * error is recorded in prng_errorflag, -ERESTARTSYS if interrupted before
+ * any data was copied, the error from prng_sha512_generate(), or -EFAULT
+ * if the user buffer cannot be written.
+ */
+static ssize_t prng_sha512_read(struct file *file, char __user *ubuf,
+				size_t nbytes, loff_t *ppos)
+{
+	int n, ret = 0;
+	u8 *p;
+
+	/* if errorflag is set do nothing and return 'broken pipe' */
+	if (prng_errorflag)
+		return -EPIPE;
+
+	/* lock prng_data struct */
+	if (mutex_lock_interruptible(&prng_data->mutex))
+		return -ERESTARTSYS;
+
+	while (nbytes) {
+		if (need_resched()) {
+			if (signal_pending(current)) {
+				if (ret == 0)
+					ret = -ERESTARTSYS;
+				break;
+			}
+			/* give mutex free before calling schedule() */
+			mutex_unlock(&prng_data->mutex);
+			schedule();
+			/* re-acquire the mutex; it is NOT held if this fails */
+			if (mutex_lock_interruptible(&prng_data->mutex)) {
+				if (ret == 0)
+					ret = -ERESTARTSYS;
+				return ret;
+			}
+		}
+		if (prng_data->rest) {
+			/* push left over random bytes from the previous read */
+			p = prng_data->buf + prng_chunk_size - prng_data->rest;
+			n = (nbytes < prng_data->rest) ?
+				nbytes : prng_data->rest;
+			prng_data->rest -= n;
+		} else {
+			/* generate one chunk of random bytes into read buf */
+			p = prng_data->buf;
+			n = prng_sha512_generate(p, prng_chunk_size);
+			if (n < 0) {
+				ret = n;
+				break;
+			}
+			/* remember what this reader leaves behind, if anything */
+			if (nbytes < prng_chunk_size) {
+				n = nbytes;
+				prng_data->rest = prng_chunk_size - n;
+			} else {
+				n = prng_chunk_size;
+				prng_data->rest = 0;
+			}
+		}
+		if (copy_to_user(ubuf, p, n)) {
+			ret = -EFAULT;
+			break;
+		}
+		ubuf += n;
+		nbytes -= n;
+		ret += n;
+	}
+
+	/* unlock prng_data struct */
+	mutex_unlock(&prng_data->mutex);
+
+	return ret;
+}
+
+
+/*** misc device and sysfs stuff ***/
+
+/* File operations of the misc device, one set per mode (read differs). */
+static const struct file_operations prng_sha512_fops = {
+	.owner		= THIS_MODULE,
+	.open		= &prng_open,
+	.release	= NULL,
+	.read		= &prng_sha512_read,
+	.llseek		= noop_llseek,
+};
+static const struct file_operations prng_tdes_fops = {
+	.owner		= THIS_MODULE,
+	.open		= &prng_open,
+	.release	= NULL,
+	.read		= &prng_tdes_read,
+	.llseek		= noop_llseek,
+};
+
+/*
+ * Misc device definitions. Both use the name "prandom"; prng_init()
+ * registers only the one matching the selected mode.
+ */
+static struct miscdevice prng_sha512_dev = {
+	.name	= "prandom",
+	.minor	= MISC_DYNAMIC_MINOR,
+	.fops	= &prng_sha512_fops,
+};
+static struct miscdevice prng_tdes_dev = {
+	.name	= "prandom",
+	.minor	= MISC_DYNAMIC_MINOR,
+	.fops	= &prng_tdes_fops,
+};
+
+
+/* chunksize attribute (ro): prints the current prng_chunk_size */
+static ssize_t prng_chunksize_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size);
+}
+static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL);
+
+/*
+ * counter attribute (ro): prints the byte counter kept in the working
+ * state of the active mode, sampled under the mutex
+ */
+static ssize_t prng_counter_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	u64 bytes_out;
+
+	if (mutex_lock_interruptible(&prng_data->mutex))
+		return -ERESTARTSYS;
+	bytes_out = (prng_mode == PRNG_MODE_SHA512) ?
+		prng_data->ppnows.stream_bytes :
+		prng_data->prngws.byte_counter;
+	mutex_unlock(&prng_data->mutex);
+
+	return snprintf(buf, PAGE_SIZE, "%llu\n", bytes_out);
+}
+static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL);
+
+/* errorflag attribute (ro): prints prng_errorflag (0 means no error) */
+static ssize_t prng_errorflag_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag);
+}
+static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL);
+
+/* mode attribute (ro): prints the name of the active mode */
+static ssize_t prng_mode_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	/* everything that is not TDES is reported as SHA512 */
+	if (prng_mode != PRNG_MODE_TDES)
+		return snprintf(buf, PAGE_SIZE, "SHA512\n");
+
+	return snprintf(buf, PAGE_SIZE, "TDES\n");
+}
+static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL);
+
+/*
+ * reseed attribute (w): any write triggers a reseed of the SHA-512 DRNG;
+ * the written data itself is ignored. The attribute is only part of the
+ * SHA-512 attribute list.
+ * NOTE(review): the result of prng_sha512_reseed() is not returned to the
+ * writer; a failure is only visible through prng_errorflag.
+ */
+static ssize_t prng_reseed_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	if (mutex_lock_interruptible(&prng_data->mutex))
+		return -ERESTARTSYS;
+	prng_sha512_reseed();
+	mutex_unlock(&prng_data->mutex);
+
+	return count;
+}
+static DEVICE_ATTR(reseed, 0200, NULL, prng_reseed_store);
+
+/* reseed limit attribute (rw): the show side prints prng_reseed_limit */
+static ssize_t prng_reseed_limit_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit);
+}
+/*
+ * Store side of the reseed limit attribute: parse an unsigned value and
+ * accept it only if it is not below the lower bound of the active mode.
+ * Returns count on success, -EINVAL otherwise.
+ */
+static ssize_t prng_reseed_limit_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t count)
+{
+	unsigned int min_limit, new_limit;
+
+	if (sscanf(buf, "%u\n", &new_limit) != 1)
+		return -EINVAL;
+
+	min_limit = (prng_mode == PRNG_MODE_SHA512) ?
+		PRNG_RESEED_LIMIT_SHA512_LOWER : PRNG_RESEED_LIMIT_TDES_LOWER;
+	if (new_limit < min_limit)
+		return -EINVAL;
+
+	prng_reseed_limit = new_limit;
+
+	return count;
+}
+static DEVICE_ATTR(reseed_limit, 0644,
+		   prng_reseed_limit_show, prng_reseed_limit_store);
+
+/*
+ * strength attribute (ro): always prints the constant 256; only part of
+ * the SHA-512 attribute list
+ */
+static ssize_t prng_strength_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "256\n");
+}
+static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL);
+
+/*
+ * sysfs attribute sets per mode. TDES mode exposes only chunksize,
+ * byte_counter and mode; SHA-512 mode additionally exposes errorflag,
+ * reseed, reseed_limit and strength.
+ */
+static struct attribute *prng_sha512_dev_attrs[] = {
+	&dev_attr_errorflag.attr,
+	&dev_attr_chunksize.attr,
+	&dev_attr_byte_counter.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_reseed.attr,
+	&dev_attr_reseed_limit.attr,
+	&dev_attr_strength.attr,
+	NULL
+};
+static struct attribute *prng_tdes_dev_attrs[] = {
+	&dev_attr_chunksize.attr,
+	&dev_attr_byte_counter.attr,
+	&dev_attr_mode.attr,
+	NULL
+};
+
+static struct attribute_group prng_sha512_dev_attr_group = {
+	.attrs = prng_sha512_dev_attrs
+};
+static struct attribute_group prng_tdes_dev_attr_group = {
+	.attrs = prng_tdes_dev_attrs
+};
+
+
+/*** module init and exit ***/
+
+/*
+ * Module init.
+ *
+ * Resolves the operating mode (an explicit TDES request is honoured; in
+ * every other case SHA-512 is used when the PPNO function is available,
+ * with a fallback to TDES only for auto mode), validates and rounds the
+ * chunk size, applies the default reseed limit, instantiates the generator
+ * and registers the misc device plus its sysfs attribute group.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the required CPU function is
+ * missing, -EINVAL for out-of-range parameters, or the error of the
+ * failing setup step. Each failure after instantiation undoes the steps
+ * already completed.
+ */
+static int __init prng_init(void)
+{
+	int ret;
+
+	/* check if the CPU has a PRNG */
+	if (!crypt_s390_func_available(KMC_PRNG, CRYPT_S390_MSA))
+		return -EOPNOTSUPP;
+
+	/* choose prng mode */
+	if (prng_mode != PRNG_MODE_TDES) {
+		/* check for MSA5 support for PPNO operations */
+		if (!crypt_s390_func_available(PPNO_SHA512_DRNG_GEN,
+					       CRYPT_S390_MSA5)) {
+			/* SHA-512 was requested explicitly: give up */
+			if (prng_mode == PRNG_MODE_SHA512) {
+				pr_err("The prng module cannot "
+				       "start in SHA-512 mode\n");
+				return -EOPNOTSUPP;
+			}
+			prng_mode = PRNG_MODE_TDES;
+		} else
+			prng_mode = PRNG_MODE_SHA512;
+	}
+
+	if (prng_mode == PRNG_MODE_SHA512) {
+
+		/* SHA512 mode */
+
+		if (prng_chunk_size < PRNG_CHUNKSIZE_SHA512_MIN
+		    || prng_chunk_size > PRNG_CHUNKSIZE_SHA512_MAX)
+			return -EINVAL;
+		/* round up to a multiple of 64 bytes */
+		prng_chunk_size = (prng_chunk_size + 0x3f) & ~0x3f;
+
+		if (prng_reseed_limit == 0)
+			prng_reseed_limit = PRNG_RESEED_LIMIT_SHA512;
+		else if (prng_reseed_limit < PRNG_RESEED_LIMIT_SHA512_LOWER)
+			return -EINVAL;
+
+		ret = prng_sha512_instantiate();
+		if (ret)
+			goto out;
+
+		ret = misc_register(&prng_sha512_dev);
+		if (ret) {
+			prng_sha512_deinstantiate();
+			goto out;
+		}
+		ret = sysfs_create_group(&prng_sha512_dev.this_device->kobj,
+					 &prng_sha512_dev_attr_group);
+		if (ret) {
+			misc_deregister(&prng_sha512_dev);
+			prng_sha512_deinstantiate();
+			goto out;
+		}
+
+	} else {
+
+		/* TDES mode */
+
+		if (prng_chunk_size < PRNG_CHUNKSIZE_TDES_MIN
+		    || prng_chunk_size > PRNG_CHUNKSIZE_TDES_MAX)
+			return -EINVAL;
+		/* round up to a multiple of 8 bytes */
+		prng_chunk_size = (prng_chunk_size + 0x07) & ~0x07;
+
+		if (prng_reseed_limit == 0)
+			prng_reseed_limit = PRNG_RESEED_LIMIT_TDES;
+		else if (prng_reseed_limit < PRNG_RESEED_LIMIT_TDES_LOWER)
+			return -EINVAL;
+
+		ret = prng_tdes_instantiate();
+		if (ret)
+			goto out;
+
+		ret = misc_register(&prng_tdes_dev);
+		if (ret) {
+			prng_tdes_deinstantiate();
+			goto out;
+		}
+		ret = sysfs_create_group(&prng_tdes_dev.this_device->kobj,
+					 &prng_tdes_dev_attr_group);
+		if (ret) {
+			misc_deregister(&prng_tdes_dev);
+			prng_tdes_deinstantiate();
+			goto out;
+		}
+
+	}
+
+out:
+	return ret;
+}
+
+
+/*
+ * Module exit: undo prng_init() for whichever mode was selected, in
+ * reverse order (sysfs group, misc device, generator state).
+ */
+static void __exit prng_exit(void)
+{
+	if (prng_mode == PRNG_MODE_SHA512) {
+		sysfs_remove_group(&prng_sha512_dev.this_device->kobj,
+				   &prng_sha512_dev_attr_group);
+		misc_deregister(&prng_sha512_dev);
+		prng_sha512_deinstantiate();
+	} else {
+		sysfs_remove_group(&prng_tdes_dev.this_device->kobj,
+				   &prng_tdes_dev_attr_group);
+		misc_deregister(&prng_tdes_dev);
+		prng_tdes_deinstantiate();
+	}
+}
+
+
+module_init(prng_init);
+module_exit(prng_exit);
diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h
new file mode 100644
index 000000000..f4e9dc716
--- /dev/null
+++ b/arch/s390/crypto/sha.h
@@ -0,0 +1,37 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 generic implementation of the SHA Secure Hash Algorithms.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Jan Glauber (jang@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#ifndef _CRYPTO_ARCH_S390_SHA_H
+#define _CRYPTO_ARCH_S390_SHA_H
+
+#include <linux/crypto.h>
+#include <crypto/sha.h>
+
+/* must be big enough for the largest SHA variant */
+#define SHA_MAX_STATE_SIZE	16
+#define SHA_MAX_BLOCK_SIZE      SHA512_BLOCK_SIZE
+
+/*
+ * Request context shared by the s390 SHA implementations. state and buf
+ * are sized via SHA_MAX_STATE_SIZE / SHA_MAX_BLOCK_SIZE so that one layout
+ * serves every variant.
+ */
+struct s390_sha_ctx {
+	u64 count;              /* message length in bytes */
+	u32 state[SHA_MAX_STATE_SIZE];	/* hash state, set up by the init hooks */
+	u8 buf[2 * SHA_MAX_BLOCK_SIZE];	/* data buffer saved/restored by export/import */
+	int func;		/* KIMD function to use */
+};
+
+struct shash_desc;
+
+int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len);
+int s390_sha_final(struct shash_desc *desc, u8 *out);
+
+#endif
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
new file mode 100644
index 000000000..5b2bee323
--- /dev/null
+++ b/arch/s390/crypto/sha1_s390.c
@@ -0,0 +1,108 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the SHA1 Secure Hash Algorithm.
+ *
+ * Derived from cryptoapi implementation, adapted for in-place
+ * scatterlist interface.  Originally based on the public domain
+ * implementation written by Steve Reid.
+ *
+ * s390 Version:
+ *   Copyright IBM Corp. 2003, 2007
+ *   Author(s): Thomas Spatzier
+ *		Jan Glauber (jan.glauber@de.ibm.com)
+ *
+ * Derived from "crypto/sha1_generic.c"
+ *   Copyright (c) Alan Smithee.
+ *   Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ *   Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <crypto/sha.h>
+
+#include "crypt_s390.h"
+#include "sha.h"
+
+/* Reset the request context to the SHA-1 initial state. */
+static int sha1_init(struct shash_desc *desc)
+{
+	static const u32 sha1_iv[] = {
+		SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4,
+	};
+	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+
+	memcpy(ctx->state, sha1_iv, sizeof(sha1_iv));
+	ctx->func = KIMD_SHA_1;
+	ctx->count = 0;
+
+	return 0;
+}
+
+/* Save the running hash into a struct sha1_state at out. */
+static int sha1_export(struct shash_desc *desc, void *out)
+{
+	struct sha1_state *saved = out;
+	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+
+	memcpy(saved->buffer, ctx->buf, sizeof(saved->buffer));
+	memcpy(saved->state, ctx->state, sizeof(saved->state));
+	saved->count = ctx->count;
+
+	return 0;
+}
+
+/*
+ * Restore a hash saved by sha1_export(). The KIMD function is set again
+ * because it is not part of struct sha1_state.
+ */
+static int sha1_import(struct shash_desc *desc, const void *in)
+{
+	const struct sha1_state *saved = in;
+	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->func = KIMD_SHA_1;
+	ctx->count = saved->count;
+	memcpy(ctx->buf, saved->buffer, sizeof(saved->buffer));
+	memcpy(ctx->state, saved->state, sizeof(saved->state));
+
+	return 0;
+}
+
+/*
+ * shash descriptor for "sha1". update/final are the shared s390 helpers;
+ * the exported state uses the generic struct sha1_state layout.
+ */
+static struct shash_alg alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_init,
+	.update		=	s390_sha_update,
+	.final		=	s390_sha_final,
+	.export		=	sha1_export,
+	.import		=	sha1_import,
+	.descsize	=	sizeof(struct s390_sha_ctx),
+	.statesize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name=	"sha1-s390",
+		.cra_priority	=	CRYPT_S390_PRIORITY,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+/* Register the algorithm only if the KIMD-SHA-1 function is available. */
+static int __init sha1_s390_init(void)
+{
+	if (crypt_s390_func_available(KIMD_SHA_1, CRYPT_S390_MSA))
+		return crypto_register_shash(&alg);
+
+	return -EOPNOTSUPP;
+}
+
+/* Module unload: undo the registration done in sha1_s390_init(). */
+static void __exit sha1_s390_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(sha1_s390_init);
+module_exit(sha1_s390_fini);
+
+MODULE_ALIAS_CRYPTO("sha1");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
new file mode 100644
index 000000000..b74ff1581
--- /dev/null
+++ b/arch/s390/crypto/sha256_s390.c
@@ -0,0 +1,149 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the SHA256 and SHA224 Secure Hash Algorithm.
+ *
+ * s390 Version:
+ *   Copyright IBM Corp. 2005, 2011
+ *   Author(s): Jan Glauber (jang@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <crypto/sha.h>
+
+#include "crypt_s390.h"
+#include "sha.h"
+
+static int sha256_init(struct shash_desc *desc)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA256_H0;
+	sctx->state[1] = SHA256_H1;
+	sctx->state[2] = SHA256_H2;
+	sctx->state[3] = SHA256_H3;
+	sctx->state[4] = SHA256_H4;
+	sctx->state[5] = SHA256_H5;
+	sctx->state[6] = SHA256_H6;
+	sctx->state[7] = SHA256_H7;
+	sctx->count = 0;
+	sctx->func = KIMD_SHA_256;
+
+	return 0;
+}
+
+static int sha256_export(struct shash_desc *desc, void *out)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	struct sha256_state *octx = out;
+
+	octx->count = sctx->count;
+	memcpy(octx->state, sctx->state, sizeof(octx->state));
+	memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
+	return 0;
+}
+
+static int sha256_import(struct shash_desc *desc, const void *in)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	const struct sha256_state *ictx = in;
+
+	sctx->count = ictx->count;
+	memcpy(sctx->state, ictx->state, sizeof(ictx->state));
+	memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
+	sctx->func = KIMD_SHA_256;
+	return 0;
+}
+
+static struct shash_alg sha256_alg = {
+	.digestsize	=	SHA256_DIGEST_SIZE,
+	.init		=	sha256_init,
+	.update		=	s390_sha_update,
+	.final		=	s390_sha_final,
+	.export		=	sha256_export,
+	.import		=	sha256_import,
+	.descsize	=	sizeof(struct s390_sha_ctx),
+	.statesize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha256",
+		.cra_driver_name=	"sha256-s390",
+		.cra_priority	=	CRYPT_S390_PRIORITY,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA256_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int sha224_init(struct shash_desc *desc)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA224_H0;
+	sctx->state[1] = SHA224_H1;
+	sctx->state[2] = SHA224_H2;
+	sctx->state[3] = SHA224_H3;
+	sctx->state[4] = SHA224_H4;
+	sctx->state[5] = SHA224_H5;
+	sctx->state[6] = SHA224_H6;
+	sctx->state[7] = SHA224_H7;
+	sctx->count = 0;
+	sctx->func = KIMD_SHA_256;
+
+	return 0;
+}
+
+static struct shash_alg sha224_alg = {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	sha224_init,
+	.update		=	s390_sha_update,
+	.final		=	s390_sha_final,
+	.export		=	sha256_export,
+	.import		=	sha256_import,
+	.descsize	=	sizeof(struct s390_sha_ctx),
+	.statesize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name=	"sha224-s390",
+		.cra_priority	=	CRYPT_S390_PRIORITY,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static int __init sha256_s390_init(void)
+{
+	int rc;
+
+	/* Refuse to load when KIMD_SHA_256 is reported as unavailable. */
+	if (!crypt_s390_func_available(KIMD_SHA_256, CRYPT_S390_MSA))
+		return -EOPNOTSUPP;
+	rc = crypto_register_shash(&sha256_alg);
+	if (rc < 0)
+		return rc;
+	rc = crypto_register_shash(&sha224_alg);
+	if (rc < 0)	/* roll back the sha256 registration */
+		crypto_unregister_shash(&sha256_alg);
+	return rc;
+}
+
+static void __exit sha256_s390_fini(void)
+{
+	crypto_unregister_shash(&sha224_alg);
+	crypto_unregister_shash(&sha256_alg);
+}
+
+module_init(sha256_s390_init);
+module_exit(sha256_s390_fini);
+
+MODULE_ALIAS_CRYPTO("sha256");
+MODULE_ALIAS_CRYPTO("sha224");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
new file mode 100644
index 000000000..0c36989ba
--- /dev/null
+++ b/arch/s390/crypto/sha512_s390.c
@@ -0,0 +1,155 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 implementation of the SHA512 and SHA384 Secure Hash Algorithm.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Jan Glauber (jang@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "sha.h"
+#include "crypt_s390.h"
+
+static int sha512_init(struct shash_desc *desc)
+{
+	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+
+	*(__u64 *)&ctx->state[0] = 0x6a09e667f3bcc908ULL;
+	*(__u64 *)&ctx->state[2] = 0xbb67ae8584caa73bULL;
+	*(__u64 *)&ctx->state[4] = 0x3c6ef372fe94f82bULL;
+	*(__u64 *)&ctx->state[6] = 0xa54ff53a5f1d36f1ULL;
+	*(__u64 *)&ctx->state[8] = 0x510e527fade682d1ULL;
+	*(__u64 *)&ctx->state[10] = 0x9b05688c2b3e6c1fULL;
+	*(__u64 *)&ctx->state[12] = 0x1f83d9abfb41bd6bULL;
+	*(__u64 *)&ctx->state[14] = 0x5be0cd19137e2179ULL;
+	ctx->count = 0;
+	ctx->func = KIMD_SHA_512;
+
+	return 0;
+}
+
+static int sha512_export(struct shash_desc *desc, void *out)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	struct sha512_state *octx = out;
+
+	octx->count[0] = sctx->count;
+	octx->count[1] = 0;
+	memcpy(octx->state, sctx->state, sizeof(octx->state));
+	memcpy(octx->buf, sctx->buf, sizeof(octx->buf));
+	return 0;
+}
+
+static int sha512_import(struct shash_desc *desc, const void *in)
+{
+	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
+	const struct sha512_state *ictx = in;
+
+	if (unlikely(ictx->count[1]))
+		return -ERANGE;
+	sctx->count = ictx->count[0];
+
+	memcpy(sctx->state, ictx->state, sizeof(ictx->state));
+	memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf));
+	sctx->func = KIMD_SHA_512;
+	return 0;
+}
+
+static struct shash_alg sha512_alg = {
+	.digestsize	=	SHA512_DIGEST_SIZE,
+	.init		=	sha512_init,
+	.update		=	s390_sha_update,
+	.final		=	s390_sha_final,
+	.export		=	sha512_export,
+	.import		=	sha512_import,
+	.descsize	=	sizeof(struct s390_sha_ctx),
+	.statesize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha512",
+		.cra_driver_name=	"sha512-s390",
+		.cra_priority	=	CRYPT_S390_PRIORITY,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA512_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+MODULE_ALIAS_CRYPTO("sha512");
+
+static int sha384_init(struct shash_desc *desc)
+{
+	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+
+	*(__u64 *)&ctx->state[0] = 0xcbbb9d5dc1059ed8ULL;
+	*(__u64 *)&ctx->state[2] = 0x629a292a367cd507ULL;
+	*(__u64 *)&ctx->state[4] = 0x9159015a3070dd17ULL;
+	*(__u64 *)&ctx->state[6] = 0x152fecd8f70e5939ULL;
+	*(__u64 *)&ctx->state[8] = 0x67332667ffc00b31ULL;
+	*(__u64 *)&ctx->state[10] = 0x8eb44a8768581511ULL;
+	*(__u64 *)&ctx->state[12] = 0xdb0c2e0d64f98fa7ULL;
+	*(__u64 *)&ctx->state[14] = 0x47b5481dbefa4fa4ULL;
+	ctx->count = 0;
+	ctx->func = KIMD_SHA_512;
+
+	return 0;
+}
+
+static struct shash_alg sha384_alg = {
+	.digestsize	=	SHA384_DIGEST_SIZE,
+	.init		=	sha384_init,
+	.update		=	s390_sha_update,
+	.final		=	s390_sha_final,
+	.export		=	sha512_export,
+	.import		=	sha512_import,
+	.descsize	=	sizeof(struct s390_sha_ctx),
+	.statesize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha384",
+		.cra_driver_name=	"sha384-s390",
+		.cra_priority	=	CRYPT_S390_PRIORITY,
+		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	=	SHA384_BLOCK_SIZE,
+		.cra_ctxsize	=	sizeof(struct s390_sha_ctx),
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+MODULE_ALIAS_CRYPTO("sha384");
+
+static int __init init(void)
+{
+	int ret;
+
+	if (!crypt_s390_func_available(KIMD_SHA_512, CRYPT_S390_MSA))
+		return -EOPNOTSUPP;
+	if ((ret = crypto_register_shash(&sha512_alg)) < 0)
+		goto out;
+	if ((ret = crypto_register_shash(&sha384_alg)) < 0)
+		crypto_unregister_shash(&sha512_alg);
+out:
+	return ret;
+}
+
+static void __exit fini(void)
+{
+	crypto_unregister_shash(&sha512_alg);
+	crypto_unregister_shash(&sha384_alg);
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA512 and SHA-384 Secure Hash Algorithm");
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
new file mode 100644
index 000000000..8620b0ec9
--- /dev/null
+++ b/arch/s390/crypto/sha_common.c
@@ -0,0 +1,106 @@
+/*
+ * Cryptographic API.
+ *
+ * s390 generic implementation of the SHA Secure Hash Algorithms.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Jan Glauber (jang@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/module.h>
+#include "sha.h"
+#include "crypt_s390.h"
+
+int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
+{
+	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
+	unsigned int index;
+	int ret;
+
+	/* how much is already in the buffer? */
+	index = ctx->count & (bsize - 1);
+	ctx->count += len;
+
+	if ((index + len) < bsize)
+		goto store;
+
+	/* process one stored block */
+	if (index) {
+		memcpy(ctx->buf + index, data, bsize - index);
+		ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, bsize);
+		if (ret != bsize)
+			return -EIO;
+		data += bsize - index;
+		len -= bsize - index;
+		index = 0;
+	}
+
+	/* process as many blocks as possible */
+	if (len >= bsize) {
+		ret = crypt_s390_kimd(ctx->func, ctx->state, data,
+				      len & ~(bsize - 1));
+		if (ret != (len & ~(bsize - 1)))
+			return -EIO;
+		data += ret;
+		len -= ret;
+	}
+store:
+	if (len)
+		memcpy(ctx->buf + index , data, len);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(s390_sha_update);
+
+int s390_sha_final(struct shash_desc *desc, u8 *out)
+{
+	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
+	u64 bits;
+	unsigned int index, end, plen;
+	int ret;
+
+	/* length field: 16 bytes if bsize > SHA256_BLOCK_SIZE, else 8 */
+	plen = (bsize > SHA256_BLOCK_SIZE) ? 16 : 8;
+
+	/* manual padding; use a 2nd block if 0x80 + length do not fit */
+	index = ctx->count & (bsize - 1);
+	end = (index < bsize - plen) ? bsize : (2 * bsize);
+
+	/* start pad with a single 1 bit (0x80) */
+	ctx->buf[index] = 0x80;
+	index++;
+
+	/* pad with zeros up to the last 8 bytes of the padded area */
+	memset(ctx->buf + index, 0x00, end - index - 8);
+
+	/*
+	 * Append message length. Well, SHA-512 wants a 128 bit length value,
+	 * nevertheless we use u64, should be enough for now...
+	 */
+	bits = ctx->count * 8;	/* count is kept in bytes by s390_sha_update() */
+	memcpy(ctx->buf + end - 8, &bits, sizeof(bits));
+
+	ret = crypt_s390_kimd(ctx->func, ctx->state, ctx->buf, end);
+	if (ret != end)	/* any result other than `end` is reported as -EIO */
+		return -EIO;
+
+	/* copy digest to out */
+	memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm));
+	/* wipe context */
+	memset(ctx, 0, sizeof *ctx);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(s390_sha_final);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("s390 SHA cipher common functions");
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
new file mode 100644
index 000000000..83ef702d2
--- /dev/null
+++ b/arch/s390/defconfig
@@ -0,0 +1,203 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_BLK_CGROUP=y
+CONFIG_NAMESPACES=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_BPF_SYSCALL=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_IBM_PARTITION=y
+CONFIG_DEFAULT_DEADLINE=y
+CONFIG_MARCH_Z196=y
+CONFIG_NR_CPUS=256
+CONFIG_HZ_100=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_CMA=y
+CONFIG_CRASH_DUMP=y
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_INET_LRO is not set
+CONFIG_IPV6=y
+CONFIG_L2TP=m
+CONFIG_L2TP_DEBUGFS=m
+CONFIG_VLAN_8021Q=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=y
+CONFIG_BPF_JIT=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_ZFCP=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_DUMMY=m
+CONFIG_EQUALIZER=m
+CONFIG_TUN=m
+CONFIG_VIRTIO_NET=y
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+CONFIG_RAW_DRIVER=m
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=y
+CONFIG_XFS_QUOTA=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_XFS_RT=y
+CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_FANOTIFY=y
+CONFIG_FUSE_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCK_STAT=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_PI_LIST=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_PROVE_RCU=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_RCU_TRACE=y
+CONFIG_LATENCYTOP=y
+CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_KPROBES_SANITY_TEST=y
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_S390_PTDUMP=y
+CONFIG_CRYPTO_CRYPTD=m
+CONFIG_CRYPTO_AUTHENC=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_CCM=m
+CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_CMAC=m
+CONFIG_CRYPTO_XCBC=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_CRC32=m
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_RMD128=m
+CONFIG_CRYPTO_RMD160=m
+CONFIG_CRYPTO_RMD256=m
+CONFIG_CRYPTO_RMD320=m
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SALSA20=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_DEFLATE=m
+CONFIG_CRYPTO_ZLIB=m
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_LZ4=m
+CONFIG_CRYPTO_LZ4HC=m
+CONFIG_ZCRYPT=m
+CONFIG_CRYPTO_SHA1_S390=m
+CONFIG_CRYPTO_SHA256_S390=m
+CONFIG_CRYPTO_SHA512_S390=m
+CONFIG_CRYPTO_DES_S390=m
+CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRC7=m
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_POWERPC is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
+CONFIG_CMM=m
diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile
new file mode 100644
index 000000000..2ee25ba25
--- /dev/null
+++ b/arch/s390/hypfs/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the linux hypfs filesystem routines.
+#
+
+obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o
+
+s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o
+s390_hypfs-objs += hypfs_diag0c.o
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
new file mode 100644
index 000000000..eecde500e
--- /dev/null
+++ b/arch/s390/hypfs/hypfs.h
@@ -0,0 +1,76 @@
+/*
+ *    Hypervisor filesystem for Linux on s390.
+ *
+ *    Copyright IBM Corp. 2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _HYPFS_H_
+#define _HYPFS_H_
+
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/debugfs.h>
+#include <linux/workqueue.h>
+#include <linux/kref.h>
+#include <asm/hypfs.h>
+
+#define REG_FILE_MODE    0440
+#define UPDATE_FILE_MODE 0220
+#define DIR_MODE         0550
+
+extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name);
+
+extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name,
+				       __u64 value);
+
+extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name,
+				       char *string);
+
+/* LPAR Hypervisor */
+extern int hypfs_diag_init(void);
+extern void hypfs_diag_exit(void);
+extern int hypfs_diag_create_files(struct dentry *root);
+
+/* VM Hypervisor */
+extern int hypfs_vm_init(void);
+extern void hypfs_vm_exit(void);
+extern int hypfs_vm_create_files(struct dentry *root);
+
+/* VM diagnose 0c */
+int hypfs_diag0c_init(void);
+void hypfs_diag0c_exit(void);
+
+/* Set Partition-Resource Parameter */
+int hypfs_sprp_init(void);
+void hypfs_sprp_exit(void);
+
+/* debugfs interface */
+struct hypfs_dbfs_file;
+
+struct hypfs_dbfs_data {
+	void			*buf;
+	void			*buf_free_ptr;
+	size_t			size;
+	struct hypfs_dbfs_file	*dbfs_file;
+};
+
+struct hypfs_dbfs_file {
+	const char	*name;
+	int		(*data_create)(void **data, void **data_free_ptr,
+				       size_t *size);
+	void		(*data_free)(const void *buf_free_ptr);
+	long		(*unlocked_ioctl) (struct file *, unsigned int,
+					   unsigned long);
+
+	/* Private data for hypfs_dbfs.c */
+	struct mutex		lock;
+	struct dentry		*dentry;
+};
+
+extern int hypfs_dbfs_init(void);
+extern void hypfs_dbfs_exit(void);
+extern int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df);
+extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df);
+
+#endif /* _HYPFS_H_ */
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
new file mode 100644
index 000000000..752f6df3e
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -0,0 +1,104 @@
+/*
+ * Hypervisor filesystem for Linux on s390 - debugfs interface
+ *
+ * Copyright IBM Corp. 2010
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/slab.h>
+#include "hypfs.h"
+
+static struct dentry *dbfs_dir;
+
+static struct hypfs_dbfs_data *hypfs_dbfs_data_alloc(struct hypfs_dbfs_file *f)
+{
+	struct hypfs_dbfs_data *data;
+
+	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return NULL;
+	data->dbfs_file = f;
+	return data;
+}
+
+static void hypfs_dbfs_data_free(struct hypfs_dbfs_data *data)
+{
+	data->dbfs_file->data_free(data->buf_free_ptr);
+	kfree(data);
+}
+
+static ssize_t dbfs_read(struct file *file, char __user *buf,
+			 size_t size, loff_t *ppos)
+{
+	struct hypfs_dbfs_data *data;
+	struct hypfs_dbfs_file *df;
+	ssize_t rc;
+
+	if (*ppos != 0)	/* only a read starting at offset 0 returns data */
+		return 0;
+
+	df = file_inode(file)->i_private;
+	mutex_lock(&df->lock);	/* held while the data buffer is created */
+	data = hypfs_dbfs_data_alloc(df);
+	if (!data) {
+		mutex_unlock(&df->lock);
+		return -ENOMEM;
+	}
+	rc = df->data_create(&data->buf, &data->buf_free_ptr, &data->size);
+	if (rc) {	/* assumes a failed data_create() left nothing to free */
+		mutex_unlock(&df->lock);
+		kfree(data);
+		return rc;
+	}
+	mutex_unlock(&df->lock);
+
+	rc = simple_read_from_buffer(buf, size, ppos, data->buf, data->size);
+	hypfs_dbfs_data_free(data);	/* calls data_free() and frees the wrapper */
+	return rc;
+}
+
+static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct hypfs_dbfs_file *dbfs = file_inode(file)->i_private;
+	long ret = -ENOTTY;	/* result when no handler is registered */
+
+	/* The per-file mutex is held across the handler call. */
+	mutex_lock(&dbfs->lock);
+	if (dbfs->unlocked_ioctl)
+		ret = dbfs->unlocked_ioctl(file, cmd, arg);
+	mutex_unlock(&dbfs->lock);
+
+	return ret;
+}
+
+static const struct file_operations dbfs_ops = {
+	.read		= dbfs_read,
+	.llseek		= no_llseek,
+	.unlocked_ioctl = dbfs_ioctl,
+};
+
+int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df)
+{
+	/* dbfs_read()/dbfs_ioctl() lock df->lock once the file is visible */
+	mutex_init(&df->lock);
+	df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df,
+					 &dbfs_ops);
+
+	return PTR_ERR_OR_ZERO(df->dentry);	/* 0, or the error in dentry */
+}
+
+void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df)
+{
+	debugfs_remove(df->dentry);
+}
+
+int hypfs_dbfs_init(void)
+{
+	dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
+	return PTR_ERR_OR_ZERO(dbfs_dir);
+}
+
+void hypfs_dbfs_exit(void)
+{
+	debugfs_remove(dbfs_dir);
+}
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
new file mode 100644
index 000000000..5eeffeefa
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -0,0 +1,769 @@
+/*
+ *    Hypervisor filesystem for Linux on s390. Diag 204 and 224
+ *    implementation.
+ *
+ *    Copyright IBM Corp. 2006, 2008
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "hypfs"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <asm/ebcdic.h>
+#include "hypfs.h"
+
+#define LPAR_NAME_LEN 8		/* lpar name len in diag 204 data */
+#define CPU_NAME_LEN 16		/* type name len of cpus in diag224 name table */
+#define TMP_SIZE 64		/* size of temporary buffers */
+
+#define DBFS_D204_HDR_VERSION	0
+
+/* diag 204 subcodes */
+enum diag204_sc {
+	SUBC_STIB4 = 4,
+	SUBC_RSI = 5,
+	SUBC_STIB6 = 6,
+	SUBC_STIB7 = 7
+};
+
+/* The two available diag 204 data formats */
+enum diag204_format {
+	INFO_SIMPLE = 0,
+	INFO_EXT = 0x00010000
+};
+
+/* bit is set in flags, when physical cpu info is included in diag 204 data */
+#define LPAR_PHYS_FLG  0x80
+
+static char *diag224_cpu_names;			/* diag 224 name table */
+static enum diag204_sc diag204_store_sc;	/* used subcode for store */
+static enum diag204_format diag204_info_type;	/* used diag 204 data format */
+
+static void *diag204_buf;		/* 4K aligned buffer for diag204 data */
+static void *diag204_buf_vmalloc;	/* vmalloc pointer for diag204 data */
+static int diag204_buf_pages;		/* number of pages for diag204 data */
+
+static struct dentry *dbfs_d204_file;
+
+/*
+ * DIAG 204 data structures and member access functions.
+ *
+ * Since we have two different diag 204 data formats for old and new s390
+ * machines, we do not access the structs directly, but use getter functions for
+ * each struct member instead. This should make the code more readable.
+ */
+
+/* Time information block */
+
+struct info_blk_hdr {
+	__u8  npar;
+	__u8  flags;
+	__u16 tslice;
+	__u16 phys_cpus;
+	__u16 this_part;
+	__u64 curtod;
+} __attribute__ ((packed));
+
+struct x_info_blk_hdr {
+	__u8  npar;
+	__u8  flags;
+	__u16 tslice;
+	__u16 phys_cpus;
+	__u16 this_part;
+	__u64 curtod1;
+	__u64 curtod2;
+	char reserved[40];
+} __attribute__ ((packed));
+
+static inline int info_blk_hdr__size(enum diag204_format type)
+{
+	if (type == INFO_SIMPLE)
+		return sizeof(struct info_blk_hdr);
+	else /* INFO_EXT */
+		return sizeof(struct x_info_blk_hdr);
+}
+
+static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr)
+{
+	if (type == INFO_SIMPLE)
+		return ((struct info_blk_hdr *)hdr)->npar;
+	else /* INFO_EXT */
+		return ((struct x_info_blk_hdr *)hdr)->npar;
+}
+
+static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr)
+{
+	if (type == INFO_SIMPLE)
+		return ((struct info_blk_hdr *)hdr)->flags;
+	else /* INFO_EXT */
+		return ((struct x_info_blk_hdr *)hdr)->flags;
+}
+
+static inline __u16 info_blk_hdr__pcpus(enum diag204_format type, void *hdr)
+{
+	if (type == INFO_SIMPLE)
+		return ((struct info_blk_hdr *)hdr)->phys_cpus;
+	else /* INFO_EXT */
+		return ((struct x_info_blk_hdr *)hdr)->phys_cpus;
+}
+
+/* Partition header */
+
+struct part_hdr {
+	__u8 pn;
+	__u8 cpus;
+	char reserved[6];
+	char part_name[LPAR_NAME_LEN];
+} __attribute__ ((packed));
+
+struct x_part_hdr {
+	__u8  pn;
+	__u8  cpus;
+	__u8  rcpus;
+	__u8  pflag;
+	__u32 mlu;
+	char  part_name[LPAR_NAME_LEN];
+	char  lpc_name[8];
+	char  os_name[8];
+	__u64 online_cs;
+	__u64 online_es;
+	__u8  upid;
+	char  reserved1[3];
+	__u32 group_mlu;
+	char  group_name[8];
+	char  reserved2[32];
+} __attribute__ ((packed));
+
+static inline int part_hdr__size(enum diag204_format type)
+{
+	if (type == INFO_SIMPLE)
+		return sizeof(struct part_hdr);
+	else /* INFO_EXT */
+		return sizeof(struct x_part_hdr);
+}
+
+static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr)
+{
+	if (type == INFO_SIMPLE)
+		return ((struct part_hdr *)hdr)->cpus;
+	else /* INFO_EXT */
+		return ((struct x_part_hdr *)hdr)->rcpus;
+}
+
+static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
+				       char *name)
+{
+	if (type == INFO_SIMPLE)
+		memcpy(name, ((struct part_hdr *)hdr)->part_name,
+		       LPAR_NAME_LEN);
+	else /* INFO_EXT */
+		memcpy(name, ((struct x_part_hdr *)hdr)->part_name,
+		       LPAR_NAME_LEN);
+	EBCASC(name, LPAR_NAME_LEN);
+	name[LPAR_NAME_LEN] = 0;
+	strim(name);
+}
+
+struct cpu_info {
+	__u16 cpu_addr;
+	char  reserved1[2];
+	__u8  ctidx;
+	__u8  cflag;
+	__u16 weight;
+	__u64 acc_time;
+	__u64 lp_time;
+} __attribute__ ((packed));
+
+struct x_cpu_info {
+	__u16 cpu_addr;
+	char  reserved1[2];
+	__u8  ctidx;
+	__u8  cflag;
+	__u16 weight;
+	__u64 acc_time;
+	__u64 lp_time;
+	__u16 min_weight;
+	__u16 cur_weight;
+	__u16 max_weight;
+	char  reseved2[2];
+	__u64 online_time;
+	__u64 wait_time;
+	__u32 pma_weight;
+	__u32 polar_weight;
+	char  reserved3[40];
+} __attribute__ ((packed));
+
+/* CPU info block */
+
+static inline int cpu_info__size(enum diag204_format type)
+{
+	if (type == INFO_SIMPLE)
+		return sizeof(struct cpu_info);
+	else /* INFO_EXT */
+		return sizeof(struct x_cpu_info);
+}
+
+static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr)
+{
+	if (type == INFO_SIMPLE)
+		return ((struct cpu_info *)hdr)->ctidx;
+	else /* INFO_EXT */
+		return ((struct x_cpu_info *)hdr)->ctidx;
+}
+
+static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr)
+{
+	if (type == INFO_SIMPLE)
+		return ((struct cpu_info *)hdr)->cpu_addr;
+	else /* INFO_EXT */
+		return ((struct x_cpu_info *)hdr)->cpu_addr;
+}
+
+static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr)
+{
+	if (type == INFO_SIMPLE)
+		return ((struct cpu_info *)hdr)->acc_time;
+	else /* INFO_EXT */
+		return ((struct x_cpu_info *)hdr)->acc_time;
+}
+
+static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr)
+{
+	if (type == INFO_SIMPLE)
+		return ((struct cpu_info *)hdr)->lp_time;
+	else /* INFO_EXT */
+		return ((struct x_cpu_info *)hdr)->lp_time;
+}
+
+static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr)
+{
+	if (type == INFO_SIMPLE)
+		return 0;	/* online_time not available in simple info */
+	else /* INFO_EXT */
+		return ((struct x_cpu_info *)hdr)->online_time;
+}
+
+/* Physical header */
+
+struct phys_hdr {
+	char reserved1[1];
+	__u8 cpus;
+	char reserved2[6];
+	char mgm_name[8];
+} __attribute__ ((packed));
+
+struct x_phys_hdr {
+	char reserved1[1];
+	__u8 cpus;
+	char reserved2[6];
+	char mgm_name[8];
+	char reserved3[80];
+} __attribute__ ((packed));
+
+static inline int phys_hdr__size(enum diag204_format type)
+{
+	if (type == INFO_SIMPLE)
+		return sizeof(struct phys_hdr);
+	else /* INFO_EXT */
+		return sizeof(struct x_phys_hdr);
+}
+
+static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr)
+{
+	if (type == INFO_SIMPLE)
+		return ((struct phys_hdr *)hdr)->cpus;
+	else /* INFO_EXT */
+		return ((struct x_phys_hdr *)hdr)->cpus;
+}
+
+/* Physical CPU info block */
+
+struct phys_cpu {
+	__u16 cpu_addr;
+	char  reserved1[2];
+	__u8  ctidx;
+	char  reserved2[3];
+	__u64 mgm_time;
+	char  reserved3[8];
+} __attribute__ ((packed));
+
+struct x_phys_cpu {
+	__u16 cpu_addr;
+	char  reserved1[2];
+	__u8  ctidx;
+	char  reserved2[3];
+	__u64 mgm_time;
+	char  reserved3[80];
+} __attribute__ ((packed));
+
+static inline int phys_cpu__size(enum diag204_format type)
+{
+	if (type == INFO_SIMPLE)
+		return sizeof(struct phys_cpu);
+	else /* INFO_EXT */
+		return sizeof(struct x_phys_cpu);
+}
+
+static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr)
+{
+	if (type == INFO_SIMPLE)
+		return ((struct phys_cpu *)hdr)->cpu_addr;
+	else /* INFO_EXT */
+		return ((struct x_phys_cpu *)hdr)->cpu_addr;
+}
+
+static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr)
+{
+	if (type == INFO_SIMPLE)
+		return ((struct phys_cpu *)hdr)->mgm_time;
+	else /* INFO_EXT */
+		return ((struct x_phys_cpu *)hdr)->mgm_time;
+}
+
+static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
+{
+	if (type == INFO_SIMPLE)
+		return ((struct phys_cpu *)hdr)->ctidx;
+	else /* INFO_EXT */
+		return ((struct x_phys_cpu *)hdr)->ctidx;
+}
+
+/* Diagnose 204 functions */
+
+static int diag204(unsigned long subcode, unsigned long size, void *addr)
+{
+	register unsigned long _subcode asm("0") = subcode;	/* in/out, reg 0 */
+	register unsigned long _size asm("1") = size;	/* in/out, reg 1 */
+
+	asm volatile(
+		"	diag	%2,%0,0x204\n"
+		"0:\n"
+		EX_TABLE(0b,0b)
+		: "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory");
+	if (_subcode)	/* register 0 still non-zero afterwards: report failure */
+		return -1;
+	return _size;	/* otherwise hand back whatever is now in register 1 */
+}
+
+/*
+ * For the old diag subcode 4 with simple data format we have to use real
+ * memory. If we use subcode 6 or 7 with extended data format, we can (and
+ * should) use vmalloc, since we need a lot of memory in that case. Currently
+ * up to 93 pages!
+ */
+
+static void diag204_free_buffer(void)
+{
+	if (!diag204_buf)
+		return;
+	if (diag204_buf_vmalloc) {
+		vfree(diag204_buf_vmalloc);
+		diag204_buf_vmalloc = NULL;
+	} else {
+		free_pages((unsigned long) diag204_buf, 0);
+	}
+	diag204_buf = NULL;
+}
+
+static void *page_align_ptr(void *ptr)
+{
+	return (void *) PAGE_ALIGN((unsigned long) ptr);
+}
+
+static void *diag204_alloc_vbuf(int pages)
+{
+	/* The buffer has to be page aligned! */
+	diag204_buf_vmalloc = vmalloc(PAGE_SIZE * (pages + 1));
+	if (!diag204_buf_vmalloc)
+		return ERR_PTR(-ENOMEM);
+	diag204_buf = page_align_ptr(diag204_buf_vmalloc);
+	diag204_buf_pages = pages;
+	return diag204_buf;
+}
+
+static void *diag204_alloc_rbuf(void)
+{
+	diag204_buf = (void*)__get_free_pages(GFP_KERNEL,0);
+	if (!diag204_buf)
+		return ERR_PTR(-ENOMEM);
+	diag204_buf_pages = 1;
+	return diag204_buf;
+}
+
+static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
+{
+	/* Reuse the cached buffer when one already exists. */
+	if (diag204_buf) {
+		*pages = diag204_buf_pages;
+		return diag204_buf;
+	}
+	if (fmt == INFO_SIMPLE) {
+		*pages = 1;
+		return diag204_alloc_rbuf();
+	}
+	/* INFO_EXT: query the page count via SUBC_RSI, then vmalloc. */
+	*pages = diag204((unsigned long)SUBC_RSI |
+			 (unsigned long)INFO_EXT, 0, NULL);
+	if (*pages <= 0)
+		return ERR_PTR(-ENOSYS);
+	return diag204_alloc_vbuf(*pages);
+}
+
+/*
+ * diag204_probe() has to find out, which type of diagnose 204 implementation
+ * we have on our machine. Currently there are three possible scenarios:
+ *   - subcode 4   + simple data format (only one page)
+ *   - subcode 4-6 + extended data format
+ *   - subcode 4-7 + extended data format
+ *
+ * Subcode 5 is used to retrieve the size of the data, provided by subcodes
+ * 6 and 7. Subcode 7 basically has the same function as subcode 6. In addition
+ * to subcode 6 it provides also information about secondary cpus.
+ * In order to get as much information as possible, we first try
+ * subcode 7, then 6 and if both fail, we use subcode 4.
+ */
+
+static int diag204_probe(void)
+{
+	void *buf;
+	int pages, rc;
+
+	buf = diag204_get_buffer(INFO_EXT, &pages);
+	if (!IS_ERR(buf)) {
+		if (diag204((unsigned long)SUBC_STIB7 |
+			    (unsigned long)INFO_EXT, pages, buf) >= 0) {
+			diag204_store_sc = SUBC_STIB7;
+			diag204_info_type = INFO_EXT;
+			goto out;
+		}
+		if (diag204((unsigned long)SUBC_STIB6 |
+			    (unsigned long)INFO_EXT, pages, buf) >= 0) {
+			diag204_store_sc = SUBC_STIB6;
+			diag204_info_type = INFO_EXT;
+			goto out;
+		}
+		diag204_free_buffer();
+	}
+
+	/* subcodes 6 and 7 failed, now try subcode 4 */
+
+	buf = diag204_get_buffer(INFO_SIMPLE, &pages);
+	if (IS_ERR(buf)) {
+		rc = PTR_ERR(buf);
+		goto fail_alloc;
+	}
+	if (diag204((unsigned long)SUBC_STIB4 |
+		    (unsigned long)INFO_SIMPLE, pages, buf) >= 0) {
+		diag204_store_sc = SUBC_STIB4;
+		diag204_info_type = INFO_SIMPLE;
+		goto out;
+	} else {
+		rc = -ENOSYS;
+		goto fail_store;
+	}
+out:
+	rc = 0;
+fail_store:
+	diag204_free_buffer();
+fail_alloc:
+	return rc;
+}
+
+static int diag204_do_store(void *buf, int pages)
+{
+	int rc;
+
+	rc = diag204((unsigned long) diag204_store_sc |
+		     (unsigned long) diag204_info_type, pages, buf);
+	return rc < 0 ? -ENOSYS : 0;
+}
+
+/*
+ * Store current diag 204 data in the buffer from diag204_get_buffer().
+ * Returns the buffer, or an ERR_PTR() if allocation or the store failed.
+ */
+static void *diag204_store(void)
+{
+	int pages, rc;
+	void *buf = diag204_get_buffer(diag204_info_type, &pages);
+
+	if (IS_ERR(buf))
+		return buf;
+	rc = diag204_do_store(buf, pages);
+	return rc ? ERR_PTR(rc) : buf;
+}
+
+/* Diagnose 224 functions */
+
+static int diag224(void *ptr)
+{
+	int rc = -EOPNOTSUPP;
+
+	asm volatile(
+		"	diag	%1,%2,0x224\n"
+		"0:	lhi	%0,0x0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (rc) :"d" (0), "d" (ptr) : "memory");
+	return rc;
+}
+
+static int diag224_get_name_table(void)
+{
+	/* memory must be below 2GB */
+	diag224_cpu_names = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
+	if (!diag224_cpu_names)
+		return -ENOMEM;
+	if (diag224(diag224_cpu_names)) {
+		kfree(diag224_cpu_names);
+		return -EOPNOTSUPP;
+	}
+	EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
+	return 0;
+}
+
+static void diag224_delete_name_table(void)
+{
+	kfree(diag224_cpu_names);
+}
+
+static int diag224_idx2name(int index, char *name)
+{
+	memcpy(name, diag224_cpu_names + ((index + 1) * CPU_NAME_LEN),
+		CPU_NAME_LEN);
+	name[CPU_NAME_LEN] = 0;
+	strim(name);
+	return 0;
+}
+
+struct dbfs_d204_hdr {
+	u64	len;		/* Length of d204 buffer without header */
+	u16	version;	/* Version of header */
+	u8	sc;		/* Used subcode */
+	char	reserved[53];
+} __attribute__ ((packed));
+
+struct dbfs_d204 {
+	struct dbfs_d204_hdr	hdr;	/* 64 byte header */
+	char			buf[];	/* d204 buffer */
+} __attribute__ ((packed));
+
+static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size)
+{
+	const size_t hdr_len = sizeof(struct dbfs_d204_hdr);
+	int rc, buf_size = PAGE_SIZE * (diag204_buf_pages + 1) + hdr_len;
+	void *base = vzalloc(buf_size);
+	struct dbfs_d204 *d204;
+
+	if (!base)
+		return -ENOMEM;
+	/* Position the header so that d204->buf starts page aligned */
+	d204 = page_align_ptr(base + hdr_len) - hdr_len;
+	rc = diag204_do_store(d204->buf, diag204_buf_pages);
+	if (rc) {
+		vfree(base);
+		return rc;
+	}
+	d204->hdr.sc = diag204_store_sc;
+	d204->hdr.version = DBFS_D204_HDR_VERSION;
+	d204->hdr.len = PAGE_SIZE * diag204_buf_pages;
+	*data_free_ptr = base;
+	*data = d204;
+	*size = hdr_len + d204->hdr.len;
+	return 0;
+}
+
+static struct hypfs_dbfs_file dbfs_file_d204 = {
+	.name		= "diag_204",
+	.data_create	= dbfs_d204_create,
+	.data_free	= vfree,
+};
+
+__init int hypfs_diag_init(void)
+{
+	int rc;
+
+	if (diag204_probe()) {
+		pr_err("The hardware system does not support hypfs\n");
+		return -ENODATA;
+	}
+	if (diag204_info_type == INFO_EXT) {
+		rc = hypfs_dbfs_create_file(&dbfs_file_d204);
+		if (rc)
+			return rc;
+	}
+	if (MACHINE_IS_LPAR) {
+		rc = diag224_get_name_table();
+		if (rc) {
+			pr_err("The hardware system does not provide all "
+			       "functions required by hypfs\n");
+			debugfs_remove(dbfs_d204_file);
+			return rc;
+		}
+	}
+	return 0;
+}
+
+void hypfs_diag_exit(void)
+{
+	debugfs_remove(dbfs_d204_file);
+	diag224_delete_name_table();
+	diag204_free_buffer();
+	hypfs_dbfs_remove_file(&dbfs_file_d204);
+}
+
+/*
+ * Functions to create the directory structure
+ * *******************************************
+ */
+
+/* Create the directory and attribute files for one CPU of a partition. */
+static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
+{
+	struct dentry *cpu_dir, *rc;
+	char buffer[TMP_SIZE];
+
+	snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type,
+							    cpu_info));
+	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+	if (IS_ERR(cpu_dir))	/* must not be used as parent directory */
+		return PTR_ERR(cpu_dir);
+	rc = hypfs_create_u64(cpu_dir, "mgmtime",
+			      cpu_info__acc_time(diag204_info_type, cpu_info) -
+			      cpu_info__lp_time(diag204_info_type, cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	rc = hypfs_create_u64(cpu_dir, "cputime",
+			      cpu_info__lp_time(diag204_info_type, cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	if (diag204_info_type == INFO_EXT) {
+		rc = hypfs_create_u64(cpu_dir, "onlinetime",
+			cpu_info__online_time(diag204_info_type, cpu_info));
+		if (IS_ERR(rc))
+			return PTR_ERR(rc);
+	}
+	diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer);
+	return PTR_RET(hypfs_create_str(cpu_dir, "type", buffer));
+}
+
+static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
+{
+	struct dentry *cpus_dir;
+	struct dentry *lpar_dir;
+	char lpar_name[LPAR_NAME_LEN + 1];
+	void *cpu_info;
+	int i;
+
+	part_hdr__part_name(diag204_info_type, part_hdr, lpar_name);
+	lpar_name[LPAR_NAME_LEN] = 0;
+	lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
+	if (IS_ERR(lpar_dir))
+		return lpar_dir;
+	cpus_dir = hypfs_mkdir(lpar_dir, "cpus");
+	if (IS_ERR(cpus_dir))
+		return cpus_dir;
+	cpu_info = part_hdr + part_hdr__size(diag204_info_type);
+	for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) {
+		int rc;
+		rc = hypfs_create_cpu_files(cpus_dir, cpu_info);
+		if (rc)
+			return ERR_PTR(rc);
+		cpu_info += cpu_info__size(diag204_info_type);
+	}
+	return cpu_info;
+}
+
+static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
+{
+	struct dentry *cpu_dir;
+	char buffer[TMP_SIZE];
+	void *rc;
+
+	snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type,
+							    cpu_info));
+	cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+	if (IS_ERR(cpu_dir))
+		return PTR_ERR(cpu_dir);
+	rc = hypfs_create_u64(cpu_dir, "mgmtime",
+			      phys_cpu__mgm_time(diag204_info_type, cpu_info));
+	if (IS_ERR(rc))
+		return PTR_ERR(rc);
+	diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer);
+	rc = hypfs_create_str(cpu_dir, "type", buffer);
+	return PTR_RET(rc);
+}
+
+static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
+{
+	int i;
+	void *cpu_info;
+	struct dentry *cpus_dir;
+
+	cpus_dir = hypfs_mkdir(parent_dir, "cpus");
+	if (IS_ERR(cpus_dir))
+		return cpus_dir;
+	cpu_info = phys_hdr + phys_hdr__size(diag204_info_type);
+	for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) {
+		int rc;
+		rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info);
+		if (rc)
+			return ERR_PTR(rc);
+		cpu_info += phys_cpu__size(diag204_info_type);
+	}
+	return cpu_info;
+}
+
+/*
+ * Populate the hypfs tree below "root" from freshly stored diag 204 data:
+ * "systems/<lpar>" for each partition, "cpus" for the physical CPUs (only
+ * if LPAR_PHYS_FLG is set in the info block header) and "hyp/type".
+ * Returns 0 on success or a negative error code.
+ */
+int hypfs_diag_create_files(struct dentry *root)
+{
+	struct dentry *systems_dir, *hyp_dir;
+	void *time_hdr, *part_hdr, *ptr;
+	int i;
+
+	/* The stored data starts with the info block (time) header */
+	time_hdr = diag204_store();
+	if (IS_ERR(time_hdr))
+		return PTR_ERR(time_hdr);
+
+	/* partitions */
+	systems_dir = hypfs_mkdir(root, "systems");
+	if (IS_ERR(systems_dir))
+		return PTR_ERR(systems_dir);
+
+	part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type);
+	for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) {
+		/* Each call returns the address of the next partition block */
+		part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
+		if (IS_ERR(part_hdr))
+			return PTR_ERR(part_hdr);
+	}
+
+	/* physical cpus, stored behind the last partition block */
+	if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) {
+		ptr = hypfs_create_phys_files(root, part_hdr);
+		if (IS_ERR(ptr))
+			return PTR_ERR(ptr);
+	}
+
+	/* hypervisor info */
+	hyp_dir = hypfs_mkdir(root, "hyp");
+	if (IS_ERR(hyp_dir))
+		return PTR_ERR(hyp_dir);
+	ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+
+	return 0;
+}
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
new file mode 100644
index 000000000..24c747a0f
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -0,0 +1,135 @@
+/*
+ * Hypervisor filesystem for Linux on s390
+ *
+ * Diag 0C implementation
+ *
+ * Copyright IBM Corp. 2014
+ */
+
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <asm/hypfs.h>
+#include "hypfs.h"
+
+#define DBFS_D0C_HDR_VERSION 0
+
+/*
+ * Execute diagnose 0c in 31 bit mode
+ */
+static void diag0c(struct hypfs_diag0c_entry *entry)
+{
+	asm volatile (
+		"	sam31\n"
+		"	diag	%0,%0,0x0c\n"
+		"	sam64\n"
+		: /* no output register */
+		: "a" (entry)
+		: "memory");
+}
+
+/*
+ * Get hypfs_diag0c_entry from CPU vector and store diag0c data
+ */
+static void diag0c_fn(void *data)
+{
+	diag0c(((void **) data)[smp_processor_id()]);
+}
+
+/*
+ * Allocate buffer and store diag 0c data
+ */
+static void *diag0c_store(unsigned int *count)
+{
+	struct hypfs_diag0c_data *diag0c_data;
+	unsigned int cpu_count, cpu, idx = 0;
+	void *ret = ERR_PTR(-ENOMEM);
+	void **cpu_vec;
+	size_t size;
+
+	/* Keep the set of online CPUs stable while collecting the data */
+	get_online_cpus();
+	cpu_count = num_online_cpus();
+	cpu_vec = kmalloc(sizeof(*cpu_vec) * num_possible_cpus(), GFP_KERNEL);
+	if (!cpu_vec)
+		goto out_put_online_cpus;
+	/* Note: Diag 0c needs 8 byte alignment and real storage */
+	size = sizeof(struct hypfs_diag0c_hdr) +
+	       cpu_count * sizeof(struct hypfs_diag0c_entry);
+	diag0c_data = kzalloc(size, GFP_KERNEL | GFP_DMA);
+	if (!diag0c_data)
+		goto out_kfree_cpu_vec;
+	/* Assign one entry to each online CPU; the vector is indexed by CPU */
+	for_each_online_cpu(cpu) {
+		diag0c_data->entry[idx].cpu = cpu;
+		cpu_vec[cpu] = &diag0c_data->entry[idx];
+		idx++;
+	}
+	/* Collect the data on all CPUs and wait until every CPU is done */
+	on_each_cpu(diag0c_fn, cpu_vec, 1);
+	*count = cpu_count;
+	ret = diag0c_data;
+out_kfree_cpu_vec:
+	kfree(cpu_vec);
+out_put_online_cpus:
+	put_online_cpus();
+	return ret;
+}
+
+/*
+ * Hypfs DBFS callback: Free diag 0c data
+ */
+static void dbfs_diag0c_free(const void *data)
+{
+	kfree(data);
+}
+
+/*
+ * Hypfs DBFS callback: Create diag 0c data
+ */
+static int dbfs_diag0c_create(void **data, void **data_free_ptr, size_t *size)
+{
+	struct hypfs_diag0c_data *diag0c_data;
+	unsigned int count;
+
+	diag0c_data = diag0c_store(&count);
+	if (IS_ERR(diag0c_data))
+		return PTR_ERR(diag0c_data);
+	memset(&diag0c_data->hdr, 0, sizeof(diag0c_data->hdr));
+	get_tod_clock_ext(diag0c_data->hdr.tod_ext);
+	diag0c_data->hdr.len = count * sizeof(struct hypfs_diag0c_entry);
+	diag0c_data->hdr.version = DBFS_D0C_HDR_VERSION;
+	diag0c_data->hdr.count = count;
+	*data = diag0c_data;
+	*data_free_ptr = diag0c_data;
+	*size = diag0c_data->hdr.len + sizeof(struct hypfs_diag0c_hdr);
+	return 0;
+}
+
+/*
+ * Hypfs DBFS file structure
+ */
+static struct hypfs_dbfs_file dbfs_file_0c = {
+	.name		= "diag_0c",
+	.data_create	= dbfs_diag0c_create,
+	.data_free	= dbfs_diag0c_free,
+};
+
+/*
+ * Initialize diag 0c interface for z/VM
+ */
+int __init hypfs_diag0c_init(void)
+{
+	if (!MACHINE_IS_VM)
+		return 0;
+	return hypfs_dbfs_create_file(&dbfs_file_0c);
+}
+
+/*
+ * Shutdown diag 0c interface for z/VM
+ */
+void hypfs_diag0c_exit(void)
+{
+	if (!MACHINE_IS_VM)
+		return;
+	hypfs_dbfs_remove_file(&dbfs_file_0c);
+}
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
new file mode 100644
index 000000000..f043c3c7e
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -0,0 +1,141 @@
+/*
+ *    Hypervisor filesystem for Linux on s390.
+ *    Set Partition-Resource Parameter interface.
+ *
+ *    Copyright IBM Corp. 2013
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/compat.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <asm/compat.h>
+#include <asm/sclp.h>
+#include "hypfs.h"
+
+#define DIAG304_SET_WEIGHTS	0
+#define DIAG304_QUERY_PRP	1
+#define DIAG304_SET_CAPPING	2
+
+#define DIAG304_CMD_MAX		2
+
+static unsigned long hypfs_sprp_diag304(void *data, unsigned long cmd)
+{
+	register unsigned long _data asm("2") = (unsigned long) data;
+	register unsigned long _rc asm("3");
+	register unsigned long _cmd asm("4") = cmd;
+
+	asm volatile("diag %1,%2,0x304\n"
+		     : "=d" (_rc) : "d" (_data), "d" (_cmd) : "memory");
+
+	return _rc;
+}
+
+static void hypfs_sprp_free(const void *data)
+{
+	free_page((unsigned long) data);
+}
+
+/* Hypfs DBFS callback: query the partition-resource parameters (diag 304). */
+static int hypfs_sprp_create(void **data_ptr, void **free_ptr, size_t *size)
+{
+	unsigned long page = get_zeroed_page(GFP_KERNEL);
+
+	if (!page)
+		return -ENOMEM;
+	if (hypfs_sprp_diag304((void *) page, DIAG304_QUERY_PRP) != 1) {
+		free_page(page);
+		*size = 0;
+		*free_ptr = NULL;
+		*data_ptr = NULL;
+		return -EIO;
+	}
+	*size = PAGE_SIZE;
+	*free_ptr = (void *) page;
+	*data_ptr = (void *) page;
+	return 0;
+}
+
+static int __hypfs_sprp_ioctl(void __user *user_area)
+{
+	struct hypfs_diag304 diag304;
+	unsigned long cmd;
+	void __user *udata;
+	void *data;
+	int rc;
+
+	if (copy_from_user(&diag304, user_area, sizeof(diag304)))
+		return -EFAULT;
+	if ((diag304.args[0] >> 8) != 0 || diag304.args[1] > DIAG304_CMD_MAX)
+		return -EINVAL;
+
+	data = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	if (!data)
+		return -ENOMEM;
+
+	udata = (void __user *)(unsigned long) diag304.data;
+	if (diag304.args[1] == DIAG304_SET_WEIGHTS ||
+	    diag304.args[1] == DIAG304_SET_CAPPING)
+		if (copy_from_user(data, udata, PAGE_SIZE)) {
+			rc = -EFAULT;
+			goto out;
+		}
+
+	cmd = *(unsigned long *) &diag304.args[0];
+	diag304.rc = hypfs_sprp_diag304(data, cmd);
+
+	if (diag304.args[1] == DIAG304_QUERY_PRP)
+		if (copy_to_user(udata, data, PAGE_SIZE)) {
+			rc = -EFAULT;
+			goto out;
+		}
+
+	rc = copy_to_user(user_area, &diag304, sizeof(diag304)) ? -EFAULT : 0;
+out:
+	free_page((unsigned long) data);
+	return rc;
+}
+
+/*
+ * ioctl handler of the "diag_304" file. Callers need CAP_SYS_ADMIN; the
+ * only known command is HYPFS_DIAG304, all others yield -ENOTTY.
+ */
+static long hypfs_sprp_ioctl(struct file *file, unsigned int cmd,
+			       unsigned long arg)
+{
+	void __user *argp;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	/* Compat tasks need their user pointer converted by compat_ptr() */
+	argp = is_compat_task() ? compat_ptr(arg) : (void __user *) arg;
+
+	if (cmd != HYPFS_DIAG304)
+		return -ENOTTY;	/* unknown ioctl number */
+	return __hypfs_sprp_ioctl(argp);
+}
+
+static struct hypfs_dbfs_file hypfs_sprp_file = {
+	.name		= "diag_304",
+	.data_create	= hypfs_sprp_create,
+	.data_free	= hypfs_sprp_free,
+	.unlocked_ioctl = hypfs_sprp_ioctl,
+};
+
+int hypfs_sprp_init(void)
+{
+	if (!sclp_has_sprp())
+		return 0;
+	return hypfs_dbfs_create_file(&hypfs_sprp_file);
+}
+
+void hypfs_sprp_exit(void)
+{
+	if (!sclp_has_sprp())
+		return;
+	hypfs_dbfs_remove_file(&hypfs_sprp_file);
+}
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
new file mode 100644
index 000000000..afbe07907
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -0,0 +1,287 @@
+/*
+ *    Hypervisor filesystem for Linux on s390. z/VM implementation.
+ *
+ *    Copyright IBM Corp. 2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <asm/ebcdic.h>
+#include <asm/timex.h>
+#include "hypfs.h"
+
+#define NAME_LEN 8
+#define DBFS_D2FC_HDR_VERSION 0
+
+static char local_guest[] = "        ";
+static char all_guests[] = "*       ";
+static char *guest_query;
+
+struct diag2fc_data {
+	__u32 version;
+	__u32 flags;
+	__u64 used_cpu;
+	__u64 el_time;
+	__u64 mem_min_kb;
+	__u64 mem_max_kb;
+	__u64 mem_share_kb;
+	__u64 mem_used_kb;
+	__u32 pcpus;
+	__u32 lcpus;
+	__u32 vcpus;
+	__u32 ocpus;
+	__u32 cpu_max;
+	__u32 cpu_shares;
+	__u32 cpu_use_samp;
+	__u32 cpu_delay_samp;
+	__u32 page_wait_samp;
+	__u32 idle_samp;
+	__u32 other_samp;
+	__u32 total_samp;
+	char  guest_name[NAME_LEN];
+};
+
+struct diag2fc_parm_list {
+	char userid[NAME_LEN];
+	char aci_grp[NAME_LEN];
+	__u64 addr;
+	__u32 size;
+	__u32 fmt;
+};
+
+static int diag2fc(int size, char* query, void *addr)
+{
+	unsigned long residual_cnt;
+	unsigned long rc;
+	struct diag2fc_parm_list parm_list;
+
+	memcpy(parm_list.userid, query, NAME_LEN);
+	ASCEBC(parm_list.userid, NAME_LEN);
+	parm_list.addr = (unsigned long) addr ;
+	parm_list.size = size;
+	parm_list.fmt = 0x02;
+	memset(parm_list.aci_grp, 0x40, NAME_LEN);
+	rc = -1;
+
+	asm volatile(
+		"	diag    %0,%1,0x2fc\n"
+		"0:\n"
+		EX_TABLE(0b,0b)
+		: "=d" (residual_cnt), "+d" (rc) : "0" (&parm_list) : "memory");
+
+	if ((rc != 0 ) && (rc != -2))
+		return rc;
+	else
+		return -residual_cnt;
+}
+
+/*
+ * Allocate buffer for "query" and store diag 2fc at "offset"
+ */
+static void *diag2fc_store(char *query, unsigned int *count, int offset)
+{
+	void *data;
+	int size;
+
+	/* Retry until a store call returns 0, i.e. no residual count */
+	for (;;) {
+		size = diag2fc(0, query, NULL);
+		if (size < 0)
+			return ERR_PTR(-EACCES);
+		data = vmalloc(size + offset);
+		if (!data)
+			return ERR_PTR(-ENOMEM);
+		if (diag2fc(size, query, data + offset) == 0) {
+			*count = (size / sizeof(struct diag2fc_data));
+			return data;
+		}
+		vfree(data);
+	}
+}
+
+static void diag2fc_free(const void *data)
+{
+	vfree(data);
+}
+
+#define ATTRIBUTE(dir, name, member) \
+do { \
+	void *rc; \
+	rc = hypfs_create_u64(dir, name, member); \
+	if (IS_ERR(rc)) \
+		return PTR_ERR(rc); \
+} while(0)
+
+static int hpyfs_vm_create_guest(struct dentry *systems_dir,
+				 struct diag2fc_data *data)
+{
+	char guest_name[NAME_LEN + 1] = {};
+	struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir;
+	int dedicated_flag, capped_value;
+
+	capped_value = (data->flags & 0x00000006) >> 1;
+	dedicated_flag = (data->flags & 0x00000008) >> 3;
+
+	/* guest dir */
+	memcpy(guest_name, data->guest_name, NAME_LEN);
+	EBCASC(guest_name, NAME_LEN);
+	strim(guest_name);
+	guest_dir = hypfs_mkdir(systems_dir, guest_name);
+	if (IS_ERR(guest_dir))
+		return PTR_ERR(guest_dir);
+	ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time);
+
+	/* logical cpu information */
+	cpus_dir = hypfs_mkdir(guest_dir, "cpus");
+	if (IS_ERR(cpus_dir))
+		return PTR_ERR(cpus_dir);
+	ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu);
+	ATTRIBUTE(cpus_dir, "capped", capped_value);
+	ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag);
+	ATTRIBUTE(cpus_dir, "count", data->vcpus);
+	/*
+	 * Note: The "weight_min" attribute got the wrong name.
+	 * The value represents the number of non-stopped (operating)
+	 * CPUS.
+	 */
+	ATTRIBUTE(cpus_dir, "weight_min", data->ocpus);
+	ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max);
+	ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares);
+
+	/* memory information */
+	mem_dir = hypfs_mkdir(guest_dir, "mem");
+	if (IS_ERR(mem_dir))
+		return PTR_ERR(mem_dir);
+	ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb);
+	ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb);
+	ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb);
+	ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb);
+
+	/* samples */
+	samples_dir = hypfs_mkdir(guest_dir, "samples");
+	if (IS_ERR(samples_dir))
+		return PTR_ERR(samples_dir);
+	ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp);
+	ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp);
+	ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp);
+	ATTRIBUTE(samples_dir, "idle", data->idle_samp);
+	ATTRIBUTE(samples_dir, "other", data->other_samp);
+	ATTRIBUTE(samples_dir, "total", data->total_samp);
+	return 0;
+}
+
+/*
+ * Build the z/VM hypfs tree below "root" from a diag 2fc snapshot:
+ * "hyp/type", "cpus/count" and one directory per guest in "systems".
+ * Returns 0 on success or a negative error code.
+ */
+int hypfs_vm_create_files(struct dentry *root)
+{
+	struct dentry *dir, *file;
+	struct diag2fc_data *data;
+	unsigned int count = 0;
+	int rc, i;
+
+	data = diag2fc_store(guest_query, &count, 0);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	/* Hypervisor Info */
+	dir = hypfs_mkdir(root, "hyp");
+	rc = PTR_RET(dir);
+	if (rc)
+		goto out_free;
+	file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
+	rc = PTR_RET(file);
+	if (rc)
+		goto out_free;
+
+	/* physical cpus */
+	dir = hypfs_mkdir(root, "cpus");
+	rc = PTR_RET(dir);
+	if (rc)
+		goto out_free;
+	file = hypfs_create_u64(dir, "count", data->lcpus);
+	rc = PTR_RET(file);
+	if (rc)
+		goto out_free;
+
+	/* guests */
+	dir = hypfs_mkdir(root, "systems");
+	rc = PTR_RET(dir);
+	if (rc)
+		goto out_free;
+
+	for (i = 0; i < count; i++) {
+		rc = hpyfs_vm_create_guest(dir, &(data[i]));
+		if (rc)
+			goto out_free;
+	}
+	/* All files were created; rc is 0 here */
+
+out_free:
+	/* The diag 2fc buffer is freed on success and on failure alike */
+	diag2fc_free(data);
+	return rc;
+}
+
+struct dbfs_d2fc_hdr {
+	u64	len;		/* Length of d2fc buffer without header */
+	u16	version;	/* Version of header */
+	char	tod_ext[STORE_CLOCK_EXT_SIZE]; /* TOD clock for d2fc */
+	u64	count;		/* Number of VM guests in d2fc buffer */
+	char	reserved[30];
+} __attribute__ ((packed));
+
+struct dbfs_d2fc {
+	struct dbfs_d2fc_hdr	hdr;	/* 64 byte header */
+	char			buf[];	/* d2fc buffer */
+} __attribute__ ((packed));
+
+static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size)
+{
+	struct dbfs_d2fc *d2fc;
+	unsigned int count;
+
+	d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr));
+	if (IS_ERR(d2fc))
+		return PTR_ERR(d2fc);
+	get_tod_clock_ext(d2fc->hdr.tod_ext);
+	d2fc->hdr.len = count * sizeof(struct diag2fc_data);
+	d2fc->hdr.version = DBFS_D2FC_HDR_VERSION;
+	d2fc->hdr.count = count;
+	memset(&d2fc->hdr.reserved, 0, sizeof(d2fc->hdr.reserved));
+	*data = d2fc;
+	*data_free_ptr = d2fc;
+	*size = d2fc->hdr.len + sizeof(struct dbfs_d2fc_hdr);
+	return 0;
+}
+
+static struct hypfs_dbfs_file dbfs_file_2fc = {
+	.name		= "diag_2fc",
+	.data_create	= dbfs_diag2fc_create,
+	.data_free	= diag2fc_free,
+};
+
+int hypfs_vm_init(void)
+{
+	if (!MACHINE_IS_VM)
+		return 0;
+	if (diag2fc(0, all_guests, NULL) > 0)
+		guest_query = all_guests;
+	else if (diag2fc(0, local_guest, NULL) > 0)
+		guest_query = local_guest;
+	else
+		return -EACCES;
+	return hypfs_dbfs_create_file(&dbfs_file_2fc);
+}
+
+void hypfs_vm_exit(void)
+{
+	if (!MACHINE_IS_VM)
+		return;
+	hypfs_dbfs_remove_file(&dbfs_file_2fc);
+}
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
new file mode 100644
index 000000000..2eeb0a0f5
--- /dev/null
+++ b/arch/s390/hypfs/inode.c
@@ -0,0 +1,522 @@
+/*
+ *    Hypervisor filesystem for Linux on s390.
+ *
+ *    Copyright IBM Corp. 2006, 2008
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "hypfs"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/vfs.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/time.h>
+#include <linux/parser.h>
+#include <linux/sysfs.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/uio.h>
+#include <asm/ebcdic.h>
+#include "hypfs.h"
+
+#define HYPFS_MAGIC 0x687970	/* ASCII 'hyp' */
+#define TMP_SIZE 64		/* size of temporary buffers */
+
+static struct dentry *hypfs_create_update_file(struct dentry *dir);
+
+struct hypfs_sb_info {
+	kuid_t uid;			/* uid used for files and dirs */
+	kgid_t gid;			/* gid used for files and dirs */
+	struct dentry *update_file;	/* file to trigger update */
+	time_t last_update;		/* last update time in secs since 1970 */
+	struct mutex lock;		/* lock to protect update process */
+};
+
+static const struct file_operations hypfs_file_ops;
+static struct file_system_type hypfs_type;
+static const struct super_operations hypfs_s_ops;
+
+/* start of list of all dentries, which have to be deleted on update */
+static struct dentry *hypfs_last_dentry;
+
+static void hypfs_update_update(struct super_block *sb)
+{
+	struct hypfs_sb_info *sb_info = sb->s_fs_info;
+	struct inode *inode = d_inode(sb_info->update_file);
+
+	sb_info->last_update = get_seconds();
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+}
+
+/* directory tree removal functions */
+
+static void hypfs_add_dentry(struct dentry *dentry)
+{
+	dentry->d_fsdata = hypfs_last_dentry;
+	hypfs_last_dentry = dentry;
+}
+
+static inline int hypfs_positive(struct dentry *dentry)
+{
+	return d_really_is_positive(dentry) && !d_unhashed(dentry);
+}
+
+static void hypfs_remove(struct dentry *dentry)
+{
+	struct dentry *parent;
+
+	parent = dentry->d_parent;
+	mutex_lock(&d_inode(parent)->i_mutex);
+	if (hypfs_positive(dentry)) {
+		if (d_is_dir(dentry))
+			simple_rmdir(d_inode(parent), dentry);
+		else
+			simple_unlink(d_inode(parent), dentry);
+	}
+	d_delete(dentry);
+	dput(dentry);
+	mutex_unlock(&d_inode(parent)->i_mutex);
+}
+
+static void hypfs_delete_tree(struct dentry *root)
+{
+	while (hypfs_last_dentry) {
+		struct dentry *next_dentry;
+		next_dentry = hypfs_last_dentry->d_fsdata;
+		hypfs_remove(hypfs_last_dentry);
+		hypfs_last_dentry = next_dentry;
+	}
+}
+
+static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode)
+{
+	struct inode *ret = new_inode(sb);
+
+	if (ret) {
+		struct hypfs_sb_info *hypfs_info = sb->s_fs_info;
+		ret->i_ino = get_next_ino();
+		ret->i_mode = mode;
+		ret->i_uid = hypfs_info->uid;
+		ret->i_gid = hypfs_info->gid;
+		ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
+		if (S_ISDIR(mode))
+			set_nlink(ret, 2);
+	}
+	return ret;
+}
+
+static void hypfs_evict_inode(struct inode *inode)
+{
+	clear_inode(inode);
+	kfree(inode->i_private);
+}
+
+/*
+ * Open a hypfs file: check the inode mode bits against the requested
+ * access and, if the inode carries content in i_private, duplicate that
+ * string into filp->private_data.
+ */
+static int hypfs_open(struct inode *inode, struct file *filp)
+{
+	struct hypfs_sb_info *fs_info = inode->i_sb->s_fs_info;
+	char *data = file_inode(filp)->i_private;
+
+	if ((filp->f_mode & FMODE_WRITE) && !(inode->i_mode & S_IWUGO))
+		return -EACCES;
+	if ((filp->f_mode & FMODE_READ) && !(inode->i_mode & S_IRUGO))
+		return -EACCES;
+
+	if (data) {
+		/* The copy is made with fs_info->lock held */
+		mutex_lock(&fs_info->lock);
+		filp->private_data = kstrdup(data, GFP_KERNEL);
+		mutex_unlock(&fs_info->lock);
+		if (!filp->private_data)
+			return -ENOMEM;
+	}
+
+	return nonseekable_open(inode, filp);
+}
+
+static ssize_t hypfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct file *file = iocb->ki_filp;
+	char *data = file->private_data;
+	size_t available = strlen(data);
+	loff_t pos = iocb->ki_pos;
+	size_t count;
+
+	if (pos < 0)
+		return -EINVAL;
+	if (pos >= available || !iov_iter_count(to))
+		return 0;
+	count = copy_to_iter(data + pos, available - pos, to);
+	if (!count)
+		return -EFAULT;
+	iocb->ki_pos = pos + count;
+	file_accessed(file);
+	return count;
+}
+
+static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	int rc;
+	struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
+	struct hypfs_sb_info *fs_info = sb->s_fs_info;
+	size_t count = iov_iter_count(from);
+
+	/*
+	 * Currently we only allow one update per second for two reasons:
+	 * 1. diag 204 is VERY expensive
+	 * 2. If several processes do updates in parallel and then read the
+	 *    hypfs data, the likelihood of collisions is reduced, if we restrict
+	 *    the minimum update interval. A collision occurs, if during the
+	 *    data gathering of one process another process triggers an update
+	 *    If the first process wants to ensure consistent data, it has
+	 *    to restart data collection in this case.
+	 */
+	mutex_lock(&fs_info->lock);
+	if (fs_info->last_update == get_seconds()) {
+		rc = -EBUSY;
+		goto out;
+	}
+	hypfs_delete_tree(sb->s_root);
+	if (MACHINE_IS_VM)
+		rc = hypfs_vm_create_files(sb->s_root);
+	else
+		rc = hypfs_diag_create_files(sb->s_root);
+	if (rc) {
+		pr_err("Updating the hypfs tree failed\n");
+		hypfs_delete_tree(sb->s_root);
+		goto out;
+	}
+	hypfs_update_update(sb);
+	rc = count;
+	iov_iter_advance(from, count);
+out:
+	mutex_unlock(&fs_info->lock);
+	return rc;
+}
+
+static int hypfs_release(struct inode *inode, struct file *filp)
+{
+	kfree(filp->private_data);
+	return 0;
+}
+
+enum { opt_uid, opt_gid, opt_err };
+
+static const match_table_t hypfs_tokens = {
+	{opt_uid, "uid=%u"},
+	{opt_gid, "gid=%u"},
+	{opt_err, NULL}
+};
+
+static int hypfs_parse_options(char *options, struct super_block *sb)
+{
+	char *str;
+	substring_t args[MAX_OPT_ARGS];
+	kuid_t uid;
+	kgid_t gid;
+
+	if (!options)
+		return 0;
+	while ((str = strsep(&options, ",")) != NULL) {
+		int token, option;
+		struct hypfs_sb_info *hypfs_info = sb->s_fs_info;
+
+		if (!*str)
+			continue;
+		token = match_token(str, hypfs_tokens, args);
+		switch (token) {
+		case opt_uid:
+			if (match_int(&args[0], &option))
+				return -EINVAL;
+			uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(uid))
+				return -EINVAL;
+			hypfs_info->uid = uid;
+			break;
+		case opt_gid:
+			if (match_int(&args[0], &option))
+				return -EINVAL;
+			gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(gid))
+				return -EINVAL;
+			hypfs_info->gid = gid;
+			break;
+		case opt_err:
+		default:
+			pr_err("%s is not a valid mount option\n", str);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static int hypfs_show_options(struct seq_file *s, struct dentry *root)
+{
+	struct hypfs_sb_info *hypfs_info = root->d_sb->s_fs_info;
+
+	seq_printf(s, ",uid=%u", from_kuid_munged(&init_user_ns, hypfs_info->uid));
+	seq_printf(s, ",gid=%u", from_kgid_munged(&init_user_ns, hypfs_info->gid));
+	return 0;
+}
+
+/* Set up the hypfs super block: root directory, data tree and update file. */
+static int hypfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct hypfs_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+	struct dentry *root_dentry, *update_file;
+	struct inode *root_inode;
+	int rc;
+
+	if (!sbi)
+		return -ENOMEM;
+	mutex_init(&sbi->lock);
+	sbi->uid = current_uid();
+	sbi->gid = current_gid();
+	sb->s_fs_info = sbi;
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = HYPFS_MAGIC;
+	sb->s_op = &hypfs_s_ops;
+	if (hypfs_parse_options(data, sb))
+		return -EINVAL;
+	root_inode = hypfs_make_inode(sb, S_IFDIR | 0755);
+	if (!root_inode)
+		return -ENOMEM;
+	root_inode->i_op = &simple_dir_inode_operations;
+	root_inode->i_fop = &simple_dir_operations;
+	sb->s_root = root_dentry = d_make_root(root_inode);
+	if (!root_dentry)
+		return -ENOMEM;
+	rc = MACHINE_IS_VM ? hypfs_vm_create_files(root_dentry) :
+			     hypfs_diag_create_files(root_dentry);
+	if (rc)
+		return rc;
+	/* Never store an ERR_PTR(): hypfs_kill_super() removes update_file */
+	update_file = hypfs_create_update_file(root_dentry);
+	if (IS_ERR(update_file))
+		return PTR_ERR(update_file);
+	sbi->update_file = update_file;
+	hypfs_update_update(sb);
+	pr_info("Hypervisor filesystem mounted\n");
+	return 0;
+}
+
+static struct dentry *hypfs_mount(struct file_system_type *fst, int flags,
+			const char *devname, void *data)
+{
+	return mount_single(fst, flags, data, hypfs_fill_super);
+}
+
+static void hypfs_kill_super(struct super_block *sb)
+{
+	struct hypfs_sb_info *sb_info = sb->s_fs_info;
+	/* sb_info/update_file may be unset if hypfs_fill_super() failed */
+	if (sb->s_root)
+		hypfs_delete_tree(sb->s_root);
+	if (sb_info && !IS_ERR_OR_NULL(sb_info->update_file))
+		hypfs_remove(sb_info->update_file);
+	kfree(sb_info);
+	sb->s_fs_info = NULL;
+	kill_litter_super(sb);
+}
+
+/*
+ * Create file or directory "name" below "parent"; "data" becomes i_private.
+ */
+static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
+					char *data, umode_t mode)
+{
+	struct dentry *dentry;
+	struct inode *inode;
+
+	mutex_lock(&d_inode(parent)->i_mutex);
+	dentry = lookup_one_len(name, parent, strlen(name));
+	if (IS_ERR(dentry)) {
+		dentry = ERR_PTR(-ENOMEM);
+		goto out_unlock;
+	}
+	inode = hypfs_make_inode(parent->d_sb, mode);
+	if (!inode) {
+		dput(dentry);
+		dentry = ERR_PTR(-ENOMEM);
+		goto out_unlock;
+	}
+	inode->i_private = data;
+	if (S_ISDIR(mode)) {
+		inode->i_op = &simple_dir_inode_operations;
+		inode->i_fop = &simple_dir_operations;
+		inc_nlink(d_inode(parent));
+	} else if (S_ISREG(mode)) {
+		inode->i_fop = &hypfs_file_ops;
+		inode->i_size = data ? strlen(data) : 0;
+	} else
+		BUG();
+	d_instantiate(dentry, inode);
+	dget(dentry);
+out_unlock:
+	mutex_unlock(&d_inode(parent)->i_mutex);
+	return dentry;
+}
+
+/*
+ * Create directory @name below @parent and, on success, register it
+ * with hypfs_add_dentry(). Returns the dentry or an ERR_PTR() value.
+ */
+struct dentry *hypfs_mkdir(struct dentry *parent, const char *name)
+{
+	struct dentry *dir;
+
+	dir = hypfs_create_file(parent, name, NULL, S_IFDIR | DIR_MODE);
+	if (!IS_ERR(dir))
+		hypfs_add_dentry(dir);
+	return dir;
+}
+
+/*
+ * Create the "update" file in @dir and return its dentry (or an
+ * ERR_PTR() value from hypfs_create_file()).
+ */
+static struct dentry *hypfs_create_update_file(struct dentry *dir)
+{
+	/*
+	 * Deliberately not put on the 'delete' list via hypfs_add_dentry():
+	 * the update file must not be removed when the tree is updated.
+	 */
+	return hypfs_create_file(dir, "update", NULL,
+				 S_IFREG | UPDATE_FILE_MODE);
+}
+
+/*
+ * Create a regular file @name in @dir whose content is @value printed
+ * as an unsigned decimal number followed by a newline.
+ *
+ * The text is duplicated with kstrdup() and handed to the new file; on
+ * failure the copy is freed again. Returns the dentry, registered with
+ * hypfs_add_dentry(), or ERR_PTR(-ENOMEM).
+ */
+struct dentry *hypfs_create_u64(struct dentry *dir,
+				const char *name, __u64 value)
+{
+	char text[TMP_SIZE];
+	struct dentry *file;
+	char *copy;
+
+	snprintf(text, TMP_SIZE, "%llu\n", (unsigned long long int)value);
+	copy = kstrdup(text, GFP_KERNEL);
+	if (!copy)
+		return ERR_PTR(-ENOMEM);
+	file = hypfs_create_file(dir, name, copy, S_IFREG | REG_FILE_MODE);
+	if (!IS_ERR(file)) {
+		hypfs_add_dentry(file);
+		return file;
+	}
+	kfree(copy);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Create a regular file @name in @dir whose content is @string with a
+ * newline appended.
+ *
+ * The content buffer is allocated here (string length plus newline and
+ * terminating NUL) and freed again if the file cannot be created.
+ * Returns the dentry, registered with hypfs_add_dentry(), or
+ * ERR_PTR(-ENOMEM).
+ */
+struct dentry *hypfs_create_str(struct dentry *dir,
+				const char *name, char *string)
+{
+	struct dentry *file;
+	char *content;
+
+	content = kmalloc(strlen(string) + 2, GFP_KERNEL);
+	if (!content)
+		return ERR_PTR(-ENOMEM);
+	sprintf(content, "%s\n", string);
+	file = hypfs_create_file(dir, name, content, S_IFREG | REG_FILE_MODE);
+	if (!IS_ERR(file)) {
+		hypfs_add_dentry(file);
+		return file;
+	}
+	kfree(content);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * File operations for hypfs regular files; hypfs_create_file() installs
+ * them as i_fop on every S_IFREG inode. Seeking is disabled (no_llseek).
+ */
+static const struct file_operations hypfs_file_ops = {
+	.open		= hypfs_open,
+	.release	= hypfs_release,
+	.read_iter	= hypfs_read_iter,
+	.write_iter	= hypfs_write_iter,
+	.llseek		= no_llseek,
+};
+
+/*
+ * File system type "s390_hypfs"; registered by hypfs_init() and
+ * unregistered by hypfs_exit().
+ */
+static struct file_system_type hypfs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "s390_hypfs",
+	.mount		= hypfs_mount,
+	.kill_sb	= hypfs_kill_super
+};
+MODULE_ALIAS_FS("s390_hypfs");
+
+/*
+ * Superblock operations; installed as sb->s_op while the superblock is
+ * being filled.
+ */
+static const struct super_operations hypfs_s_ops = {
+	.statfs		= simple_statfs,
+	.evict_inode	= hypfs_evict_inode,
+	.show_options	= hypfs_show_options,
+};
+
+/*
+ * Module init: bring up the hypfs sub-components (dbfs, diag, vm, sprp,
+ * diag0c) in that order, create the "s390" mount point below
+ * hypervisor_kobj and register the file system type.
+ *
+ * Returns 0 on success. A failing diag/vm/sprp/diag0c init is reported
+ * as -ENODATA regardless of its own return value; the other steps
+ * propagate their error code. On any failure after hypfs_dbfs_init()
+ * the steps completed so far are undone in reverse order and the error
+ * is logged.
+ */
+static int __init hypfs_init(void)
+{
+	int rc;
+
+	rc = hypfs_dbfs_init();
+	if (rc)
+		return rc;
+	if (hypfs_diag_init()) {
+		rc = -ENODATA;
+		goto fail_dbfs_exit;
+	}
+	if (hypfs_vm_init()) {
+		rc = -ENODATA;
+		goto fail_hypfs_diag_exit;
+	}
+	if (hypfs_sprp_init()) {
+		rc = -ENODATA;
+		goto fail_hypfs_vm_exit;
+	}
+	if (hypfs_diag0c_init()) {
+		rc = -ENODATA;
+		goto fail_hypfs_sprp_exit;
+	}
+	rc = sysfs_create_mount_point(hypervisor_kobj, "s390");
+	if (rc)
+		goto fail_hypfs_diag0c_exit;
+	rc = register_filesystem(&hypfs_type);
+	if (rc)
+		goto fail_filesystem;
+	return 0;
+
+	/* Unwind ladder: each label undoes one step and falls through. */
+fail_filesystem:
+	sysfs_remove_mount_point(hypervisor_kobj, "s390");
+fail_hypfs_diag0c_exit:
+	hypfs_diag0c_exit();
+fail_hypfs_sprp_exit:
+	hypfs_sprp_exit();
+fail_hypfs_vm_exit:
+	hypfs_vm_exit();
+fail_hypfs_diag_exit:
+	hypfs_diag_exit();
+fail_dbfs_exit:
+	hypfs_dbfs_exit();
+	pr_err("Initialization of hypfs failed with rc=%i\n", rc);
+	return rc;
+}
+
+/*
+ * Module exit: undo everything hypfs_init() set up, in reverse order of
+ * initialization.
+ */
+static void __exit hypfs_exit(void)
+{
+	unregister_filesystem(&hypfs_type);
+	sysfs_remove_mount_point(hypervisor_kobj, "s390");
+	hypfs_diag0c_exit();
+	hypfs_sprp_exit();
+	hypfs_vm_exit();
+	hypfs_diag_exit();
+	hypfs_dbfs_exit();
+}
+
+module_init(hypfs_init)
+module_exit(hypfs_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michael Holzheu <holzheu@de.ibm.com>");
+MODULE_DESCRIPTION("s390 Hypervisor Filesystem");
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
new file mode 100644
index 000000000..c631f98fd
--- /dev/null
+++ b/arch/s390/include/asm/Kbuild
@@ -0,0 +1,8 @@
+
+
+generic-y += clkdev.h
+generic-y += irq_work.h
+generic-y += mcs_spinlock.h
+generic-y += preempt.h
+generic-y += scatterlist.h
+generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
new file mode 100644
index 000000000..bd93ff666
--- /dev/null
+++ b/arch/s390/include/asm/airq.h
@@ -0,0 +1,103 @@
+/*
+ *    Copyright IBM Corp. 2002, 2007
+ *    Author(s): Ingo Adlung <adlung@de.ibm.com>
+ *		 Cornelia Huck <cornelia.huck@de.ibm.com>
+ *		 Arnd Bergmann <arndb@de.ibm.com>
+ *		 Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_AIRQ_H
+#define _ASM_S390_AIRQ_H
+
+#include <linux/bit_spinlock.h>
+
+/*
+ * Descriptor of one adapter interrupt handler, as passed to
+ * register_adapter_interrupt() and unregister_adapter_interrupt().
+ */
+struct airq_struct {
+	struct hlist_node list;		/* Handler queueing. */
+	void (*handler)(struct airq_struct *);	/* Thin-interrupt handler */
+	u8 *lsi_ptr;			/* Local-Summary-Indicator pointer */
+	u8 lsi_mask;			/* Local-Summary-Indicator mask */
+	u8 isc;				/* Interrupt-subclass */
+	/* NOTE(review): presumably AIRQ_* flag bits such as AIRQ_PTR_ALLOCATED - confirm */
+	u8 flags;
+};
+
+#define AIRQ_PTR_ALLOCATED	0x01
+
+int register_adapter_interrupt(struct airq_struct *airq);
+void unregister_adapter_interrupt(struct airq_struct *airq);
+
+/*
+ * Adapter interrupt bit vector, created by airq_iv_create() and
+ * released by airq_iv_release().
+ *
+ * NOTE(review): judging by the AIRQ_IV_* flag comments, the avail,
+ * bitlock, ptr and data members are presumably only allocated when the
+ * matching flag is passed to airq_iv_create() - confirm.
+ */
+struct airq_iv {
+	unsigned long *vector;	/* Adapter interrupt bit vector */
+	unsigned long *avail;	/* Allocation bit mask for the bit vector */
+	unsigned long *bitlock;	/* Lock bit mask for the bit vector */
+	unsigned long *ptr;	/* Pointer associated with each bit */
+	unsigned int *data;	/* 32 bit value associated with each bit */
+	unsigned long bits;	/* Number of bits in the vector */
+	unsigned long end;	/* Number of highest allocated bit + 1 */
+	spinlock_t lock;	/* Lock to protect alloc & free */
+};
+
+#define AIRQ_IV_ALLOC	1	/* Use an allocation bit mask */
+#define AIRQ_IV_BITLOCK	2	/* Allocate the lock bit mask */
+#define AIRQ_IV_PTR	4	/* Allocate the ptr array */
+#define AIRQ_IV_DATA	8	/* Allocate the data array */
+
+struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
+void airq_iv_release(struct airq_iv *iv);
+unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num);
+void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num);
+unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start,
+			   unsigned long end);
+
+/* Allocate a single bit from @iv; shorthand for airq_iv_alloc(iv, 1). */
+static inline unsigned long airq_iv_alloc_bit(struct airq_iv *iv)
+{
+	return airq_iv_alloc(iv, 1);
+}
+
+/* Free the single bit @bit of @iv; shorthand for airq_iv_free(iv, bit, 1). */
+static inline void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit)
+{
+	airq_iv_free(iv, bit, 1);
+}
+
+/* Return iv->end, i.e. the number of the highest allocated bit plus one. */
+static inline unsigned long airq_iv_end(struct airq_iv *iv)
+{
+	return iv->end;
+}
+
+/*
+ * Take the bit spinlock that belongs to @bit in iv->bitlock. The bit
+ * number is XORed with BITS_PER_LONG - 1 before it is handed to
+ * bit_spin_lock(), which mirrors its position within the word.
+ */
+static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit)
+{
+	bit_spin_lock(bit ^ (unsigned long)(BITS_PER_LONG - 1), iv->bitlock);
+}
+
+/*
+ * Release the bit spinlock that belongs to @bit in iv->bitlock, using
+ * the same bit number conversion (XOR with BITS_PER_LONG - 1) as
+ * airq_iv_lock().
+ */
+static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit)
+{
+	bit_spin_unlock(bit ^ (unsigned long)(BITS_PER_LONG - 1), iv->bitlock);
+}
+
+/* Store the 32 bit value @data for @bit; no check that iv->data exists. */
+static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit,
+				    unsigned int data)
+{
+	iv->data[bit] = data;
+}
+
+/* Return the 32 bit value stored for @bit; no check that iv->data exists. */
+static inline unsigned int airq_iv_get_data(struct airq_iv *iv,
+					    unsigned long bit)
+{
+	return iv->data[bit];
+}
+
+/* Store the pointer-sized value @ptr for @bit; no check that iv->ptr exists. */
+static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit,
+				   unsigned long ptr)
+{
+	iv->ptr[bit] = ptr;
+}
+
+/* Return the pointer-sized value stored for @bit; no check that iv->ptr exists. */
+static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv,
+					    unsigned long bit)
+{
+	return iv->ptr[bit];
+}
+
+#endif /* _ASM_S390_AIRQ_H */
diff --git a/arch/s390/include/asm/appldata.h b/arch/s390/include/asm/appldata.h
new file mode 100644
index 000000000..16887c5fd
--- /dev/null
+++ b/arch/s390/include/asm/appldata.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright IBM Corp. 2006
+ *
+ * Author(s): Melissa Howland <melissah@us.ibm.com>
+ */
+
+#ifndef _ASM_S390_APPLDATA_H
+#define _ASM_S390_APPLDATA_H
+
+#include <asm/io.h>
+
+#define APPLDATA_START_INTERVAL_REC	0x80
+#define APPLDATA_STOP_REC		0x81
+#define APPLDATA_GEN_EVENT_REC		0x82
+#define APPLDATA_START_CONFIG_REC	0x83
+
+/*
+ * Parameter list for DIAGNOSE X'DC'
+ *
+ * Built on the stack and passed by address in appldata_asm(). The
+ * structure is packed, so the field layout is exactly as declared.
+ */
+struct appldata_parameter_list {
+	u16 diag;
+	u8  function;
+	u8  parlist_length;
+	u32 unused01;
+	u16 reserved;
+	u16 buffer_length;
+	u32 unused02;
+	u64 product_id_addr;
+	u64 buffer_addr;
+} __attribute__ ((packed));
+
+/*
+ * Product identification record; appldata_asm() stores its address in
+ * the product_id_addr field of the DIAGNOSE X'DC' parameter list.
+ * Packed, so the field layout is exactly as declared.
+ */
+struct appldata_product_id {
+	char prod_nr[7];	/* product number */
+	u16  prod_fn;		/* product function */
+	u8   record_nr; 	/* record number */
+	u16  version_nr;	/* version */
+	u16  release_nr;	/* release */
+	u16  mod_lvl;		/* modification level */
+} __attribute__ ((packed));
+
+/*
+ * Issue DIAGNOSE X'DC' for the product @id with function code @fn and
+ * the data buffer @buffer of @length bytes.
+ *
+ * Returns -EOPNOTSUPP when MACHINE_IS_VM is false, otherwise the value
+ * the diag instruction leaves in its result register.
+ *
+ * NOTE(review): unused01, reserved and unused02 of the on-stack
+ * parameter list are never written, so they carry whatever was on the
+ * stack - confirm this is acceptable for the hypervisor interface.
+ * NOTE(review): product_id_addr is the plain address of @id while
+ * buffer_addr goes through virt_to_phys() - confirm both are intended.
+ */
+static inline int appldata_asm(struct appldata_product_id *id,
+			       unsigned short fn, void *buffer,
+			       unsigned short length)
+{
+	struct appldata_parameter_list parm_list;
+	int ry;
+
+	if (!MACHINE_IS_VM)
+		return -EOPNOTSUPP;
+	parm_list.diag = 0xdc;
+	parm_list.function = fn;
+	parm_list.parlist_length = sizeof(parm_list);
+	parm_list.buffer_length = length;
+	parm_list.product_id_addr = (unsigned long) id;
+	parm_list.buffer_addr = virt_to_phys(buffer);
+	/*
+	 * The "m" inputs tell the compiler that the asm reads parm_list
+	 * and *id, so both are written to memory before the diag.
+	 */
+	asm volatile(
+		"	diag	%1,%0,0xdc"
+		: "=d" (ry)
+		: "d" (&parm_list), "m" (parm_list), "m" (*id)
+		: "cc");
+	return ry;
+}
+
+#endif /* _ASM_S390_APPLDATA_H */
diff --git a/arch/s390/include/asm/asm-offsets.h b/arch/s390/include/asm/asm-offsets.h
new file mode 100644
index 000000000..d370ee36a
--- /dev/null
+++ b/arch/s390/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
new file mode 100644
index 000000000..adbe3802e
--- /dev/null
+++ b/arch/s390/include/asm/atomic.h
@@ -0,0 +1,320 @@
+/*
+ * Copyright IBM Corp. 1999, 2009
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *	      Denis Joseph Barrow,
+ *	      Arnd Bergmann <arndb@de.ibm.com>,
+ *
+ * Atomic operations that C can't guarantee us.
+ * Useful for resource counting etc.
+ * s390 uses 'Compare And Swap' for atomicity in SMP environment.
+ *
+ */
+
+#ifndef __ARCH_S390_ATOMIC__
+#define __ARCH_S390_ATOMIC__
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/barrier.h>
+#include <asm/cmpxchg.h>
+
+#define ATOMIC_INIT(i)  { (i) }
+
+#define __ATOMIC_NO_BARRIER	"\n"
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+/*
+ * z196 variant: the operation is a single instruction (op_string is one
+ * of the __ATOMIC_* mnemonics below) applied to (ptr)->counter with
+ * operand op_val. __barrier is emitted before and after the
+ * instruction; pass __ATOMIC_NO_BARRIER for none. The macro evaluates
+ * to old_val, which atomic_add_return() treats as the counter value
+ * before the operation.
+ */
+#define __ATOMIC_OR	"lao"
+#define __ATOMIC_AND	"lan"
+#define __ATOMIC_ADD	"laa"
+#define __ATOMIC_BARRIER "bcr	14,0\n"
+
+#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier)		\
+({									\
+	int old_val;							\
+									\
+	typecheck(atomic_t *, ptr);					\
+	asm volatile(							\
+		__barrier						\
+		op_string "	%0,%2,%1\n"				\
+		__barrier						\
+		: "=d" (old_val), "+Q" ((ptr)->counter)			\
+		: "d" (op_val)						\
+		: "cc", "memory");					\
+	old_val;							\
+})
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+/*
+ * Pre-z196 variant: a compare-and-swap retry loop. The counter is
+ * loaded into old_val, copied to new_val, combined with op_val by the
+ * register instruction op_string, and written back with "cs"; "jl 0b"
+ * repeats from the copy step when the swap did not succeed. The
+ * __barrier argument is accepted but not emitted in this variant. The
+ * macro evaluates to old_val.
+ */
+#define __ATOMIC_OR	"or"
+#define __ATOMIC_AND	"nr"
+#define __ATOMIC_ADD	"ar"
+#define __ATOMIC_BARRIER "\n"
+
+#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier)		\
+({									\
+	int old_val, new_val;						\
+									\
+	typecheck(atomic_t *, ptr);					\
+	asm volatile(							\
+		"	l	%0,%2\n"				\
+		"0:	lr	%1,%0\n"				\
+		op_string "	%1,%3\n"				\
+		"	cs	%0,%1,%2\n"				\
+		"	jl	0b"					\
+		: "=&d" (old_val), "=&d" (new_val), "+Q" ((ptr)->counter)\
+		: "d" (op_val)						\
+		: "cc", "memory");					\
+	old_val;							\
+})
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+/* Return v->counter, read with a single "l" instruction in volatile asm. */
+static inline int atomic_read(const atomic_t *v)
+{
+	int c;
+
+	asm volatile(
+		"	l	%0,%1\n"
+		: "=d" (c) : "Q" (v->counter));
+	return c;
+}
+
+/* Set v->counter to @i with a single "st" instruction in volatile asm. */
+static inline void atomic_set(atomic_t *v, int i)
+{
+	asm volatile(
+		"	st	%1,%0\n"
+		: "=Q" (v->counter) : "d" (i));
+}
+
+/*
+ * Add @i to @v and return the result: the value __ATOMIC_LOOP() yields
+ * (the counter before the add) plus @i. Uses __ATOMIC_BARRIER.
+ */
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i;
+}
+
+/*
+ * Add @i to @v without returning a value and without barrier.
+ *
+ * With z196 features, a compile-time constant @i in the range
+ * -128..127 is added by an "asi" instruction with an immediate operand;
+ * everything else goes through __ATOMIC_LOOP().
+ */
+static inline void atomic_add(int i, atomic_t *v)
+{
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+	if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+		asm volatile(
+			"asi	%0,%1\n"
+			: "+Q" (v->counter)
+			: "i" (i)
+			: "cc", "memory");
+		return;
+	}
+#endif
+	__ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_NO_BARRIER);
+}
+
+/*
+ * Derived operations, all expressed through atomic_add() and
+ * atomic_add_return(); subtraction adds the negated operand.
+ */
+#define atomic_add_negative(_i, _v)	(atomic_add_return(_i, _v) < 0)
+#define atomic_inc(_v)			atomic_add(1, _v)
+#define atomic_inc_return(_v)		atomic_add_return(1, _v)
+#define atomic_inc_and_test(_v)		(atomic_add_return(1, _v) == 0)
+#define atomic_sub(_i, _v)		atomic_add(-(int)(_i), _v)
+#define atomic_sub_return(_i, _v)	atomic_add_return(-(int)(_i), _v)
+#define atomic_sub_and_test(_i, _v)	(atomic_sub_return(_i, _v) == 0)
+#define atomic_dec(_v)			atomic_sub(1, _v)
+#define atomic_dec_return(_v)		atomic_sub_return(1, _v)
+#define atomic_dec_and_test(_v)		(atomic_sub_return(1, _v) == 0)
+
+/* Clear the bits of @mask in @v by ANDing with ~mask; no barrier. */
+static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	__ATOMIC_LOOP(v, ~mask, __ATOMIC_AND, __ATOMIC_NO_BARRIER);
+}
+
+/* Set the bits of @mask in @v by ORing with mask; no barrier. */
+static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	__ATOMIC_LOOP(v, mask, __ATOMIC_OR, __ATOMIC_NO_BARRIER);
+}
+
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+/*
+ * Compare-and-swap on v->counter using the "cs" instruction with @old
+ * as the comparison value and @new as the replacement. @old is an
+ * in/out operand of the asm and is returned afterwards; callers in this
+ * file treat a return value equal to the @old they passed as success.
+ */
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	asm volatile(
+		"	cs	%0,%2,%1"
+		: "+d" (old), "+Q" (v->counter)
+		: "d" (new)
+		: "cc", "memory");
+	return old;
+}
+
+/*
+ * Add @a to @v unless the counter equals @u.
+ *
+ * Retries atomic_cmpxchg() until it succeeds or the counter is seen to
+ * be @u. Returns the counter value observed before the add (which is
+ * @u when nothing was added).
+ */
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int seen = atomic_read(v);
+	int prev;
+
+	while (likely(seen != u)) {
+		prev = atomic_cmpxchg(v, seen, seen + a);
+		if (likely(prev == seen))
+			break;
+		/* lost a race: retry with the value cmpxchg reported */
+		seen = prev;
+	}
+	return seen;
+}
+
+
+#undef __ATOMIC_LOOP
+
+#define ATOMIC64_INIT(i)  { (i) }
+
+#define __ATOMIC64_NO_BARRIER	"\n"
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+/*
+ * z196 variant, 64 bit: the operation is a single instruction
+ * (op_string is one of the __ATOMIC64_* mnemonics below) applied to
+ * (ptr)->counter with operand op_val. __barrier is emitted before and
+ * after the instruction. The macro evaluates to old_val, which
+ * atomic64_add_return() treats as the counter value before the
+ * operation.
+ */
+#define __ATOMIC64_OR	"laog"
+#define __ATOMIC64_AND	"lang"
+#define __ATOMIC64_ADD	"laag"
+#define __ATOMIC64_BARRIER "bcr	14,0\n"
+
+#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier)		\
+({									\
+	long long old_val;						\
+									\
+	typecheck(atomic64_t *, ptr);					\
+	asm volatile(							\
+		__barrier						\
+		op_string "	%0,%2,%1\n"				\
+		__barrier						\
+		: "=d" (old_val), "+Q" ((ptr)->counter)			\
+		: "d" (op_val)						\
+		: "cc", "memory");					\
+	old_val;							\
+})
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+/*
+ * Pre-z196 variant, 64 bit: a compare-and-swap retry loop. The counter
+ * is loaded into old_val, copied to new_val, combined with op_val by
+ * the register instruction op_string, and written back with "csg";
+ * "jl 0b" repeats from the copy step when the swap did not succeed.
+ * The __barrier argument is accepted but not emitted in this variant.
+ * The macro evaluates to old_val.
+ */
+#define __ATOMIC64_OR	"ogr"
+#define __ATOMIC64_AND	"ngr"
+#define __ATOMIC64_ADD	"agr"
+#define __ATOMIC64_BARRIER "\n"
+
+#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier)		\
+({									\
+	long long old_val, new_val;					\
+									\
+	typecheck(atomic64_t *, ptr);					\
+	asm volatile(							\
+		"	lg	%0,%2\n"				\
+		"0:	lgr	%1,%0\n"				\
+		op_string "	%1,%3\n"				\
+		"	csg	%0,%1,%2\n"				\
+		"	jl	0b"					\
+		: "=&d" (old_val), "=&d" (new_val), "+Q" ((ptr)->counter)\
+		: "d" (op_val)						\
+		: "cc", "memory");					\
+	old_val;							\
+})
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+/* Return v->counter, read with a single "lg" instruction in volatile asm. */
+static inline long long atomic64_read(const atomic64_t *v)
+{
+	long long c;
+
+	asm volatile(
+		"	lg	%0,%1\n"
+		: "=d" (c) : "Q" (v->counter));
+	return c;
+}
+
+/* Set v->counter to @i with a single "stg" instruction in volatile asm. */
+static inline void atomic64_set(atomic64_t *v, long long i)
+{
+	asm volatile(
+		"	stg	%1,%0\n"
+		: "=Q" (v->counter) : "d" (i));
+}
+
+/*
+ * Add @i to @v and return the result: the value __ATOMIC64_LOOP()
+ * yields (the counter before the add) plus @i. Uses __ATOMIC64_BARRIER.
+ */
+static inline long long atomic64_add_return(long long i, atomic64_t *v)
+{
+	return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i;
+}
+
+/*
+ * Add @i to @v without returning a value and without barrier.
+ *
+ * With z196 features, a compile-time constant @i in the range
+ * -128..127 is added by an "agsi" instruction with an immediate
+ * operand; everything else goes through __ATOMIC64_LOOP().
+ */
+static inline void atomic64_add(long long i, atomic64_t *v)
+{
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+	if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+		asm volatile(
+			"agsi	%0,%1\n"
+			: "+Q" (v->counter)
+			: "i" (i)
+			: "cc", "memory");
+		return;
+	}
+#endif
+	__ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_NO_BARRIER);
+}
+
+/* Clear the bits of @mask in @v by ANDing with ~mask; no barrier. */
+static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v)
+{
+	__ATOMIC64_LOOP(v, ~mask, __ATOMIC64_AND, __ATOMIC64_NO_BARRIER);
+}
+
+/* Set the bits of @mask in @v by ORing with mask; no barrier. */
+static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v)
+{
+	__ATOMIC64_LOOP(v, mask, __ATOMIC64_OR, __ATOMIC64_NO_BARRIER);
+}
+
+#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+
+/*
+ * Compare-and-swap on v->counter using the "csg" instruction with @old
+ * as the comparison value and @new as the replacement. @old is an
+ * in/out operand of the asm and is returned afterwards; callers in this
+ * file treat a return value equal to the @old they passed as success.
+ */
+static inline long long atomic64_cmpxchg(atomic64_t *v,
+					     long long old, long long new)
+{
+	asm volatile(
+		"	csg	%0,%2,%1"
+		: "+d" (old), "+Q" (v->counter)
+		: "d" (new)
+		: "cc", "memory");
+	return old;
+}
+
+#undef __ATOMIC64_LOOP
+
+/*
+ * Add @i to @v unless the counter equals @u.
+ *
+ * Retries atomic64_cmpxchg() until it succeeds or the counter is seen
+ * to be @u. Returns non-zero if the add was performed, 0 if the counter
+ * was @u.
+ */
+static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
+{
+	long long seen = atomic64_read(v);
+	long long prev;
+
+	while (likely(seen != u)) {
+		prev = atomic64_cmpxchg(v, seen, seen + i);
+		if (likely(prev == seen))
+			break;
+		/* lost a race: retry with the value cmpxchg reported */
+		seen = prev;
+	}
+	return seen != u;
+}
+
+/*
+ * Decrement @v by one unless that would make it negative.
+ *
+ * Returns the counter value minus one as last computed; a negative
+ * return value means the counter was left unchanged.
+ */
+static inline long long atomic64_dec_if_positive(atomic64_t *v)
+{
+	long long cur = atomic64_read(v);
+	long long dec, prev;
+
+	for (dec = cur - 1; likely(dec >= 0); dec = cur - 1) {
+		prev = atomic64_cmpxchg(v, cur, dec);
+		if (likely(prev == cur))
+			break;
+		/* lost a race: retry with the value cmpxchg reported */
+		cur = prev;
+	}
+	return dec;
+}
+
+/*
+ * Derived 64 bit operations, expressed through atomic64_add(),
+ * atomic64_add_return() and atomic64_add_unless(); subtraction adds the
+ * negated operand.
+ */
+#define atomic64_add_negative(_i, _v)	(atomic64_add_return(_i, _v) < 0)
+#define atomic64_inc(_v)		atomic64_add(1, _v)
+#define atomic64_inc_return(_v)		atomic64_add_return(1, _v)
+#define atomic64_inc_and_test(_v)	(atomic64_add_return(1, _v) == 0)
+#define atomic64_sub_return(_i, _v)	atomic64_add_return(-(long long)(_i), _v)
+#define atomic64_sub(_i, _v)		atomic64_add(-(long long)(_i), _v)
+#define atomic64_sub_and_test(_i, _v)	(atomic64_sub_return(_i, _v) == 0)
+#define atomic64_dec(_v)		atomic64_sub(1, _v)
+#define atomic64_dec_return(_v)		atomic64_sub_return(1, _v)
+#define atomic64_dec_and_test(_v)	(atomic64_sub_return(1, _v) == 0)
+#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
+
+#endif /* __ARCH_S390_ATOMIC__  */
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
new file mode 100644
index 000000000..8d724718e
--- /dev/null
+++ b/arch/s390/include/asm/barrier.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+/*
+ * __ASM_BARRIER is the instruction used for every barrier below:
+ * "bcr 14,0" with z196 features, "bcr 15,0" otherwise.
+ */
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+/* Fast-BCR without checkpoint synchronization */
+#define __ASM_BARRIER "bcr 14,0\n"
+#else
+#define __ASM_BARRIER "bcr 15,0\n"
+#endif
+
+/* Full barrier: the barrier instruction plus a compiler "memory" clobber. */
+#define mb() do {  asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
+
+/* All read/write/DMA/SMP barrier flavours map to the full mb(). */
+#define rmb()				mb()
+#define wmb()				mb()
+#define dma_rmb()			rmb()
+#define dma_wmb()			wmb()
+#define smp_mb()			mb()
+#define smp_rmb()			rmb()
+#define smp_wmb()			wmb()
+
+/* Data dependency barriers expand to nothing here. */
+#define read_barrier_depends()		do { } while (0)
+#define smp_read_barrier_depends()	do { } while (0)
+
+#define smp_mb__before_atomic()		smp_mb()
+#define smp_mb__after_atomic()		smp_mb()
+
+/* Assign value to var, then issue a full barrier. */
+#define set_mb(var, value)		do { var = value; mb(); } while (0)
+
+/*
+ * smp_store_release - store @v to *@p after a compiler barrier.
+ *
+ * @p must point to a type accepted by compiletime_assert_atomic_type().
+ * Only barrier() is used, no barrier instruction.
+ * NOTE(review): presumably sufficient because of the s390 memory
+ * ordering rules - confirm.
+ *
+ * @p is parenthesized on every use so that pointer expressions such as
+ * "base + 1" dereference as intended.
+ */
+#define smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*(p));				\
+	barrier();							\
+	ACCESS_ONCE(*(p)) = (v);					\
+} while (0)
+
+/*
+ * smp_load_acquire - load *@p, then issue a compiler barrier.
+ *
+ * Evaluates to the loaded value. @p must point to a type accepted by
+ * compiletime_assert_atomic_type(). Only barrier() is used, no barrier
+ * instruction.
+ * NOTE(review): presumably sufficient because of the s390 memory
+ * ordering rules - confirm.
+ *
+ * @p is parenthesized on every use so that pointer expressions such as
+ * "base + 1" dereference as intended.
+ */
+#define smp_load_acquire(p)						\
+({									\
+	typeof(*(p)) ___p1 = ACCESS_ONCE(*(p));			\
+	compiletime_assert_atomic_type(*(p));				\
+	barrier();							\
+	___p1;								\
+})
+
+#endif /* __ASM_BARRIER_H */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
new file mode 100644
index 000000000..9b68e98a7
--- /dev/null
+++ b/arch/s390/include/asm/bitops.h
@@ -0,0 +1,454 @@
+/*
+ *    Copyright IBM Corp. 1999,2013
+ *
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *
+ * The description below was taken in large parts from the powerpc
+ * bitops header file:
+ * Within a word, bits are numbered LSB first.  Lots of places make
+ * this assumption by directly testing bits with (val & (1<<nr)).
+ * This can cause confusion for large (> 1 word) bitmaps on a
+ * big-endian system because, unlike little endian, the number of each
+ * bit depends on the word size.
+ *
+ * The bitop functions are defined to work on unsigned longs, so for an
+ * s390x system the bits end up numbered:
+ *   |63..............0|127............64|191...........128|255...........192|
+ * and on s390:
+ *   |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224|
+ *
+ * There are a few little-endian macros used mostly for filesystem
+ * bitmaps, these work on similar bit arrays layouts, but
+ * byte-oriented:
+ *   |7...0|15...8|23...16|31...24|39...32|47...40|55...48|63...56|
+ *
+ * The main difference is that bit 3-5 (64b) or 3-4 (32b) in the bit
+ * number field needs to be reversed compared to the big-endian bit
+ * fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b).
+ *
+ * We also have special functions which work with an MSB0 encoding:
+ * on an s390x system the bits are numbered:
+ *   |0..............63|64............127|128...........191|192...........255|
+ * and on s390:
+ *   |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
+ *
+ * The main difference is that bit 0-63 (64b) or 0-31 (32b) in the bit
+ * number field needs to be reversed compared to the LSB0 encoded bit
+ * fields. This can be achieved by XOR with 0x3f (64b) or 0x1f (32b).
+ *
+ */
+
+#ifndef _S390_BITOPS_H
+#define _S390_BITOPS_H
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <linux/typecheck.h>
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+
+#define __BITOPS_NO_BARRIER	"\n"
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+/*
+ * z196 variant: the bit operation is a single instruction (__op_string
+ * is one of the __BITOPS_* mnemonics below) applied to the unsigned
+ * long at __addr with operand __val. __barrier is emitted before and
+ * after the instruction. The macro evaluates to __old, which the
+ * test_and_*_bit() helpers use as the word content before the
+ * operation.
+ */
+#define __BITOPS_OR		"laog"
+#define __BITOPS_AND		"lang"
+#define __BITOPS_XOR		"laxg"
+#define __BITOPS_BARRIER	"bcr	14,0\n"
+
+#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier)	\
+({								\
+	unsigned long __old;					\
+								\
+	typecheck(unsigned long *, (__addr));			\
+	asm volatile(						\
+		__barrier					\
+		__op_string "	%0,%2,%1\n"			\
+		__barrier					\
+		: "=d" (__old),	"+Q" (*(__addr))		\
+		: "d" (__val)					\
+		: "cc", "memory");				\
+	__old;							\
+})
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+/*
+ * Pre-z196 variant: a compare-and-swap retry loop. The word at __addr
+ * is loaded into __old, copied to __new, combined with __val by the
+ * register instruction __op_string, and written back with "csg";
+ * "jl 0b" repeats from the copy step when the swap did not succeed.
+ * The __barrier argument is accepted but not emitted in this variant.
+ * The macro evaluates to __old.
+ */
+#define __BITOPS_OR		"ogr"
+#define __BITOPS_AND		"ngr"
+#define __BITOPS_XOR		"xgr"
+#define __BITOPS_BARRIER	"\n"
+
+#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier)	\
+({								\
+	unsigned long __old, __new;				\
+								\
+	typecheck(unsigned long *, (__addr));			\
+	asm volatile(						\
+		"	lg	%0,%2\n"			\
+		"0:	lgr	%1,%0\n"			\
+		__op_string "	%1,%3\n"			\
+		"	csg	%0,%1,%2\n"			\
+		"	jl	0b"				\
+		: "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\
+		: "d" (__val)					\
+		: "cc", "memory");				\
+	__old;							\
+})
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
+
+/*
+ * Return the address of the unsigned long that contains bit @nr of the
+ * bitmap starting at @ptr: the bit number is rounded down to a multiple
+ * of BITS_PER_LONG and converted to a byte offset.
+ */
+static inline unsigned long *
+__bitops_word(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned long first_bit = nr & ~(unsigned long)(BITS_PER_LONG - 1);
+
+	return (unsigned long *)((unsigned long)ptr + (first_bit >> 3));
+}
+
+/*
+ * Return the address of the byte that contains bit @nr of the bitmap
+ * starting at @ptr. XORing with BITS_PER_LONG - 8 reverses the byte
+ * index within each long before it is turned into a byte offset.
+ */
+static inline unsigned char *
+__bitops_byte(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned long byte_idx = (nr ^ (BITS_PER_LONG - 8)) >> 3;
+
+	return (unsigned char *)ptr + byte_idx;
+}
+
+/*
+ * Set bit @nr in the bitmap at @ptr; no barrier.
+ *
+ * With zEC12 features and a compile-time constant @nr the bit is set by
+ * an "oi" instruction on the containing byte; otherwise the containing
+ * long is updated through __BITOPS_LOOP().
+ */
+static inline void set_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned long *addr = __bitops_word(nr, ptr);
+	unsigned long mask;
+
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+	if (__builtin_constant_p(nr)) {
+		unsigned char *caddr = __bitops_byte(nr, ptr);
+
+		asm volatile(
+			"oi	%0,%b1\n"
+			: "+Q" (*caddr)
+			: "i" (1 << (nr & 7))
+			: "cc", "memory");
+		return;
+	}
+#endif
+	mask = 1UL << (nr & (BITS_PER_LONG - 1));
+	__BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_NO_BARRIER);
+}
+
+/*
+ * Clear bit @nr in the bitmap at @ptr; no barrier.
+ *
+ * With zEC12 features and a compile-time constant @nr the bit is
+ * cleared by an "ni" instruction on the containing byte; otherwise the
+ * containing long is ANDed with the inverted bit mask through
+ * __BITOPS_LOOP().
+ */
+static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned long *addr = __bitops_word(nr, ptr);
+	unsigned long mask;
+
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+	if (__builtin_constant_p(nr)) {
+		unsigned char *caddr = __bitops_byte(nr, ptr);
+
+		asm volatile(
+			"ni	%0,%b1\n"
+			: "+Q" (*caddr)
+			: "i" (~(1 << (nr & 7)))
+			: "cc", "memory");
+		return;
+	}
+#endif
+	mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
+	__BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_NO_BARRIER);
+}
+
+/*
+ * Toggle bit @nr in the bitmap at @ptr; no barrier.
+ *
+ * With zEC12 features and a compile-time constant @nr the bit is
+ * toggled by an "xi" instruction on the containing byte; otherwise the
+ * containing long is XORed with the bit mask through __BITOPS_LOOP().
+ */
+static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned long *addr = __bitops_word(nr, ptr);
+	unsigned long mask;
+
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+	if (__builtin_constant_p(nr)) {
+		unsigned char *caddr = __bitops_byte(nr, ptr);
+
+		asm volatile(
+			"xi	%0,%b1\n"
+			: "+Q" (*caddr)
+			: "i" (1 << (nr & 7))
+			: "cc", "memory");
+		return;
+	}
+#endif
+	mask = 1UL << (nr & (BITS_PER_LONG - 1));
+	__BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_NO_BARRIER);
+}
+
+/*
+ * Set bit @nr in the bitmap at @ptr through __BITOPS_LOOP() with
+ * __BITOPS_BARRIER and return 1 if the bit was set before, else 0.
+ */
+static inline int
+test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned long bit = 1UL << (nr & (BITS_PER_LONG - 1));
+	unsigned long *word = __bitops_word(nr, ptr);
+	unsigned long before;
+
+	before = __BITOPS_LOOP(word, bit, __BITOPS_OR, __BITOPS_BARRIER);
+	return !!(before & bit);
+}
+
+/*
+ * Clear bit @nr in the bitmap at @ptr through __BITOPS_LOOP() with
+ * __BITOPS_BARRIER and return 1 if the bit was set before, else 0.
+ */
+static inline int
+test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned long bit = 1UL << (nr & (BITS_PER_LONG - 1));
+	unsigned long *word = __bitops_word(nr, ptr);
+	unsigned long before;
+
+	before = __BITOPS_LOOP(word, ~bit, __BITOPS_AND, __BITOPS_BARRIER);
+	return !!(before & bit);
+}
+
+/*
+ * Toggle bit @nr in the bitmap at @ptr through __BITOPS_LOOP() with
+ * __BITOPS_BARRIER and return 1 if the bit was set before, else 0.
+ */
+static inline int
+test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned long bit = 1UL << (nr & (BITS_PER_LONG - 1));
+	unsigned long *word = __bitops_word(nr, ptr);
+	unsigned long before;
+
+	before = __BITOPS_LOOP(word, bit, __BITOPS_XOR, __BITOPS_BARRIER);
+	return !!(before & bit);
+}
+
+/*
+ * Set bit @nr in the bitmap at @ptr with a plain read-modify-write of
+ * the containing byte.
+ */
+static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned char *byte = __bitops_byte(nr, ptr);
+	unsigned char bit = 1 << (nr & 7);
+
+	*byte |= bit;
+}
+
+/*
+ * Clear bit @nr in the bitmap at @ptr with a plain read-modify-write of
+ * the containing byte.
+ */
+static inline void __clear_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned char *byte = __bitops_byte(nr, ptr);
+	unsigned char bit = 1 << (nr & 7);
+
+	*byte &= ~bit;
+}
+
+/*
+ * Toggle bit @nr in the bitmap at @ptr with a plain read-modify-write
+ * of the containing byte.
+ */
+static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned char *byte = __bitops_byte(nr, ptr);
+	unsigned char bit = 1 << (nr & 7);
+
+	*byte ^= bit;
+}
+
+/*
+ * Set bit @nr in the bitmap at @ptr with a plain read-modify-write of
+ * the containing byte and return the bit's previous value (0 or 1).
+ */
+static inline int
+__test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned char *byte = __bitops_byte(nr, ptr);
+	unsigned char before = *byte;
+
+	*byte |= 1 << (nr & 7);
+	return (before >> (nr & 7)) & 1;
+}
+
+/*
+ * Clear bit @nr in the bitmap at @ptr with a plain read-modify-write of
+ * the containing byte and return the bit's previous value (0 or 1).
+ */
+static inline int
+__test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned char *byte = __bitops_byte(nr, ptr);
+	unsigned char before = *byte;
+
+	*byte &= ~(1 << (nr & 7));
+	return (before >> (nr & 7)) & 1;
+}
+
+/*
+ * Toggle bit @nr in the bitmap at @ptr with a plain read-modify-write
+ * of the containing byte and return the bit's previous value (0 or 1).
+ */
+static inline int
+__test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
+{
+	unsigned char *byte = __bitops_byte(nr, ptr);
+	unsigned char before = *byte;
+
+	*byte ^= 1 << (nr & 7);
+	return (before >> (nr & 7)) & 1;
+}
+
+/*
+ * Return the value (0 or 1) of bit @nr in the bitmap at @ptr. The byte
+ * holding the bit is located with the same index computation as in
+ * __bitops_byte().
+ */
+static inline int test_bit(unsigned long nr, const volatile unsigned long *ptr)
+{
+	const volatile unsigned char *bytes = (const volatile unsigned char *)ptr;
+	unsigned long byte_idx = (nr ^ (BITS_PER_LONG - 8)) >> 3;
+
+	return (bytes[byte_idx] >> (nr & 7)) & 1;
+}
+
+/*
+ * Functions which use MSB0 bit numbering.
+ * On an s390x system the bits are numbered:
+ *   |0..............63|64............127|128...........191|192...........255|
+ * and on s390:
+ *   |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
+ */
+unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size);
+unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
+				unsigned long offset);
+
+/*
+ * Set bit @nr, given in MSB0 numbering, in the bitmap at @ptr. The bit
+ * number is converted by XOR with BITS_PER_LONG - 1 and passed to
+ * set_bit().
+ */
+static inline void set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+	/* no "return <expr>": ISO C forbids it in a void function */
+	set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+/*
+ * Clear bit @nr, given in MSB0 numbering, in the bitmap at @ptr. The
+ * bit number is converted by XOR with BITS_PER_LONG - 1 and passed to
+ * clear_bit().
+ */
+static inline void clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+	/* no "return <expr>": ISO C forbids it in a void function */
+	clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+/*
+ * Set bit @nr, given in MSB0 numbering, in the bitmap at @ptr. The bit
+ * number is converted by XOR with BITS_PER_LONG - 1 and passed to
+ * __set_bit().
+ */
+static inline void __set_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+	/* no "return <expr>": ISO C forbids it in a void function */
+	__set_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+/*
+ * Clear bit @nr, given in MSB0 numbering, in the bitmap at @ptr. The
+ * bit number is converted by XOR with BITS_PER_LONG - 1 and passed to
+ * __clear_bit().
+ */
+static inline void __clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+{
+	/* no "return <expr>": ISO C forbids it in a void function */
+	__clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+/*
+ * Return the value of bit @nr, given in MSB0 numbering, in the bitmap
+ * at @ptr. The bit number is converted by XOR with BITS_PER_LONG - 1
+ * and passed to test_bit().
+ */
+static inline int test_bit_inv(unsigned long nr,
+			       const volatile unsigned long *ptr)
+{
+	return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
+}
+
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+
+/**
+ * __flogr - find leftmost one
+ * @word: The word to search
+ *
+ * Returns the bit number of the most significant bit set,
+ * where the most significant bit has bit number 0.
+ * If no bit is set this function returns 64.
+ *
+ * For compile-time constants the result is computed in C by a binary
+ * search over the upper half, quarter, ... of the word, so the compiler
+ * can fold it; otherwise the "flogr" instruction is used.
+ */
+static inline unsigned char __flogr(unsigned long word)
+{
+	if (__builtin_constant_p(word)) {
+		unsigned long bit = 0;
+
+		if (!word)
+			return 64;
+		if (!(word & 0xffffffff00000000UL)) {
+			word <<= 32;
+			bit += 32;
+		}
+		if (!(word & 0xffff000000000000UL)) {
+			word <<= 16;
+			bit += 16;
+		}
+		if (!(word & 0xff00000000000000UL)) {
+			word <<= 8;
+			bit += 8;
+		}
+		if (!(word & 0xf000000000000000UL)) {
+			word <<= 4;
+			bit += 4;
+		}
+		if (!(word & 0xc000000000000000UL)) {
+			word <<= 2;
+			bit += 2;
+		}
+		if (!(word & 0x8000000000000000UL)) {
+			word <<= 1;
+			bit += 1;
+		}
+		return bit;
+	} else {
+		/*
+		 * The operands are pinned to registers 4 and 5. "out" is
+		 * declared as an output but never read.
+		 * NOTE(review): presumably because flogr writes a register
+		 * pair and the second register must be marked as
+		 * overwritten - confirm.
+		 */
+		register unsigned long bit asm("4") = word;
+		register unsigned long out asm("5");
+
+		asm volatile(
+			"       flogr   %[bit],%[bit]\n"
+			: [bit] "+d" (bit), [out] "=d" (out) : : "cc");
+		return bit;
+	}
+}
+
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Isolates the lowest set bit with (-word & word), locates it with
+ * __flogr() and converts the MSB0 result to an LSB0 bit number.
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+	unsigned long lowest = -word & word;
+
+	return __flogr(lowest) ^ (BITS_PER_LONG - 1);
+}
+
+/**
+ * ffs - find first bit set
+ * @word: the word to search
+ *
+ * This is defined the same way as the libc and
+ * compiler builtin ffs routines (man ffs): the result is the 1-based
+ * position of the lowest set bit. The final mask with
+ * 2 * BITS_PER_LONG - 1 turns the intermediate value for a zero
+ * argument into 0.
+ */
+static inline int ffs(int word)
+{
+	unsigned int val = (unsigned int)word;
+	unsigned long wrap = 2 * BITS_PER_LONG - 1;
+	unsigned char msb0 = __flogr(-val & val);
+
+	return (1 + (msb0 ^ (BITS_PER_LONG - 1))) & wrap;
+}
+
+/**
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Converts the MSB0 bit number from __flogr() to an LSB0 bit number.
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+	unsigned char msb0 = __flogr(word);
+
+	return msb0 ^ (BITS_PER_LONG - 1);
+}
+
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @word: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64. The final mask with 2 * BITS_PER_LONG - 1 produces
+ * the 0 for a zero argument.
+ */
+static inline int fls64(unsigned long word)
+{
+	unsigned long wrap = 2 * BITS_PER_LONG - 1;
+	unsigned char msb0 = __flogr(word);
+
+	return (1 + (msb0 ^ (BITS_PER_LONG - 1))) & wrap;
+}
+
+/**
+ * fls - find last (most-significant) bit set
+ * @word: the word to search
+ *
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ * The argument is converted to unsigned int first, so it is
+ * zero-extended when handed to fls64().
+ */
+static inline int fls(int word)
+{
+	unsigned int val = (unsigned int)word;
+
+	return fls64(val);
+}
+
+#else /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
+
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/ffs.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/fls64.h>
+
+#endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */
+
+#include <asm-generic/bitops/ffz.h>
+#include <asm-generic/bitops/find.h>
+#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/le.h>
+#include <asm-generic/bitops/ext2-atomic-setbit.h>
+
+#endif /* _S390_BITOPS_H */
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
new file mode 100644
index 000000000..bf90d1fd9
--- /dev/null
+++ b/arch/s390/include/asm/bug.h
@@ -0,0 +1,71 @@
+#ifndef _ASM_S390_BUG_H
+#define _ASM_S390_BUG_H
+
+#include <linux/kernel.h>
+
+#ifdef CONFIG_BUG
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+
+#define __EMIT_BUG(x) do {					\
+	asm volatile(						\
+		"0:	j	0b+2\n"				\
+		"1:\n"						\
+		".section .rodata.str,\"aMS\",@progbits,1\n"	\
+		"2:	.asciz	\""__FILE__"\"\n"		\
+		".previous\n"					\
+		".section __bug_table,\"a\"\n"			\
+		"3:	.long	1b-3b,2b-3b\n"			\
+		"	.short	%0,%1\n"			\
+		"	.org	3b+%2\n"			\
+		".previous\n"					\
+		: : "i" (__LINE__),				\
+		    "i" (x),					\
+		    "i" (sizeof(struct bug_entry)));		\
+} while (0)
+
+#else /* CONFIG_DEBUG_BUGVERBOSE */
+
+#define __EMIT_BUG(x) do {				\
+	asm volatile(					\
+		"0:	j	0b+2\n"			\
+		"1:\n"					\
+		".section __bug_table,\"a\"\n"		\
+		"2:	.long	1b-2b\n"		\
+		"	.short	%0\n"			\
+		"	.org	2b+%1\n"		\
+		".previous\n"				\
+		: : "i" (x),				\
+		    "i" (sizeof(struct bug_entry)));	\
+} while (0)
+
+#endif /* CONFIG_DEBUG_BUGVERBOSE */
+
+#define BUG() do {					\
+	__EMIT_BUG(0);					\
+	unreachable();					\
+} while (0)
+
+#define __WARN_TAINT(taint) do {			\
+	__EMIT_BUG(BUGFLAG_TAINT(taint));		\
+} while (0)
+
+#define WARN_ON(x) ({					\
+	int __ret_warn_on = !!(x);			\
+	if (__builtin_constant_p(__ret_warn_on)) {	\
+		if (__ret_warn_on)			\
+			__WARN();			\
+	} else {					\
+		if (unlikely(__ret_warn_on))		\
+			__WARN();			\
+	}						\
+	unlikely(__ret_warn_on);			\
+})
+
+#define HAVE_ARCH_BUG
+#define HAVE_ARCH_WARN_ON
+#endif /* CONFIG_BUG */
+
+#include <asm-generic/bug.h>
+
+#endif /* _ASM_S390_BUG_H */
diff --git a/arch/s390/include/asm/bugs.h b/arch/s390/include/asm/bugs.h
new file mode 100644
index 000000000..0f5bd894f
--- /dev/null
+++ b/arch/s390/include/asm/bugs.h
@@ -0,0 +1,20 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/bugs.h"
+ *    Copyright (C) 1994  Linus Torvalds
+ */
+
+/*
+ * This is included by init/main.c to check for architecture-dependent bugs.
+ *
+ * Needs:
+ *      void check_bugs(void);
+ */
+
+static inline void check_bugs(void)
+{
+  /* s390 has no bugs ... */
+}
diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h
new file mode 100644
index 000000000..4d7ccac5f
--- /dev/null
+++ b/arch/s390/include/asm/cache.h
@@ -0,0 +1,18 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *
+ *  Derived from "include/asm-i386/cache.h"
+ *    Copyright (C) 1992, Linus Torvalds
+ */
+
+#ifndef __ARCH_S390_CACHE_H
+#define __ARCH_S390_CACHE_H
+
+#define L1_CACHE_BYTES     256
+#define L1_CACHE_SHIFT     8
+#define NET_SKB_PAD	   32
+
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+#endif
diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/cacheflush.h
new file mode 100644
index 000000000..58fae7d09
--- /dev/null
+++ b/arch/s390/include/asm/cacheflush.h
@@ -0,0 +1,12 @@
+#ifndef _S390_CACHEFLUSH_H
+#define _S390_CACHEFLUSH_H
+
+/* Caches aren't brain-dead on the s390. */
+#include <asm-generic/cacheflush.h>
+
+int set_memory_ro(unsigned long addr, int numpages);
+int set_memory_rw(unsigned long addr, int numpages);
+int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_x(unsigned long addr, int numpages);
+
+#endif /* _S390_CACHEFLUSH_H */
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
new file mode 100644
index 000000000..b80e456d6
--- /dev/null
+++ b/arch/s390/include/asm/ccwdev.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright IBM Corp. 2002, 2009
+ *
+ * Author(s): Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * Interface for CCW device drivers
+ */
+#ifndef _S390_CCWDEV_H_
+#define _S390_CCWDEV_H_
+
+#include <linux/device.h>
+#include <linux/mod_devicetable.h>
+#include <asm/fcx.h>
+#include <asm/irq.h>
+#include <asm/schid.h>
+
+/* structs from asm/cio.h */
+struct irb;
+struct ccw1;
+struct ccw_dev_id;
+
+/* simplified initializers for struct ccw_device_id:
+ * CCW_DEVICE and CCW_DEVICE_DEVTYPE initialize one entry in your
+ * MODULE_DEVICE_TABLE; a model is only matched when it is non-zero */
+#define CCW_DEVICE(cu, cum)						\
+	.cu_type=(cu), .cu_model=(cum),					\
+	.match_flags=(CCW_DEVICE_ID_MATCH_CU_TYPE			\
+		   | ((cum) ? CCW_DEVICE_ID_MATCH_CU_MODEL : 0))
+
+#define CCW_DEVICE_DEVTYPE(cu, cum, dev, devm)				\
+	.cu_type=(cu), .cu_model=(cum), .dev_type=(dev), .dev_model=(devm),\
+	.match_flags=(CCW_DEVICE_ID_MATCH_CU_TYPE			\
+		   | ((cum) ? CCW_DEVICE_ID_MATCH_CU_MODEL : 0)		\
+		   | CCW_DEVICE_ID_MATCH_DEVICE_TYPE			\
+		   | ((devm) ? CCW_DEVICE_ID_MATCH_DEVICE_MODEL : 0))
+
+/* scan through an array of device ids and return the first
+ * entry that matches the device.
+ *
+ * the array must end with an entry containing zero match_flags
+ */
+static inline const struct ccw_device_id *
+ccw_device_id_match(const struct ccw_device_id *array,
+			const struct ccw_device_id *match)
+{
+	const struct ccw_device_id *cur;
+
+	/* Walk the table up to its zero-match_flags terminator. */
+	for (cur = array; cur->match_flags; cur++) {
+		/* A field only has to agree when its match flag is set. */
+		if ((cur->match_flags & CCW_DEVICE_ID_MATCH_CU_TYPE) &&
+		    cur->cu_type != match->cu_type)
+			continue;
+		if ((cur->match_flags & CCW_DEVICE_ID_MATCH_CU_MODEL) &&
+		    cur->cu_model != match->cu_model)
+			continue;
+		if ((cur->match_flags & CCW_DEVICE_ID_MATCH_DEVICE_TYPE) &&
+		    cur->dev_type != match->dev_type)
+			continue;
+		if ((cur->match_flags & CCW_DEVICE_ID_MATCH_DEVICE_MODEL) &&
+		    cur->dev_model != match->dev_model)
+			continue;
+
+		/* Every requested field matched. */
+		return cur;
+	}
+
+	return NULL;
+}
+
+/**
+ * struct ccw_device - channel attached device
+ * @ccwlock: pointer to device lock
+ * @id: id of this device
+ * @drv: ccw driver for this device
+ * @dev: embedded device structure
+ * @online: online status of device
+ * @handler: interrupt handler
+ *
+ * @handler is a member of the device rather than the driver since a driver
+ * can have different interrupt handlers for different ccw devices
+ * (multi-subchannel drivers).
+ */
+struct ccw_device {
+	spinlock_t *ccwlock;
+/* private: */
+	struct ccw_device_private *private;	/* cio private information */
+/* public: */
+	struct ccw_device_id id;
+	struct ccw_driver *drv;
+	struct device dev;
+	int online;
+	void (*handler) (struct ccw_device *, unsigned long, struct irb *);
+};
+
+/*
+ * Possible events used by the path_event notifier.
+ */
+#define PE_NONE				0x0
+#define PE_PATH_GONE			0x1 /* A path is no longer available. */
+#define PE_PATH_AVAILABLE		0x2 /* A path has become available and
+					       was successfully verified. */
+#define PE_PATHGROUP_ESTABLISHED	0x4 /* A pathgroup was reset and had
+					       to be established again. */
+
+/*
+ * Possible CIO actions triggered by the unit check handler.
+ */
+enum uc_todo {
+	UC_TODO_RETRY,
+	UC_TODO_RETRY_ON_NEW_PATH,
+	UC_TODO_STOP
+};
+
+/**
+ * struct ccw_driver - device driver for channel attached devices
+ * @ids: ids supported by this driver
+ * @probe: function called on probe
+ * @remove: function called on remove
+ * @set_online: called when setting device online
+ * @set_offline: called when setting device offline
+ * @notify: notify driver of device state changes
+ * @path_event: notify driver of channel path events
+ * @shutdown: called at device shutdown
+ * @prepare: prepare for pm state transition
+ * @complete: undo work done in @prepare
+ * @freeze: callback for freezing during hibernation snapshotting
+ * @thaw: undo work done in @freeze
+ * @restore: callback for restoring after hibernation
+ * @uc_handler: callback for unit check handler
+ * @driver: embedded device driver structure
+ * @int_class: interruption class to use for accounting interrupts
+ */
+struct ccw_driver {
+	struct ccw_device_id *ids;
+	int (*probe) (struct ccw_device *);
+	void (*remove) (struct ccw_device *);
+	int (*set_online) (struct ccw_device *);
+	int (*set_offline) (struct ccw_device *);
+	int (*notify) (struct ccw_device *, int);
+	void (*path_event) (struct ccw_device *, int *);
+	void (*shutdown) (struct ccw_device *);
+	int (*prepare) (struct ccw_device *);
+	void (*complete) (struct ccw_device *);
+	int (*freeze)(struct ccw_device *);
+	int (*thaw) (struct ccw_device *);
+	int (*restore)(struct ccw_device *);
+	enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *);
+	struct device_driver driver;
+	enum interruption_class int_class;
+};
+
+extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv,
+					      const char *bus_id);
+
+/* device drivers call these during module load and unload.
+ * When a driver is registered, its probe method is called
+ * when new devices for its type pop up */
+extern int  ccw_driver_register   (struct ccw_driver *driver);
+extern void ccw_driver_unregister (struct ccw_driver *driver);
+
+struct ccw1;
+
+extern int ccw_device_set_options_mask(struct ccw_device *, unsigned long);
+extern int ccw_device_set_options(struct ccw_device *, unsigned long);
+extern void ccw_device_clear_options(struct ccw_device *, unsigned long);
+int ccw_device_is_pathgroup(struct ccw_device *cdev);
+int ccw_device_is_multipath(struct ccw_device *cdev);
+
+/* Allow for i/o completion notification after primary interrupt status. */
+#define CCWDEV_EARLY_NOTIFICATION	0x0001
+/* Report all interrupt conditions. */
+#define CCWDEV_REPORT_ALL	 	0x0002
+/* Try to perform path grouping. */
+#define CCWDEV_DO_PATHGROUP             0x0004
+/* Allow forced onlining of boxed devices. */
+#define CCWDEV_ALLOW_FORCE              0x0008
+/* Try to use multipath mode. */
+#define CCWDEV_DO_MULTIPATH		0x0010
+
+extern int ccw_device_start(struct ccw_device *, struct ccw1 *,
+			    unsigned long, __u8, unsigned long);
+extern int ccw_device_start_timeout(struct ccw_device *, struct ccw1 *,
+				    unsigned long, __u8, unsigned long, int);
+extern int ccw_device_start_key(struct ccw_device *, struct ccw1 *,
+				unsigned long, __u8, __u8, unsigned long);
+extern int ccw_device_start_timeout_key(struct ccw_device *, struct ccw1 *,
+					unsigned long, __u8, __u8,
+					unsigned long, int);
+
+
+extern int ccw_device_resume(struct ccw_device *);
+extern int ccw_device_halt(struct ccw_device *, unsigned long);
+extern int ccw_device_clear(struct ccw_device *, unsigned long);
+int ccw_device_tm_start_key(struct ccw_device *cdev, struct tcw *tcw,
+			    unsigned long intparm, u8 lpm, u8 key);
+int ccw_device_tm_start_key(struct ccw_device *, struct tcw *,
+			    unsigned long, u8, u8);
+int ccw_device_tm_start_timeout_key(struct ccw_device *, struct tcw *,
+			    unsigned long, u8, u8, int);
+int ccw_device_tm_start(struct ccw_device *, struct tcw *,
+			    unsigned long, u8);
+int ccw_device_tm_start_timeout(struct ccw_device *, struct tcw *,
+			    unsigned long, u8, int);
+int ccw_device_tm_intrg(struct ccw_device *cdev);
+
+int ccw_device_get_mdc(struct ccw_device *cdev, u8 mask);
+
+extern int ccw_device_set_online(struct ccw_device *cdev);
+extern int ccw_device_set_offline(struct ccw_device *cdev);
+
+
+extern struct ciw *ccw_device_get_ciw(struct ccw_device *, __u32 cmd);
+extern __u8 ccw_device_get_path_mask(struct ccw_device *);
+extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *);
+
+#define get_ccwdev_lock(x) ((x)->ccwlock)	/* pointer to the device lock */
+
+#define to_ccwdev(n) container_of(n, struct ccw_device, dev)
+#define to_ccwdrv(n) container_of(n, struct ccw_driver, driver)
+
+extern struct ccw_device *ccw_device_create_console(struct ccw_driver *);
+extern void ccw_device_destroy_console(struct ccw_device *);
+extern int ccw_device_enable_console(struct ccw_device *);
+extern void ccw_device_wait_idle(struct ccw_device *);
+extern int ccw_device_force_console(struct ccw_device *);
+
+int ccw_device_siosl(struct ccw_device *);
+
+extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
+
+struct channel_path_desc *ccw_device_get_chp_desc(struct ccw_device *, int);
+#endif /* _S390_CCWDEV_H_ */
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
new file mode 100644
index 000000000..057ce0ca6
--- /dev/null
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -0,0 +1,73 @@
+#ifndef S390_CCWGROUP_H
+#define S390_CCWGROUP_H
+
+struct ccw_device;
+struct ccw_driver;
+
+/**
+ * struct ccwgroup_device - ccw group device
+ * @state: online/offline state
+ * @count: number of attached slave devices
+ * @dev: embedded device structure
+ * @cdev: variable number of slave devices, allocated as needed
+ * @ungroup_work: work to be done when a ccwgroup notifier has action
+ *	type %BUS_NOTIFY_UNBIND_DRIVER
+ */
+struct ccwgroup_device {
+	enum {
+		CCWGROUP_OFFLINE,
+		CCWGROUP_ONLINE,
+	} state;
+/* private: */
+	atomic_t onoff;
+	struct mutex reg_mutex;
+/* public: */
+	unsigned int count;
+	struct device	dev;
+	struct work_struct ungroup_work;
+	struct ccw_device *cdev[];	/* C99 flexible array member */
+};
+
+/**
+ * struct ccwgroup_driver - driver for ccw group devices
+ * @setup: function called during device creation to setup the device
+ * @remove: function called on remove
+ * @set_online: function called when device is set online
+ * @set_offline: function called when device is set offline
+ * @shutdown: function called when device is shut down
+ * @prepare: prepare for pm state transition
+ * @complete: undo work done in @prepare
+ * @freeze: callback for freezing during hibernation snapshotting
+ * @thaw: undo work done in @freeze
+ * @restore: callback for restoring after hibernation
+ * @driver: embedded driver structure
+ */
+struct ccwgroup_driver {
+	int (*setup) (struct ccwgroup_device *);
+	void (*remove) (struct ccwgroup_device *);
+	int (*set_online) (struct ccwgroup_device *);
+	int (*set_offline) (struct ccwgroup_device *);
+	void (*shutdown)(struct ccwgroup_device *);
+	int (*prepare) (struct ccwgroup_device *);
+	void (*complete) (struct ccwgroup_device *);
+	int (*freeze)(struct ccwgroup_device *);
+	int (*thaw) (struct ccwgroup_device *);
+	int (*restore)(struct ccwgroup_device *);
+
+	struct device_driver driver;
+};
+
+extern int  ccwgroup_driver_register   (struct ccwgroup_driver *cdriver);
+extern void ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver);
+int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv,
+			int num_devices, const char *buf);
+
+extern int ccwgroup_set_online(struct ccwgroup_device *gdev);
+extern int ccwgroup_set_offline(struct ccwgroup_device *gdev);
+
+extern int ccwgroup_probe_ccwdev(struct ccw_device *cdev);
+extern void ccwgroup_remove_ccwdev(struct ccw_device *cdev);
+
+#define to_ccwgroupdev(x) container_of((x), struct ccwgroup_device, dev)
+#define to_ccwgroupdrv(x) container_of((x), struct ccwgroup_driver, driver)
+#endif
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
new file mode 100644
index 000000000..740364856
--- /dev/null
+++ b/arch/s390/include/asm/checksum.h
@@ -0,0 +1,140 @@
+/*
+ *    S390 fast network checksum routines
+ *
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Ulrich Hild        (first version)
+ *               Martin Schwidefsky (heavily optimized CKSM version)
+ *               D.J. Barrow        (third attempt) 
+ */
+
+#ifndef _S390_CHECKSUM_H
+#define _S390_CHECKSUM_H
+
+#include <asm/uaccess.h>
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+static inline __wsum
+csum_partial(const void *buff, int len, __wsum sum)
+{
+	register unsigned long reg2 asm("2") = (unsigned long) buff;
+	register unsigned long reg3 asm("3") = (unsigned long) len;
+
+	asm volatile(
+		"0:	cksm	%0,%1\n"	/* do checksum on longs */
+		"	jo	0b\n"
+		: "+d" (sum), "+d" (reg2), "+d" (reg3) : : "cc", "memory");
+	return sum;
+}
+
+/*
+ * the same as csum_partial_copy, but copies from user space.
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ *
+ * Copy from userspace and compute checksum.
+ */
+static inline __wsum
+csum_partial_copy_from_user(const void __user *src, void *dst,
+			    int len, __wsum sum, int *err_ptr)
+{
+	/* Flag a faulting copy, but still checksum whatever is in dst. */
+	if (unlikely(copy_from_user(dst, src, len) != 0))
+		*err_ptr = -EFAULT;
+	return csum_partial(dst, len, sum);
+}
+
+
+static inline __wsum
+csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
+{
+	/* memcpy() returns dst, so checksum the freshly copied data. */
+	return csum_partial(memcpy(dst, src, len), len, sum);
+}
+
+/*
+ *      Fold a partial checksum without adding pseudo headers
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+	u32 folded = (__force u32) sum;
+
+	/* Adding the half-swapped word leaves the carry-folded sum on top. */
+	folded += (folded << 16) | (folded >> 16);
+	return (__force __sum16) ~(folded >> 16);
+}
+
+/*
+ *	This is a version of ip_compute_csum() optimized for IP headers,
+ *	which always checksum on 4 octet boundaries.
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	/* ihl is scaled by 4 to get the length handed to csum_partial(). */
+	return csum_fold(csum_partial(iph, ihl << 2, 0));
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 32-bit checksum
+ */
+static inline __wsum
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+		   unsigned short len, unsigned short proto,
+		   __wsum sum)
+{
+	__u32 src = (__force __u32)saddr;
+	__u32 dst = (__force __u32)daddr;
+	__u32 lenproto = len + proto;
+	__u32 acc = (__force __u32)sum;
+
+	/* 32-bit addition with end-around carry: whenever a sum wraps, */
+	/* the result is smaller than the addend and one is added back. */
+	acc += src;
+	acc += (acc < src);
+	acc += dst;
+	acc += (acc < dst);
+	acc += lenproto;
+	acc += (acc < lenproto);
+
+	return (__force __wsum)acc;
+}
+
+/*
+ * checksums the TCP/UDP pseudo-header and folds the result;
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+		  unsigned short len, unsigned short proto, __wsum sum)
+{
+	__wsum pseudo = csum_tcpudp_nofold(saddr, daddr, len, proto, sum);
+
+	return csum_fold(pseudo);
+}
+
+/*
+ * used for miscellaneous IP-like checksums, mainly in icmp.c
+ */
+static inline __sum16 ip_compute_csum(const void *buff, int len)
+{
+	__wsum partial = csum_partial(buff, len, 0);
+
+	return csum_fold(partial);
+}
+
+#endif /* _S390_CHECKSUM_H */
+
+
diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h
new file mode 100644
index 000000000..7298eec98
--- /dev/null
+++ b/arch/s390/include/asm/chpid.h
@@ -0,0 +1,50 @@
+/*
+ *    Copyright IBM Corp. 2007, 2012
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+#ifndef _ASM_S390_CHPID_H
+#define _ASM_S390_CHPID_H
+
+#include <uapi/asm/chpid.h>
+#include <asm/cio.h>
+
+struct channel_path_desc {
+	u8 flags;
+	u8 lsn;
+	u8 desc;
+	u8 chpid;
+	u8 swla;
+	u8 zeroes;
+	u8 chla;
+	u8 chpp;
+} __packed;
+
+static inline void chp_id_init(struct chp_id *chpid)
+{
+	memset(chpid, 0, sizeof(*chpid));
+}
+
+static inline int chp_id_is_equal(struct chp_id *a, struct chp_id *b)
+{
+	return a->cssid == b->cssid && a->id == b->id;
+}
+
+static inline void chp_id_next(struct chp_id *chpid)
+{
+	if (chpid->id < __MAX_CHPID) {
+		chpid->id++;
+		return;
+	}
+	chpid->id = 0;
+	chpid->cssid++;
+}
+
+static inline int chp_id_is_valid(struct chp_id *chpid)
+{
+	return (chpid->cssid <= __MAX_CSSID);
+}
+
+
+#define chp_id_for_each(c) \
+	for (chp_id_init(c); chp_id_is_valid(c); chp_id_next(c))
+#endif /* _ASM_S390_CHPID_H */
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
new file mode 100644
index 000000000..096339207
--- /dev/null
+++ b/arch/s390/include/asm/cio.h
@@ -0,0 +1,315 @@
+/*
+ * Common interface for I/O on S/390
+ */
+#ifndef _ASM_S390_CIO_H_
+#define _ASM_S390_CIO_H_
+
+#include <linux/spinlock.h>
+#include <asm/types.h>
+
+#define LPM_ANYPATH 0xff
+#define __MAX_CSSID 0
+#define __MAX_SUBCHANNEL 65535
+#define __MAX_SSID 3
+
+#include <asm/scsw.h>
+
+/**
+ * struct ccw1 - channel command word
+ * @cmd_code: command code
+ * @flags: flags, like IDA addressing, etc.
+ * @count: byte count
+ * @cda: data address
+ *
+ * The ccw is the basic structure to build channel programs that perform
+ * operations with the device or the control unit. Only Format-1 channel
+ * command words are supported.
+ */
+struct ccw1 {
+	__u8  cmd_code;
+	__u8  flags;
+	__u16 count;
+	__u32 cda;
+} __attribute__ ((packed,aligned(8)));
+
+#define CCW_FLAG_DC		0x80
+#define CCW_FLAG_CC		0x40
+#define CCW_FLAG_SLI		0x20
+#define CCW_FLAG_SKIP		0x10
+#define CCW_FLAG_PCI		0x08
+#define CCW_FLAG_IDA		0x04
+#define CCW_FLAG_SUSPEND	0x02
+
+#define CCW_CMD_READ_IPL	0x02
+#define CCW_CMD_NOOP		0x03
+#define CCW_CMD_BASIC_SENSE	0x04
+#define CCW_CMD_TIC		0x08
+#define CCW_CMD_STLCK           0x14
+#define CCW_CMD_SENSE_PGID	0x34
+#define CCW_CMD_SUSPEND_RECONN	0x5B
+#define CCW_CMD_RDC		0x64
+#define CCW_CMD_RELEASE		0x94
+#define CCW_CMD_SET_PGID	0xAF
+#define CCW_CMD_SENSE_ID	0xE4
+#define CCW_CMD_DCTL		0xF3
+
+#define SENSE_MAX_COUNT		0x20
+
+/**
+ * struct erw - extended report word
+ * @res0: reserved
+ * @auth: authorization check
+ * @pvrf: path-verification-required flag
+ * @cpt: channel-path timeout
+ * @fsavf: failing storage address validity flag
+ * @cons: concurrent sense
+ * @scavf: secondary ccw address validity flag
+ * @fsaf: failing storage address format
+ * @scnt: sense count, if @cons == %1
+ * @res16: reserved
+ */
+struct erw {
+	__u32 res0  : 3;
+	__u32 auth  : 1;
+	__u32 pvrf  : 1;
+	__u32 cpt   : 1;
+	__u32 fsavf : 1;
+	__u32 cons  : 1;
+	__u32 scavf : 1;
+	__u32 fsaf  : 1;
+	__u32 scnt  : 6;
+	__u32 res16 : 16;
+} __attribute__ ((packed));
+
+/**
+ * struct erw_eadm - EADM Subchannel extended report word
+ * @b: aob error
+ * @r: arsb error
+ */
+struct erw_eadm {
+	__u32 : 16;
+	__u32 b : 1;
+	__u32 r : 1;
+	__u32  : 14;
+} __packed;
+
+/**
+ * struct sublog - subchannel logout area
+ * @res0: reserved
+ * @esf: extended status flags
+ * @lpum: last path used mask
+ * @arep: ancillary report
+ * @fvf: field-validity flags
+ * @sacc: storage access code
+ * @termc: termination code
+ * @devsc: device-status check
+ * @serr: secondary error
+ * @ioerr: i/o-error alert
+ * @seqc: sequence code
+ */
+struct sublog {
+	__u32 res0  : 1;
+	__u32 esf   : 7;
+	__u32 lpum  : 8;
+	__u32 arep  : 1;
+	__u32 fvf   : 5;
+	__u32 sacc  : 2;
+	__u32 termc : 2;
+	__u32 devsc : 1;
+	__u32 serr  : 1;
+	__u32 ioerr : 1;
+	__u32 seqc  : 3;
+} __attribute__ ((packed));
+
+/**
+ * struct esw0 - Format 0 Extended Status Word (ESW)
+ * @sublog: subchannel logout
+ * @erw: extended report word
+ * @faddr: failing storage address
+ * @saddr: secondary ccw address
+ */
+struct esw0 {
+	struct sublog sublog;
+	struct erw erw;
+	__u32  faddr[2];
+	__u32  saddr;
+} __attribute__ ((packed));
+
+/**
+ * struct esw1 - Format 1 Extended Status Word (ESW)
+ * @zero0: reserved zeros
+ * @lpum: last path used mask
+ * @zero16: reserved zeros
+ * @erw: extended report word
+ * @zeros: three fullwords of zeros
+ */
+struct esw1 {
+	__u8  zero0;
+	__u8  lpum;
+	__u16 zero16;
+	struct erw erw;
+	__u32 zeros[3];
+} __attribute__ ((packed));
+
+/**
+ * struct esw2 - Format 2 Extended Status Word (ESW)
+ * @zero0: reserved zeros
+ * @lpum: last path used mask
+ * @dcti: device-connect-time interval
+ * @erw: extended report word
+ * @zeros: three fullwords of zeros
+ */
+struct esw2 {
+	__u8  zero0;
+	__u8  lpum;
+	__u16 dcti;
+	struct erw erw;
+	__u32 zeros[3];
+} __attribute__ ((packed));
+
+/**
+ * struct esw3 - Format 3 Extended Status Word (ESW)
+ * @zero0: reserved zeros
+ * @lpum: last path used mask
+ * @res: reserved
+ * @erw: extended report word
+ * @zeros: three fullwords of zeros
+ */
+struct esw3 {
+	__u8  zero0;
+	__u8  lpum;
+	__u16 res;
+	struct erw erw;
+	__u32 zeros[3];
+} __attribute__ ((packed));
+
+/**
+ * struct esw_eadm - EADM Subchannel Extended Status Word (ESW)
+ * @sublog: subchannel logout
+ * @erw: extended report word
+ */
+struct esw_eadm {
+	__u32 sublog;
+	struct erw_eadm erw;
+	__u32 : 32;
+	__u32 : 32;
+	__u32 : 32;
+} __packed;
+
+/**
+ * struct irb - interruption response block
+ * @scsw: subchannel status word
+ * @esw: extended status word
+ * @ecw: extended control word
+ *
+ * The irb that is handed to the device driver when an interrupt occurs. For
+ * solicited interrupts, the common I/O layer already performs checks whether
+ * a field is valid; a field not being valid is always passed as %0.
+ * If a unit check occurred, @ecw may contain sense data; this is retrieved
+ * by the common I/O layer itself if the device doesn't support concurrent
+ * sense (so that the device driver never needs to perform basic sense itself).
+ * For unsolicited interrupts, the irb is passed as-is (except for sense data,
+ * if applicable).
+ */
+struct irb {
+	union scsw scsw;
+	union {
+		struct esw0 esw0;
+		struct esw1 esw1;
+		struct esw2 esw2;
+		struct esw3 esw3;
+		struct esw_eadm eadm;
+	} esw;
+	__u8   ecw[32];
+} __attribute__ ((packed,aligned(4)));
+
+/**
+ * struct ciw - command information word  (CIW) layout
+ * @et: entry type
+ * @reserved: reserved bits
+ * @ct: command type
+ * @cmd: command code
+ * @count: command count
+ */
+struct ciw {
+	__u32 et       :  2;
+	__u32 reserved :  2;
+	__u32 ct       :  4;
+	__u32 cmd      :  8;
+	__u32 count    : 16;
+} __attribute__ ((packed));
+
+#define CIW_TYPE_RCD	0x0    	/* read configuration data */
+#define CIW_TYPE_SII	0x1    	/* set interface identifier */
+#define CIW_TYPE_RNI	0x2    	/* read node identifier */
+
+/*
+ * Flags used as input parameters for do_IO()
+ */
+#define DOIO_ALLOW_SUSPEND	 0x0001 /* allow for channel prog. suspend */
+#define DOIO_DENY_PREFETCH	 0x0002 /* don't allow for CCW prefetch */
+#define DOIO_SUPPRESS_INTER	 0x0004 /* suppress intermediate inter. */
+					/* ... for suspended CCWs */
+/* Device or subchannel gone. */
+#define CIO_GONE       0x0001
+/* No path to device. */
+#define CIO_NO_PATH    0x0002
+/* Device has appeared. */
+#define CIO_OPER       0x0004
+/* Sick revalidation of device. */
+#define CIO_REVALIDATE 0x0008
+/* Device did not respond in time. */
+#define CIO_BOXED      0x0010
+
+/**
+ * struct ccw_dev_id - unique identifier for ccw devices
+ * @ssid: subchannel set id
+ * @devno: device number
+ *
+ * This structure is not directly based on any hardware structure. The
+ * hardware identifies a device by its device number and its subchannel,
+ * which is in turn identified by its id. In order to get a unique identifier
+ * for ccw devices across subchannel sets, @struct ccw_dev_id has been
+ * introduced.
+ */
+struct ccw_dev_id {
+	u8 ssid;
+	u16 devno;
+};
+
+/**
+ * ccw_dev_id_is_equal() - compare two ccw_dev_ids
+ * @dev_id1: a ccw_dev_id
+ * @dev_id2: another ccw_dev_id
+ * Returns:
+ *  %1 if the two structures are equal field-by-field,
+ *  %0 if not.
+ * Context:
+ *  any
+ */
+static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1,
+				      struct ccw_dev_id *dev_id2)
+{
+	/* Both the subchannel set id and the device number must agree. */
+	if (dev_id1->ssid != dev_id2->ssid)
+		return 0;
+	return dev_id1->devno == dev_id2->devno;
+}
+
+void channel_subsystem_reinit(void);
+extern void css_schedule_reprobe(void);
+
+extern void reipl_ccw_dev(struct ccw_dev_id *id);
+
+struct cio_iplinfo {
+	u16 devno;
+	int is_qdio;
+};
+
+extern int cio_get_iplinfo(struct cio_iplinfo *iplinfo);
+
+/* Function from drivers/s390/cio/chsc.c */
+int chsc_sstpc(void *page, unsigned int op, u16 ctrl);
+int chsc_sstpi(void *page, void *result, size_t size);
+
+#endif
diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h
new file mode 100644
index 000000000..a0e71a501
--- /dev/null
+++ b/arch/s390/include/asm/clp.h
@@ -0,0 +1,28 @@
+#ifndef _ASM_S390_CLP_H
+#define _ASM_S390_CLP_H
+
+/* CLP common request & response block size */
+#define CLP_BLK_SIZE			PAGE_SIZE
+
+struct clp_req_hdr {
+	u16 len;
+	u16 cmd;
+} __packed;
+
+struct clp_rsp_hdr {
+	u16 len;
+	u16 rsp;
+} __packed;
+
+/* CLP Response Codes */
+#define CLP_RC_OK			0x0010	/* Command request successfully */
+#define CLP_RC_CMD			0x0020	/* Command code not recognized */
+#define CLP_RC_PERM			0x0030	/* Command not authorized */
+#define CLP_RC_FMT			0x0040	/* Invalid command request format */
+#define CLP_RC_LEN			0x0050	/* Invalid command request length */
+#define CLP_RC_8K			0x0060	/* Command requires 8K LPCB */
+#define CLP_RC_RESNOT0			0x0070	/* Reserved field not zero */
+#define CLP_RC_NODATA			0x0080	/* No data available */
+#define CLP_RC_FC_UNKNOWN		0x0100	/* Function code not recognized */
+
+#endif
diff --git a/arch/s390/include/asm/cmb.h b/arch/s390/include/asm/cmb.h
new file mode 100644
index 000000000..806eac12e
--- /dev/null
+++ b/arch/s390/include/asm/cmb.h
@@ -0,0 +1,12 @@
+#ifndef S390_CMB_H
+#define S390_CMB_H
+
+#include <uapi/asm/cmb.h>
+
+struct ccw_device;
+extern int enable_cmf(struct ccw_device *cdev);
+extern int disable_cmf(struct ccw_device *cdev);
+extern u64 cmf_read(struct ccw_device *cdev, int index);
+extern int cmf_readall(struct ccw_device *cdev, struct cmbdata *data);
+
+#endif /* S390_CMB_H */
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
new file mode 100644
index 000000000..4eadec466
--- /dev/null
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright IBM Corp. 1999, 2011
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#ifndef __ASM_CMPXCHG_H
+#define __ASM_CMPXCHG_H
+
+#include <linux/mmdebug.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+
+#define cmpxchg(ptr, o, n)						\
+({									\
+	__typeof__(*(ptr)) __o = (o);					\
+	__typeof__(*(ptr)) __n = (n);					\
+	(__typeof__(*(ptr))) __sync_val_compare_and_swap((ptr),__o,__n);\
+})
+
+#define cmpxchg64	cmpxchg
+#define cmpxchg_local	cmpxchg
+#define cmpxchg64_local	cmpxchg
+
+#define xchg(ptr, x)							\
+({									\
+	__typeof__(ptr) __ptr = (ptr);					\
+	__typeof__(*(ptr)) __old, __new = (x);	/* evaluate x only once */ \
+	do {								\
+		__old = *__ptr;						\
+	} while (!__sync_bool_compare_and_swap(__ptr, __old, __new));	\
+	__old;								\
+})
+
+#define __HAVE_ARCH_CMPXCHG
+
+#define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn)		\
+({									\
+	register __typeof__(*(p1)) __old1 asm("2") = (o1);		\
+	register __typeof__(*(p2)) __old2 asm("3") = (o2);		\
+	register __typeof__(*(p1)) __new1 asm("4") = (n1);		\
+	register __typeof__(*(p2)) __new2 asm("5") = (n2);		\
+	int cc;								\
+	asm volatile(							\
+			insn   " %[old],%[new],%[ptr]\n"		\
+		"	ipm	%[cc]\n"				\
+		"	srl	%[cc],28"				\
+		: [cc] "=d" (cc), [old] "+d" (__old1), "+d" (__old2)	\
+		: [new] "d" (__new1), "d" (__new2),			\
+		  [ptr] "Q" (*(p1)), "Q" (*(p2))			\
+		: "memory", "cc");					\
+	!cc;								\
+})
+
+#define __cmpxchg_double_4(p1, p2, o1, o2, n1, n2) \
+	__cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cds")
+
+#define __cmpxchg_double_8(p1, p2, o1, o2, n1, n2) \
+	__cmpxchg_double_op(p1, p2, o1, o2, n1, n2, "cdsg")
+
+extern void __cmpxchg_double_called_with_bad_pointer(void);
+
+#define __cmpxchg_double(p1, p2, o1, o2, n1, n2)			\
+({									\
+	int __ret;							\
+	switch (sizeof(*(p1))) {					\
+	case 4:								\
+		__ret = __cmpxchg_double_4(p1, p2, o1, o2, n1, n2);	\
+		break;							\
+	case 8:								\
+		__ret = __cmpxchg_double_8(p1, p2, o1, o2, n1, n2);	\
+		break;							\
+	default:							\
+		__cmpxchg_double_called_with_bad_pointer();		\
+	}								\
+	__ret;								\
+})
+
+#define cmpxchg_double(p1, p2, o1, o2, n1, n2)				\
+({									\
+	__typeof__(p1) __p1 = (p1);					\
+	__typeof__(p2) __p2 = (p2);					\
+	BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long));			\
+	BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long));			\
+	VM_BUG_ON((unsigned long)((__p1) + 1) != (unsigned long)(__p2));\
+	__cmpxchg_double_8(__p1, __p2, o1, o2, n1, n2);			\
+})
+
+#define system_has_cmpxchg_double()	1
+
+#endif /* __ASM_CMPXCHG_H */
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
new file mode 100644
index 000000000..d350ed9d0
--- /dev/null
+++ b/arch/s390/include/asm/compat.h
@@ -0,0 +1,359 @@
+#ifndef _ASM_S390X_COMPAT_H
+#define _ASM_S390X_COMPAT_H
+/*
+ * Architecture specific compatibility types
+ */
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/thread_info.h>
+
+#define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64))
+
+#define __SC_DELOUSE(t,v) ({ /* cast v to t; pointer types keep 31 bits */ \
+	BUILD_BUG_ON(sizeof(t) > 4 && !__TYPE_IS_PTR(t)); /* non-ptr: <= 4 bytes */ \
+	(t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \
+})
+
+#define PSW32_MASK_PER		0x40000000UL
+#define PSW32_MASK_DAT		0x04000000UL
+#define PSW32_MASK_IO		0x02000000UL
+#define PSW32_MASK_EXT		0x01000000UL
+#define PSW32_MASK_KEY		0x00F00000UL
+#define PSW32_MASK_BASE		0x00080000UL	/* Always one */
+#define PSW32_MASK_MCHECK	0x00040000UL
+#define PSW32_MASK_WAIT		0x00020000UL
+#define PSW32_MASK_PSTATE	0x00010000UL
+#define PSW32_MASK_ASC		0x0000C000UL
+#define PSW32_MASK_CC		0x00003000UL
+#define PSW32_MASK_PM		0x00000f00UL
+#define PSW32_MASK_RI		0x00000080UL
+
+#define PSW32_MASK_USER		0x0000FF00UL
+
+#define PSW32_ADDR_AMODE	0x80000000UL
+#define PSW32_ADDR_INSN		0x7FFFFFFFUL
+
+#define PSW32_DEFAULT_KEY	(((u32) PAGE_DEFAULT_ACC) << 20)
+
+#define PSW32_ASC_PRIMARY	0x00000000UL
+#define PSW32_ASC_ACCREG	0x00004000UL
+#define PSW32_ASC_SECONDARY	0x00008000UL
+#define PSW32_ASC_HOME		0x0000C000UL
+
+#define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \
+			 PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \
+			 PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \
+			 PSW32_ASC_PRIMARY)
+
+#define COMPAT_USER_HZ		100
+#define COMPAT_UTS_MACHINE	"s390\0\0\0\0"
+
+typedef u32		compat_size_t;
+typedef s32		compat_ssize_t;
+typedef s32		compat_time_t;
+typedef s32		compat_clock_t;
+typedef s32		compat_pid_t;
+typedef u16		__compat_uid_t;
+typedef u16		__compat_gid_t;
+typedef u32		__compat_uid32_t;
+typedef u32		__compat_gid32_t;
+typedef u16		compat_mode_t;
+typedef u32		compat_ino_t;
+typedef u16		compat_dev_t;
+typedef s32		compat_off_t;
+typedef s64		compat_loff_t;
+typedef u16		compat_nlink_t;
+typedef u16		compat_ipc_pid_t;
+typedef s32		compat_daddr_t;
+typedef u32		compat_caddr_t;
+typedef __kernel_fsid_t	compat_fsid_t;
+typedef s32		compat_key_t;
+typedef s32		compat_timer_t;
+
+typedef s32		compat_int_t;
+typedef s32		compat_long_t;
+typedef s64		compat_s64;
+typedef u32		compat_uint_t;
+typedef u32		compat_ulong_t;
+typedef u64		compat_u64;
+typedef u32		compat_uptr_t;
+
+typedef struct {
+	u32 mask;
+	u32 addr;
+} __aligned(8) psw_compat_t;
+
+typedef struct {
+	psw_compat_t psw;
+	u32 gprs[NUM_GPRS];
+	u32 acrs[NUM_ACRS];
+	u32 orig_gpr2;
+} s390_compat_regs;
+
+typedef struct {
+	u32 gprs_high[NUM_GPRS];
+} s390_compat_regs_high;
+
+struct compat_timespec {
+	compat_time_t	tv_sec;
+	s32		tv_nsec;
+};
+
+struct compat_timeval {
+	compat_time_t	tv_sec;
+	s32		tv_usec;
+};
+
+struct compat_stat {
+	compat_dev_t	st_dev;
+	u16		__pad1;
+	compat_ino_t	st_ino;
+	compat_mode_t	st_mode;
+	compat_nlink_t	st_nlink;
+	__compat_uid_t	st_uid;
+	__compat_gid_t	st_gid;
+	compat_dev_t	st_rdev;
+	u16		__pad2;
+	u32		st_size;
+	u32		st_blksize;
+	u32		st_blocks;
+	u32		st_atime;
+	u32		st_atime_nsec;
+	u32		st_mtime;
+	u32		st_mtime_nsec;
+	u32		st_ctime;
+	u32		st_ctime_nsec;
+	u32		__unused4;
+	u32		__unused5;
+};
+
+struct compat_flock {
+	short		l_type;
+	short		l_whence;
+	compat_off_t	l_start;
+	compat_off_t	l_len;
+	compat_pid_t	l_pid;
+};
+
+#define F_GETLK64       12
+#define F_SETLK64       13
+#define F_SETLKW64      14    
+
+struct compat_flock64 {
+	short		l_type;
+	short		l_whence;
+	compat_loff_t	l_start;
+	compat_loff_t	l_len;
+	compat_pid_t	l_pid;
+};
+
+struct compat_statfs {
+	u32		f_type;
+	u32		f_bsize;
+	u32		f_blocks;
+	u32		f_bfree;
+	u32		f_bavail;
+	u32		f_files;
+	u32		f_ffree;
+	compat_fsid_t	f_fsid;
+	u32		f_namelen;
+	u32		f_frsize;
+	u32		f_flags;
+	u32		f_spare[4];
+};
+
+struct compat_statfs64 {
+	u32		f_type;
+	u32		f_bsize;
+	u64		f_blocks;
+	u64		f_bfree;
+	u64		f_bavail;
+	u64		f_files;
+	u64		f_ffree;
+	compat_fsid_t	f_fsid;
+	u32		f_namelen;
+	u32		f_frsize;
+	u32		f_flags;
+	u32		f_spare[4];
+};
+
+#define COMPAT_RLIM_OLD_INFINITY	0x7fffffff
+#define COMPAT_RLIM_INFINITY		0xffffffff
+
+typedef u32		compat_old_sigset_t;	/* at least 32 bits */
+
+#define _COMPAT_NSIG		64
+#define _COMPAT_NSIG_BPW	32
+
+typedef u32		compat_sigset_word;
+
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+typedef struct compat_siginfo {
+	int	si_signo;
+	int	si_errno;
+	int	si_code;
+
+	union {
+		int _pad[128/sizeof(int) - 3];
+
+		/* kill() */
+		struct {
+			pid_t	_pid;	/* sender's pid */
+			uid_t	_uid;	/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;		/* timer id */
+			int _overrun;			/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;	/* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			pid_t			_pid;	/* sender's pid */
+			uid_t			_uid;	/* sender's uid */
+			compat_sigval_t		_sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			pid_t			_pid;	/* which child */
+			uid_t			_uid;	/* sender's uid */
+			int			_status;/* exit code */
+			compat_clock_t		_utime;
+			compat_clock_t		_stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			__u32	_addr;	/* faulting insn/memory ref. - pointer */
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			int	_band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int	_fd;
+		} _sigpoll;
+	} _sifields;
+} compat_siginfo_t;
+
+/*
+ * How these fields are to be accessed.
+ */
+#define si_pid		_sifields._kill._pid
+#define si_uid		_sifields._kill._uid
+#define si_status	_sifields._sigchld._status
+#define si_utime	_sifields._sigchld._utime
+#define si_stime	_sifields._sigchld._stime
+#define si_value	_sifields._rt._sigval
+#define si_int		_sifields._rt._sigval.sival_int
+#define si_ptr		_sifields._rt._sigval.sival_ptr
+#define si_addr		_sifields._sigfault._addr
+#define si_band		_sifields._sigpoll._band
+#define si_fd		_sifields._sigpoll._fd
+#define si_tid		_sifields._timer._tid
+#define si_overrun	_sifields._timer._overrun
+
+#define COMPAT_OFF_T_MAX	0x7fffffff
+#define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
+
+/*
+ * A pointer passed in from user mode. This should not
+ * be used for syscall parameters, just declare them
+ * as pointers because the syscall entry code will have
+ * appropriately converted them already.
+ */
+
+static inline void __user *compat_ptr(compat_uptr_t uptr)
+{
+	return (void __user *)(unsigned long)(uptr & 0x7fffffffUL);
+}
+
+static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+{
+	return (u32)(unsigned long)uptr;
+}
+
+#ifdef CONFIG_COMPAT
+
+static inline int is_compat_task(void)
+{
+	return is_32bit_task();
+}
+
+static inline void __user *arch_compat_alloc_user_space(long len)
+{
+	unsigned long stack;	/* KSTK_ESP() value of the current task */
+
+	stack = KSTK_ESP(current);
+	if (is_compat_task())
+		stack &= 0x7fffffffUL;	/* compat task: keep low 31 bits */
+	return (void __user *) (stack - len);	/* len bytes below that */
+}
+
+#endif
+
+struct compat_ipc64_perm {
+	compat_key_t key;
+	__compat_uid32_t uid;
+	__compat_gid32_t gid;
+	__compat_uid32_t cuid;
+	__compat_gid32_t cgid;
+	compat_mode_t mode;
+	unsigned short __pad1;
+	unsigned short seq;
+	unsigned short __pad2;
+	unsigned int __unused1;
+	unsigned int __unused2;
+};
+
+struct compat_semid64_ds {
+	struct compat_ipc64_perm sem_perm;
+	compat_time_t  sem_otime;
+	compat_ulong_t __pad1;
+	compat_time_t  sem_ctime;
+	compat_ulong_t __pad2;
+	compat_ulong_t sem_nsems;
+	compat_ulong_t __unused1;
+	compat_ulong_t __unused2;
+};
+
+struct compat_msqid64_ds {
+	struct compat_ipc64_perm msg_perm;
+	compat_time_t   msg_stime;
+	compat_ulong_t __pad1;
+	compat_time_t   msg_rtime;
+	compat_ulong_t __pad2;
+	compat_time_t   msg_ctime;
+	compat_ulong_t __pad3;
+	compat_ulong_t msg_cbytes;
+	compat_ulong_t msg_qnum;
+	compat_ulong_t msg_qbytes;
+	compat_pid_t   msg_lspid;
+	compat_pid_t   msg_lrpid;
+	compat_ulong_t __unused1;
+	compat_ulong_t __unused2;
+};
+
+struct compat_shmid64_ds {
+	struct compat_ipc64_perm shm_perm;
+	compat_size_t  shm_segsz;
+	compat_time_t  shm_atime;
+	compat_ulong_t __pad1;
+	compat_time_t  shm_dtime;
+	compat_ulong_t __pad2;
+	compat_time_t  shm_ctime;
+	compat_ulong_t __pad3;
+	compat_pid_t   shm_cpid;
+	compat_pid_t   shm_lpid;
+	compat_ulong_t shm_nattch;
+	compat_ulong_t __unused1;
+	compat_ulong_t __unused2;
+};
+#endif /* _ASM_S390X_COMPAT_H */
diff --git a/arch/s390/include/asm/cpcmd.h b/arch/s390/include/asm/cpcmd.h
new file mode 100644
index 000000000..3dfadb5d6
--- /dev/null
+++ b/arch/s390/include/asm/cpcmd.h
@@ -0,0 +1,32 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Christian Borntraeger (cborntra@de.ibm.com),
+ */
+
+#ifndef _ASM_S390_CPCMD_H
+#define _ASM_S390_CPCMD_H
+
+/*
+ * the lowlevel function for cpcmd
+ * the caller of __cpcmd has to ensure that the response buffer is below 2 GB
+ */
+extern int __cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+
+/*
+ * cpcmd is the in-kernel interface for issuing CP commands
+ *
+ * cmd:		null-terminated command string, max 240 characters
+ * response:	response buffer for VM's textual response
+ * rlen:	size of the response buffer, cpcmd will not exceed this size
+ *		but will cap the output if it is too large. Everything that
+ *		did not fit into the buffer will be silently dropped
+ * response_code: return pointer for VM's error code
+ * return value: the size of the response. The caller can check if the buffer
+ *		was large enough by comparing the return value and rlen
+ * NOTE: If the response buffer is not below 2 GB, cpcmd can sleep
+ */
+extern int cpcmd(const char *cmd, char *response, int rlen, int *response_code);
+
+#endif /* _ASM_S390_CPCMD_H */
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
new file mode 100644
index 000000000..f5a8e2fcd
--- /dev/null
+++ b/arch/s390/include/asm/cpu.h
@@ -0,0 +1,26 @@
+/*
+ *    Copyright IBM Corp. 2000, 2009
+ *    Author(s): Hartmut Penner <hp@de.ibm.com>,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Christian Ehrhardt <ehrhardt@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_CPU_H
+#define _ASM_S390_CPU_H
+
+#define MAX_CPU_ADDRESS 255
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+struct cpuid
+{
+	unsigned int version :	8;
+	unsigned int ident   : 24;
+	unsigned int machine : 16;
+	unsigned int unused  : 16;
+} __attribute__ ((packed, aligned(8)));
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_CPU_H */
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
new file mode 100644
index 000000000..5243a8679
--- /dev/null
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -0,0 +1,297 @@
+/*
+ * CPU-measurement facilities
+ *
+ *  Copyright IBM Corp. 2012
+ *  Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *	       Jan Glauber <jang@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#ifndef _ASM_S390_CPU_MF_H
+#define _ASM_S390_CPU_MF_H
+
+#include <linux/errno.h>
+#include <asm/facility.h>
+
+#define CPU_MF_INT_SF_IAE	(1 << 31)	/* invalid entry address */
+#define CPU_MF_INT_SF_ISE	(1 << 30)	/* incorrect SDBT entry */
+#define CPU_MF_INT_SF_PRA	(1 << 29)	/* program request alert */
+#define CPU_MF_INT_SF_SACA	(1 << 23)	/* sampler auth. change alert */
+#define CPU_MF_INT_SF_LSDA	(1 << 22)	/* loss of sample data alert */
+#define CPU_MF_INT_CF_CACA	(1 <<  7)	/* counter auth. change alert */
+#define CPU_MF_INT_CF_LCDA	(1 <<  6)	/* loss of counter data alert */
+#define CPU_MF_INT_RI_HALTED	(1 <<  5)	/* run-time instr. halted */
+#define CPU_MF_INT_RI_BUF_FULL	(1 <<  4)	/* run-time instr. program
+						   buffer full */
+
+#define CPU_MF_INT_CF_MASK	(CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA)
+#define CPU_MF_INT_SF_MASK	(CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE|	\
+				 CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA|	\
+				 CPU_MF_INT_SF_LSDA)
+#define CPU_MF_INT_RI_MASK	(CPU_MF_INT_RI_HALTED|CPU_MF_INT_RI_BUF_FULL)
+
+/* CPU measurement facility support */
+static inline int cpum_cf_avail(void)
+{
+	return MACHINE_HAS_LPP && test_facility(67);
+}
+
+static inline int cpum_sf_avail(void)
+{
+	return MACHINE_HAS_LPP && test_facility(68);
+}
+
+
+struct cpumf_ctr_info {
+	u16   cfvn;
+	u16   auth_ctl;
+	u16   enable_ctl;
+	u16   act_ctl;
+	u16   max_cpu;
+	u16   csvn;
+	u16   max_cg;
+	u16   reserved1;
+	u32   reserved2[12];
+} __packed;
+
+/* QUERY SAMPLING INFORMATION block */
+struct hws_qsi_info_block {	    /* Bit(s) */
+	unsigned int b0_13:14;	    /* 0-13: zeros			 */
+	unsigned int as:1;	    /* 14: basic-sampling authorization	 */
+	unsigned int ad:1;	    /* 15: diag-sampling authorization	 */
+	unsigned int b16_21:6;	    /* 16-21: zeros			 */
+	unsigned int es:1;	    /* 22: basic-sampling enable control */
+	unsigned int ed:1;	    /* 23: diag-sampling enable control	 */
+	unsigned int b24_29:6;	    /* 24-29: zeros			 */
+	unsigned int cs:1;	    /* 30: basic-sampling activation control */
+	unsigned int cd:1;	    /* 31: diag-sampling activation control */
+	unsigned int bsdes:16;	    /* 4-5: size of basic sampling entry */
+	unsigned int dsdes:16;	    /* 6-7: size of diagnostic sampling entry */
+	unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
+	unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
+	unsigned long tear;	    /* 24-31: TEAR contents		 */
+	unsigned long dear;	    /* 32-39: DEAR contents		 */
+	unsigned int rsvrd0;	    /* 40-43: reserved			 */
+	unsigned int cpu_speed;     /* 44-47: CPU speed 		 */
+	unsigned long long rsvrd1;  /* 48-55: reserved			 */
+	unsigned long long rsvrd2;  /* 56-63: reserved			 */
+} __packed;
+
+/* SET SAMPLING CONTROLS request block */
+struct hws_lsctl_request_block {
+	unsigned int s:1;	    /* 0: maximum buffer indicator	 */
+	unsigned int h:1;	    /* 1: part. level reserved for VM use*/
+	unsigned long long b2_53:52;/* 2-53: zeros			 */
+	unsigned int es:1;	    /* 54: basic-sampling enable control */
+	unsigned int ed:1;	    /* 55: diag-sampling enable control	 */
+	unsigned int b56_61:6;	    /* 56-61: - zeros			 */
+	unsigned int cs:1;	    /* 62: basic-sampling activation control */
+	unsigned int cd:1;	    /* 63: diag-sampling activation control  */
+	unsigned long interval;     /* 8-15: sampling interval		 */
+	unsigned long tear;	    /* 16-23: TEAR contents		 */
+	unsigned long dear;	    /* 24-31: DEAR contents		 */
+	/* 32-63:							 */
+	unsigned long rsvrd1;	    /* reserved 			 */
+	unsigned long rsvrd2;	    /* reserved 			 */
+	unsigned long rsvrd3;	    /* reserved 			 */
+	unsigned long rsvrd4;	    /* reserved 			 */
+} __packed;
+
+struct hws_basic_entry {
+	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
+	unsigned int R:4;	    /* 16-19 reserved			 */
+	unsigned int U:4;	    /* 20-23 Number of unique instruct.  */
+	unsigned int z:2;	    /* zeros				 */
+	unsigned int T:1;	    /* 26 PSW DAT mode			 */
+	unsigned int W:1;	    /* 27 PSW wait state		 */
+	unsigned int P:1;	    /* 28 PSW Problem state		 */
+	unsigned int AS:2;	    /* 29-30 PSW address-space control	 */
+	unsigned int I:1;	    /* 31 entry valid or invalid	 */
+	unsigned int:16;
+	unsigned int prim_asn:16;   /* primary ASN			 */
+	unsigned long long ia;	    /* Instruction Address		 */
+	unsigned long long gpp;     /* Guest Program Parameter		 */
+	unsigned long long hpp;     /* Host Program Parameter		 */
+} __packed;
+
+struct hws_diag_entry {
+	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
+	unsigned int R:14;	    /* 16-19 and 20-30 reserved		 */
+	unsigned int I:1;	    /* 31 entry valid or invalid	 */
+	u8	     data[];	    /* Machine-dependent sample data	 */
+} __packed;
+
+struct hws_combined_entry {
+	struct hws_basic_entry	basic;	/* Basic-sampling data entry */
+	struct hws_diag_entry	diag;	/* Diagnostic-sampling data entry */
+} __packed;
+
+struct hws_trailer_entry {
+	union {
+		struct {
+			unsigned int f:1;	/* 0 - Block Full Indicator   */
+			unsigned int a:1;	/* 1 - Alert request control  */
+			unsigned int t:1;	/* 2 - Timestamp format	      */
+			unsigned long long:61;	/* 3 - 63: Reserved	      */
+		};
+		unsigned long long flags;	/* 0 - 63: All indicators     */
+	};
+	unsigned long long overflow;	 /* 64 - sample Overflow count	      */
+	unsigned char timestamp[16];	 /* 16 - 31 timestamp		      */
+	unsigned long long reserved1;	 /* 32 -Reserved		      */
+	unsigned long long reserved2;	 /*				      */
+	unsigned long long progusage1;	 /* 48 - reserved for programming use */
+	unsigned long long progusage2;	 /*				      */
+} __packed;
+
+/* Query counter information: fills *info and returns 0 or -EINVAL */
+static inline int qctri(struct cpumf_ctr_info *info)
+{
+	int rc = -EINVAL;	/* result if the lhi at 1: is not reached */
+
+	asm volatile (
+		"0:	.insn	s,0xb28e0000,%1\n"
+		"1:	lhi	%0,0\n"
+		"2:\n"
+		EX_TABLE(1b, 2b)	/* exception-table entry: 1b -> 2b */
+		: "+d" (rc), "=Q" (*info));	/* rc in/out, *info written */
+	return rc;
+}
+
+/* Load CPU-counter-set controls from ctl; returns the value left in cc */
+static inline int lcctl(u64 ctl)
+{
+	int cc;		/* filled by ipm, then shifted right by 28 */
+
+	asm volatile (
+		"	.insn	s,0xb2840000,%1\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (cc) : "m" (ctl) : "cc");
+	return cc;
+}
+
+/* Extract CPU counter ctr; *val is written only when cc is 0 */
+static inline int ecctr(u64 ctr, u64 *val)
+{
+	register u64 content asm("4") = 0;	/* pinned to register 4 */
+	int cc;		/* filled by ipm, then shifted right by 28 */
+
+	asm volatile (
+		"	.insn	rre,0xb2e40000,%0,%2\n"
+		"	ipm	%1\n"
+		"	srl	%1,28\n"
+		: "=d" (content), "=d" (cc) : "d" (ctr) : "cc");
+	if (!cc)	/* nonzero cc: leave *val untouched */
+		*val = content;
+	return cc;
+}
+
+/* Store CPU counter multiple for the MT utilization counter set */
+static inline int stcctm5(u64 num, u64 *val)
+{
+	typedef struct { u64 _[num]; } addrtype;	/* num u64 slots */
+	int cc;		/* filled by ipm, then shifted right by 28 */
+
+	asm volatile (
+		"	.insn	rsy,0xeb0000000017,%2,5,%1\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (cc), "=Q" (*(addrtype *) val)  : "d" (num) : "cc");
+	return cc;	/* the output operand above spans num u64 at val */
+}
+
+/* Query sampling information; returns 0 on success, -EINVAL on failure */
+static inline int qsi(struct hws_qsi_info_block *info)
+{
+	int cc = 1;	/* preset to failure; only the lhi at 1: clears it */
+
+	/* "+d": an exception routed to 2: skips lhi, so cc must stay live */
+	asm volatile(
+		"0:	.insn	s,0xb2860000,0(%1)\n"
+		"1:	lhi	%0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (cc), "+a" (info)
+		: "m" (*info)
+		: "cc", "memory");
+
+	return cc ? -EINVAL : 0;
+}
+
+/* Load sampling controls from *req; returns 0 or -EINVAL */
+static inline int lsctl(struct hws_lsctl_request_block *req)
+{
+	int cc;
+
+	cc = 1;		/* stays nonzero if ipm/srl are never reached */
+	asm volatile(
+		"0:	.insn	s,0xb2870000,0(%1)\n"
+		"1:	ipm	%0\n"
+		"	srl	%0,28\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (cc), "+a" (req)	/* cc is in/out: the preset is kept */
+		: "m" (*req)
+		: "cc", "memory");
+
+	return cc ? -EINVAL : 0;	/* any nonzero cc maps to -EINVAL */
+}
+
+/* Sampling control helper functions */
+
+#include <linux/time.h>
+
+static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi,
+						unsigned long freq)
+{
+	return (USEC_PER_SEC / freq) * qsi->cpu_speed;
+}
+
+static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi,
+						unsigned long rate)
+{
+	return USEC_PER_SEC * qsi->cpu_speed / rate;
+}
+
+#define SDB_TE_ALERT_REQ_MASK	0x4000000000000000UL
+#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL
+
+/* Return TOD timestamp contained in a trailer entry */
+static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+{
+	/* TOD in STCKE format */
+	if (te->t)
+		return *((unsigned long long *) &te->timestamp[1]);
+
+	/* TOD in STCK format */
+	return *((unsigned long long *) &te->timestamp[0]);
+}
+
+/* Return pointer to trailer entry of a sample data block */
+static inline unsigned long *trailer_entry_ptr(unsigned long v)
+{
+	/* Trailer sits in the last bytes of the PAGE_SIZE area at v */
+	unsigned long sdb_end = v + PAGE_SIZE;
+	unsigned long te_addr;
+
+	te_addr = sdb_end - sizeof(struct hws_trailer_entry);
+
+	return (unsigned long *) te_addr;
+}
+
+/* Tell whether an entry of the sample data block table (sdbt)
+ * links to the next sdbt: yields 1 if bit 0 of *s is set, else 0 */
+static inline int is_link_entry(unsigned long *s)
+{
+	return (*s & 0x1ul) != 0;
+}
+
+/* Return pointer to the linked sdbt: the value at *s with bit 0 cleared */
+static inline unsigned long *get_next_sdbt(unsigned long *s)
+{
+	return (unsigned long *) (*s & ~0x1ul);
+}
+#endif /* _ASM_S390_CPU_MF_H */
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
new file mode 100644
index 000000000..221b454c7
--- /dev/null
+++ b/arch/s390/include/asm/cputime.h
@@ -0,0 +1,148 @@
+/*
+ *  Copyright IBM Corp. 2004
+ *
+ *  Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _S390_CPUTIME_H
+#define _S390_CPUTIME_H
+
+#include <linux/types.h>
+#include <asm/div64.h>
+
+#define CPUTIME_PER_USEC 4096ULL
+#define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC)
+
+/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
+
+typedef unsigned long long __nocast cputime_t;
+typedef unsigned long long __nocast cputime64_t;
+
+#define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
+
+static inline unsigned long __div(unsigned long long n, unsigned long base)
+{
+	return n / base;
+}
+
+#define cputime_one_jiffy		jiffies_to_cputime(1)
+
+/*
+ * Convert cputime to jiffies and back.
+ */
+static inline unsigned long cputime_to_jiffies(const cputime_t cputime)
+{
+	return __div((__force unsigned long long) cputime, CPUTIME_PER_SEC / HZ);
+}
+
+static inline cputime_t jiffies_to_cputime(const unsigned int jif)
+{
+	return (__force cputime_t)(jif * (CPUTIME_PER_SEC / HZ));
+}
+
+static inline u64 cputime64_to_jiffies64(cputime64_t cputime)
+{
+	unsigned long long jif = (__force unsigned long long) cputime;
+	do_div(jif, CPUTIME_PER_SEC / HZ);
+	return jif;
+}
+
+static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
+{
+	return (__force cputime64_t)(jif * (CPUTIME_PER_SEC / HZ));
+}
+
+/*
+ * Convert cputime to microseconds and back.
+ */
+static inline unsigned int cputime_to_usecs(const cputime_t cputime)
+{
+	return (__force unsigned long long) cputime >> 12;
+}
+
+static inline cputime_t usecs_to_cputime(const unsigned int m)
+{
+	return (__force cputime_t)(m * CPUTIME_PER_USEC);
+}
+
+#define usecs_to_cputime64(m)		usecs_to_cputime(m)
+
+/*
+ * Convert cputime to seconds and back.
+ */
+static inline unsigned int cputime_to_secs(const cputime_t cputime)
+{
+	return __div((__force unsigned long long) cputime, CPUTIME_PER_SEC / 2) >> 1;
+}
+
+static inline cputime_t secs_to_cputime(const unsigned int s)
+{
+	return (__force cputime_t)(s * CPUTIME_PER_SEC);
+}
+
+/*
+ * Convert cputime to timespec and back.
+ */
+static inline cputime_t timespec_to_cputime(const struct timespec *value)
+{
+	unsigned long long ret = value->tv_sec * CPUTIME_PER_SEC;
+	return (__force cputime_t)(ret + __div(value->tv_nsec * CPUTIME_PER_USEC, NSEC_PER_USEC));
+}
+
+static inline void cputime_to_timespec(const cputime_t cputime,
+				       struct timespec *value)
+{
+	unsigned long long ticks = (__force unsigned long long) cputime;
+	value->tv_sec = ticks / CPUTIME_PER_SEC;	/* whole seconds */
+	value->tv_nsec = (ticks % CPUTIME_PER_SEC) * NSEC_PER_USEC / CPUTIME_PER_USEC;
+}
+
+/*
+ * Convert cputime to timeval and back.
+ * One microsecond is CPUTIME_PER_USEC cputime units, so the
+ * conversion is a plain scale by that factor.
+ */
+static inline cputime_t timeval_to_cputime(const struct timeval *value)
+{
+	unsigned long long ret = value->tv_sec * CPUTIME_PER_SEC;
+	return (__force cputime_t)(ret + value->tv_usec * CPUTIME_PER_USEC);
+}
+
+static inline void cputime_to_timeval(const cputime_t cputime,
+				      struct timeval *value)
+{
+	unsigned long long ticks = (__force unsigned long long) cputime;
+	value->tv_sec = ticks / CPUTIME_PER_SEC;	/* whole seconds */
+	value->tv_usec = (ticks % CPUTIME_PER_SEC) / CPUTIME_PER_USEC;
+}
+
+/*
+ * Convert cputime to clock and back.
+ */
+static inline clock_t cputime_to_clock_t(cputime_t cputime)
+{
+	unsigned long long clock = (__force unsigned long long) cputime;
+	do_div(clock, CPUTIME_PER_SEC / USER_HZ);
+	return clock;
+}
+
+static inline cputime_t clock_t_to_cputime(unsigned long x)
+{
+	return (__force cputime_t)(x * (CPUTIME_PER_SEC / USER_HZ));
+}
+
+/*
+ * Convert cputime64 to clock.
+ */
+static inline clock_t cputime64_to_clock_t(cputime64_t cputime)
+{
+	unsigned long long clock = (__force unsigned long long) cputime;
+	do_div(clock, CPUTIME_PER_SEC / USER_HZ);
+	return clock;
+}
+
+cputime64_t arch_cpu_idle_time(int cpu);
+
+#define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
+
+#endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/include/asm/crw.h b/arch/s390/include/asm/crw.h
new file mode 100644
index 000000000..7c31d3e25
--- /dev/null
+++ b/arch/s390/include/asm/crw.h
@@ -0,0 +1,69 @@
+/*
+ *   Data definitions for channel report processing
+ *    Copyright IBM Corp. 2000, 2009
+ *    Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Cornelia Huck <cornelia.huck@de.ibm.com>,
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_CRW_H
+#define _ASM_S390_CRW_H
+
+#include <linux/types.h>
+
+/*
+ * Channel Report Word
+ */
+struct crw {
+	__u32 res1 :  1;   /* reserved zero */
+	__u32 slct :  1;   /* solicited */
+	__u32 oflw :  1;   /* overflow */
+	__u32 chn  :  1;   /* chained */
+	__u32 rsc  :  4;   /* reporting source code */
+	__u32 anc  :  1;   /* ancillary report */
+	__u32 res2 :  1;   /* reserved zero */
+	__u32 erc  :  6;   /* error-recovery code */
+	__u32 rsid : 16;   /* reporting-source ID */
+} __attribute__ ((packed));
+
+typedef void (*crw_handler_t)(struct crw *, struct crw *, int);
+
+extern int crw_register_handler(int rsc, crw_handler_t handler);
+extern void crw_unregister_handler(int rsc);
+extern void crw_handle_channel_report(void);
+void crw_wait_for_channel_report(void);
+
+#define NR_RSCS 16
+
+#define CRW_RSC_MONITOR  0x2  /* monitoring facility */
+#define CRW_RSC_SCH	 0x3  /* subchannel */
+#define CRW_RSC_CPATH	 0x4  /* channel path */
+#define CRW_RSC_CONFIG	 0x9  /* configuration-alert facility */
+#define CRW_RSC_CSS	 0xB  /* channel subsystem */
+
+#define CRW_ERC_EVENT	 0x00 /* event information pending */
+#define CRW_ERC_AVAIL	 0x01 /* available */
+#define CRW_ERC_INIT	 0x02 /* initialized */
+#define CRW_ERC_TERROR	 0x03 /* temporary error */
+#define CRW_ERC_IPARM	 0x04 /* installed parm initialized */
+#define CRW_ERC_TERM	 0x05 /* terminal */
+#define CRW_ERC_PERRN	 0x06 /* perm. error, fac. not init */
+#define CRW_ERC_PERRI	 0x07 /* perm. error, facility init */
+#define CRW_ERC_PMOD	 0x08 /* installed parameters modified */
+
+static inline int stcrw(struct crw *pcrw)
+{
+	int ccode;	/* filled by ipm, shifted right by 28, returned */
+
+	asm volatile(
+		"	stcrw	0(%2)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (ccode), "=m" (*pcrw)	/* *pcrw is an output */
+		: "a" (pcrw)
+		: "cc" );
+	return ccode;
+}
+
+#endif /* _ASM_S390_CRW_H */
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
new file mode 100644
index 000000000..09d1dd46b
--- /dev/null
+++ b/arch/s390/include/asm/css_chars.h
@@ -0,0 +1,38 @@
+#ifndef _ASM_CSS_CHARS_H
+#define _ASM_CSS_CHARS_H
+
+#include <linux/types.h>
+
+struct css_general_char {
+	u64 : 12;
+	u32 dynio : 1;	 /* bit 12 */
+	u32 : 4;
+	u32 eadm : 1;	 /* bit 17 */
+	u32 : 23;
+	u32 aif : 1;	 /* bit 41 */
+	u32 : 3;
+	u32 mcss : 1;	 /* bit 45 */
+	u32 fcs : 1;	 /* bit 46 */
+	u32 : 1;
+	u32 ext_mb : 1;  /* bit 48 */
+	u32 : 7;
+	u32 aif_tdd : 1; /* bit 56 */
+	u32 : 1;
+	u32 qebsm : 1;	 /* bit 58 */
+	u32 : 8;
+	u32 aif_osa : 1; /* bit 67 */
+	u32 : 12;
+	u32 eadm_rf : 1; /* bit 80 */
+	u32 : 1;
+	u32 cib : 1;	 /* bit 82 */
+	u32 : 5;
+	u32 fcx : 1;	 /* bit 88 */
+	u32 : 19;
+	u32 alt_ssi : 1; /* bit 108 */
+	u32:1;
+	u32 narf:1;	 /* bit 110 */
+} __packed;
+
+extern struct css_general_char css_general_characteristics;
+
+#endif
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
new file mode 100644
index 000000000..cfad7fca0
--- /dev/null
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_CTL_REG_H
+#define __ASM_CTL_REG_H
+
+#include <linux/bug.h>
+
+#define __ctl_load(array, low, high) {					\
+	typedef struct { char _[sizeof(array)]; } addrtype;		\
+									\
+	BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\
+	asm volatile(							\
+		"	lctlg	%1,%2,%0\n"				\
+		: : "Q" (*(addrtype *)(&array)), "i" (low), "i" (high));\
+}
+
+#define __ctl_store(array, low, high) {					\
+	typedef struct { char _[sizeof(array)]; } addrtype;		\
+									\
+	BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\
+	asm volatile(							\
+		"	stctg	%1,%2,%0\n"				\
+		: "=Q" (*(addrtype *)(&array))				\
+		: "i" (low), "i" (high));				\
+}
+
+static inline void __ctl_set_bit(unsigned int cr, unsigned int bit)
+{
+	unsigned long reg;	/* working copy of control register cr */
+
+	__ctl_store(reg, cr, cr);	/* fetch the current contents */
+	reg |= 1UL << bit;		/* set the requested bit */
+	__ctl_load(reg, cr, cr);	/* write the result back */
+}
+
+static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
+{
+	unsigned long reg;	/* working copy of control register cr */
+
+	__ctl_store(reg, cr, cr);	/* fetch the current contents */
+	reg &= ~(1UL << bit);		/* clear the requested bit */
+	__ctl_load(reg, cr, cr);	/* write the result back */
+}
+
+void smp_ctl_set_bit(int cr, int bit);
+void smp_ctl_clear_bit(int cr, int bit);
+
+union ctlreg0 {
+	unsigned long val;
+	struct {
+		unsigned long	   : 32;
+		unsigned long	   : 3;
+		unsigned long lap  : 1; /* Low-address-protection control */
+		unsigned long	   : 4;
+		unsigned long edat : 1; /* Enhanced-DAT-enablement control */
+		unsigned long	   : 23;
+	};
+};
+
+#ifdef CONFIG_SMP
+# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
+# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
+#else
+# define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit)
+# define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit)
+#endif
+
+#endif /* __ASM_CTL_REG_H */
diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h
new file mode 100644
index 000000000..b80941f30
--- /dev/null
+++ b/arch/s390/include/asm/current.h
@@ -0,0 +1,18 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/current.h"
+ */
+
+#ifndef _S390_CURRENT_H
+#define _S390_CURRENT_H
+
+#include <asm/lowcore.h>
+
+struct task_struct;
+
+#define current ((struct task_struct *const)S390_lowcore.current_task)
+
+#endif /* !(_S390_CURRENT_H) */
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
new file mode 100644
index 000000000..0206c8052
--- /dev/null
+++ b/arch/s390/include/asm/debug.h
@@ -0,0 +1,262 @@
+/*
+ *   S/390 debug facility
+ *
+ *    Copyright IBM Corp. 1999, 2000
+ */
+#ifndef DEBUG_H
+#define DEBUG_H
+
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <uapi/asm/debug.h>
+
+#define DEBUG_MAX_LEVEL            6  /* debug levels range from 0 to 6 */
+#define DEBUG_OFF_LEVEL            -1 /* level where debug is switched off */
+#define DEBUG_FLUSH_ALL            -1 /* parameter to flush all areas */
+#define DEBUG_MAX_VIEWS            10 /* max number of views in debugfs */
+#define DEBUG_MAX_NAME_LEN         64 /* max length for a debugfs file name */
+#define DEBUG_DEFAULT_LEVEL        3  /* initial debug level */
+
+#define DEBUG_DIR_ROOT "s390dbf" /* name of debug root directory in debugfs */
+
+#define DEBUG_DATA(entry) ((char *)((entry) + 1)) /* data is stored behind */
+                                                  /* the entry information */
+
+typedef struct __debug_entry debug_entry_t;
+
+struct debug_view;
+
+typedef struct debug_info {	
+	struct debug_info* next;
+	struct debug_info* prev;
+	atomic_t ref_count;
+	spinlock_t lock;			
+	int level;
+	int nr_areas;
+	int pages_per_area;
+	int buf_size;
+	int entry_size;	
+	debug_entry_t*** areas;
+	int active_area;
+	int *active_pages;
+	int *active_entries;
+	struct dentry* debugfs_root_entry;
+	struct dentry* debugfs_entries[DEBUG_MAX_VIEWS];
+	struct debug_view* views[DEBUG_MAX_VIEWS];	
+	char name[DEBUG_MAX_NAME_LEN];
+	umode_t mode;
+} debug_info_t;
+
+typedef int (debug_header_proc_t) (debug_info_t* id,
+				   struct debug_view* view,
+				   int area,
+				   debug_entry_t* entry,
+				   char* out_buf);
+
+typedef int (debug_format_proc_t) (debug_info_t* id,
+				   struct debug_view* view, char* out_buf,
+				   const char* in_buf);
+typedef int (debug_prolog_proc_t) (debug_info_t* id,
+				   struct debug_view* view,
+				   char* out_buf);
+typedef int (debug_input_proc_t) (debug_info_t* id,
+				  struct debug_view* view,
+				  struct file* file,
+				  const char __user *user_buf,
+				  size_t in_buf_size, loff_t* offset);
+
+int debug_dflt_header_fn(debug_info_t* id, struct debug_view* view,
+		         int area, debug_entry_t* entry, char* out_buf);						
+				
+struct debug_view {
+	char name[DEBUG_MAX_NAME_LEN];
+	debug_prolog_proc_t* prolog_proc;
+	debug_header_proc_t* header_proc;
+	debug_format_proc_t* format_proc;
+	debug_input_proc_t*  input_proc;
+	void*                private_data;
+};
+
+extern struct debug_view debug_hex_ascii_view;
+extern struct debug_view debug_raw_view;
+extern struct debug_view debug_sprintf_view;
+
+/* do NOT call the _common functions directly; use the wrappers below */
+
+debug_entry_t* debug_event_common(debug_info_t* id, int level, 
+                                  const void* data, int length);
+
+debug_entry_t* debug_exception_common(debug_info_t* id, int level, 
+                                      const void* data, int length);
+
+/* Debug Feature API: */
+
+debug_info_t *debug_register(const char *name, int pages, int nr_areas,
+                             int buf_size);
+
+debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas,
+				  int buf_size, umode_t mode, uid_t uid,
+				  gid_t gid);
+
+void debug_unregister(debug_info_t* id);
+
+void debug_set_level(debug_info_t* id, int new_level);
+
+void debug_set_critical(void);
+void debug_stop_all(void);
+
+static inline bool debug_level_enabled(debug_info_t* id, int level)
+{
+	return level <= id->level;	/* no NULL check: id must be valid */
+}
+
+static inline debug_entry_t *
+debug_event(debug_info_t *id, int level, void *data, int length)
+{
+	if (!id || level > id->level || id->pages_per_area == 0)
+		return NULL;	/* no log, level filtered out, or no pages */
+	return debug_event_common(id, level, data, length);
+}
+
+static inline debug_entry_t *
+debug_int_event(debug_info_t *id, int level, unsigned int tag)
+{
+	/* tag is passed by value, so its address can be logged directly */
+	if (!id || level > id->level || id->pages_per_area == 0)
+		return NULL;
+	return debug_event_common(id, level, &tag, sizeof(tag));
+}
+
+static inline debug_entry_t *
+debug_long_event(debug_info_t *id, int level, unsigned long tag)
+{
+	/* tag is passed by value, so its address can be logged directly */
+	if (!id || level > id->level || id->pages_per_area == 0)
+		return NULL;
+	return debug_event_common(id, level, &tag, sizeof(tag));
+}
+
+static inline debug_entry_t *
+debug_text_event(debug_info_t *id, int level, const char *txt)
+{
+	if (!id || level > id->level || id->pages_per_area == 0)
+		return NULL;	/* no log, level filtered out, or no pages */
+	return debug_event_common(id, level, txt, strlen(txt));
+}
+
+/*
+ * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
+ * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
+ */
+extern debug_entry_t *
+__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
+	__attribute__ ((format(printf, 3, 4)));
+
+#define debug_sprintf_event(_id, _level, _fmt, ...)			\
+({									\
+	debug_entry_t *__ret;						\
+	debug_info_t *__id = _id;					\
+	int __level = _level;						\
+	if ((!__id) || (__level > __id->level))				\
+		__ret = NULL;						\
+	else								\
+		__ret = __debug_sprintf_event(__id, __level,		\
+					      _fmt, ## __VA_ARGS__);	\
+	__ret;								\
+})
+
+static inline debug_entry_t *
+debug_exception(debug_info_t *id, int level, void *data, int length)
+{
+	if (!id || level > id->level || id->pages_per_area == 0)
+		return NULL;	/* no log, level filtered out, or no pages */
+	return debug_exception_common(id, level, data, length);
+}
+
+static inline debug_entry_t *
+debug_int_exception(debug_info_t *id, int level, unsigned int tag)
+{
+	/* tag is passed by value, so its address can be logged directly */
+	if (!id || level > id->level || id->pages_per_area == 0)
+		return NULL;
+	return debug_exception_common(id, level, &tag, sizeof(tag));
+}
+
+static inline debug_entry_t *
+debug_long_exception(debug_info_t *id, int level, unsigned long tag)
+{
+	/* tag is passed by value, so its address can be logged directly */
+	if (!id || level > id->level || id->pages_per_area == 0)
+		return NULL;
+	return debug_exception_common(id, level, &tag, sizeof(tag));
+}
+
+static inline debug_entry_t *
+debug_text_exception(debug_info_t *id, int level, const char *txt)
+{
+	if (!id || level > id->level || id->pages_per_area == 0)
+		return NULL;	/* no log, level filtered out, or no pages */
+	return debug_exception_common(id, level, txt, strlen(txt));
+}
+
+/*
+ * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
+ * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
+ */
+extern debug_entry_t *
+__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
+	__attribute__ ((format(printf, 3, 4)));
+
+#define debug_sprintf_exception(_id, _level, _fmt, ...)			\
+({									\
+	debug_entry_t *__ret;						\
+	debug_info_t *__id = _id;					\
+	int __level = _level;						\
+	if ((!__id) || (__level > __id->level))				\
+		__ret = NULL;						\
+	else								\
+		__ret = __debug_sprintf_exception(__id, __level,	\
+						  _fmt, ## __VA_ARGS__);\
+	__ret;								\
+})
+
+int debug_register_view(debug_info_t* id, struct debug_view* view);
+int debug_unregister_view(debug_info_t* id, struct debug_view* view);
+
+/*
+   define the debug levels:
+   - 0 No debugging output to console or syslog
+   - 1 Log internal errors to syslog, ignore check conditions 
+   - 2 Log internal errors and check conditions to syslog
+   - 3 Log internal errors to console, log check conditions to syslog
+   - 4 Log internal errors and check conditions to console
+   - 5 panic on internal errors, log check conditions to console
+   - 6 panic on both, internal errors and check conditions
+ */
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 4
+#endif
+
+#define INTERNAL_ERRMSG(x,y...) "E" __FILE__ "%d: " x, __LINE__, y
+#define INTERNAL_WRNMSG(x,y...) "W" __FILE__ "%d: " x, __LINE__, y
+#define INTERNAL_INFMSG(x,y...) "I" __FILE__ "%d: " x, __LINE__, y
+#define INTERNAL_DEBMSG(x,y...) "D" __FILE__ "%d: " x, __LINE__, y
+
+#if DEBUG_LEVEL > 0
+#define PRINT_DEBUG(x...) printk ( KERN_DEBUG PRINTK_HEADER x )
+#define PRINT_INFO(x...) printk ( KERN_INFO PRINTK_HEADER x )
+#define PRINT_WARN(x...) printk ( KERN_WARNING PRINTK_HEADER x )
+#define PRINT_ERR(x...) printk ( KERN_ERR PRINTK_HEADER x )
+#define PRINT_FATAL(x...) panic ( PRINTK_HEADER x )
+#else
+#define PRINT_DEBUG(x...) printk ( KERN_DEBUG PRINTK_HEADER x )
+#define PRINT_INFO(x...) printk ( KERN_DEBUG PRINTK_HEADER x )
+#define PRINT_WARN(x...) printk ( KERN_DEBUG PRINTK_HEADER x )
+#define PRINT_ERR(x...) printk ( KERN_DEBUG PRINTK_HEADER x )
+#define PRINT_FATAL(x...) printk ( KERN_DEBUG PRINTK_HEADER x )
+#endif				/* DEBUG_LEVEL > 0 */
+
+#endif				/* DEBUG_H */
diff --git a/arch/s390/include/asm/delay.h b/arch/s390/include/asm/delay.h
new file mode 100644
index 000000000..3f6e4095f
--- /dev/null
+++ b/arch/s390/include/asm/delay.h
@@ -0,0 +1,24 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/delay.h"
+ *    Copyright (C) 1993 Linus Torvalds
+ *
+ *  Delay routines calling functions in arch/s390/lib/delay.c
+ */
+ 
+#ifndef _S390_DELAY_H
+#define _S390_DELAY_H
+
+void __ndelay(unsigned long long nsecs);
+void __udelay(unsigned long long usecs);
+void udelay_simple(unsigned long long usecs);
+void __delay(unsigned long loops);
+
+#define ndelay(n) __ndelay((unsigned long long) (n))
+#define udelay(n) __udelay((unsigned long long) (n))
+#define mdelay(n) __udelay((unsigned long long) (n) * 1000)
+
+#endif /* defined(_S390_DELAY_H) */
diff --git a/arch/s390/include/asm/device.h b/arch/s390/include/asm/device.h
new file mode 100644
index 000000000..d8f9872b0
--- /dev/null
+++ b/arch/s390/include/asm/device.h
@@ -0,0 +1,7 @@
+/*
+ * Arch specific extensions to struct device
+ *
+ * This file is released under the GPLv2
+ */
+#include <asm-generic/device.h>
+
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
new file mode 100644
index 000000000..7e91c5807
--- /dev/null
+++ b/arch/s390/include/asm/diag.h
@@ -0,0 +1,52 @@
+/*
+ * s390 diagnose functions
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_DIAG_H
+#define _ASM_S390_DIAG_H
+
+/*
+ * Diagnose 10: Release page range
+ */
+static inline void diag10_range(unsigned long start_pfn, unsigned long num_pfn)
+{
+	unsigned long start_addr, end_addr;
+
+	start_addr = start_pfn << PAGE_SHIFT;
+	end_addr = (start_pfn + num_pfn - 1) << PAGE_SHIFT;
+
+	asm volatile(
+		"0:	diag	%0,%1,0x10\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		EX_TABLE(1b, 1b)
+		: : "a" (start_addr), "a" (end_addr));
+}
+
+/*
+ * Diagnose 14: Input spool file manipulation
+ */
+extern int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode);
+
+/*
+ * Diagnose 210: Get information about a virtual device
+ */
+struct diag210 {
+	u16 vrdcdvno;	/* device number (input) */
+	u16 vrdclen;	/* data block length (input) */
+	u8 vrdcvcla;	/* virtual device class (output) */
+	u8 vrdcvtyp;	/* virtual device type (output) */
+	u8 vrdcvsta;	/* virtual device status (output) */
+	u8 vrdcvfla;	/* virtual device flags (output) */
+	u8 vrdcrccl;	/* real device class (output) */
+	u8 vrdccrty;	/* real device type (output) */
+	u8 vrdccrmd;	/* real device model (output) */
+	u8 vrdccrft;	/* real device feature (output) */
+} __attribute__((packed, aligned(4)));
+
+extern int diag210(struct diag210 *addr);
+
+#endif /* _ASM_S390_DIAG_H */
diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h
new file mode 100644
index 000000000..60323c219
--- /dev/null
+++ b/arch/s390/include/asm/dis.h
@@ -0,0 +1,53 @@
+/*
+ * Disassemble s390 instructions.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ */
+
+#ifndef __ASM_S390_DIS_H__
+#define __ASM_S390_DIS_H__
+
+/* Type of operand */
+#define OPERAND_GPR	0x1	/* Operand printed as %rx */
+#define OPERAND_FPR	0x2	/* Operand printed as %fx */
+#define OPERAND_AR	0x4	/* Operand printed as %ax */
+#define OPERAND_CR	0x8	/* Operand printed as %cx */
+#define OPERAND_VR	0x10	/* Operand printed as %vx */
+#define OPERAND_DISP	0x20	/* Operand printed as displacement */
+#define OPERAND_BASE	0x40	/* Operand printed as base register */
+#define OPERAND_INDEX	0x80	/* Operand printed as index register */
+#define OPERAND_PCREL	0x100	/* Operand printed as pc-relative symbol */
+#define OPERAND_SIGNED	0x200	/* Operand printed as signed value */
+#define OPERAND_LENGTH	0x400	/* Operand printed as length (+1) */
+
+
+struct s390_operand {
+	int bits;		/* The number of bits in the operand. */
+	int shift;		/* The number of bits to shift. */
+	int flags;		/* One bit syntax flags. */
+};
+
+struct s390_insn {
+	const char name[5];
+	unsigned char opfrag;
+	unsigned char format;
+};
+
+
+static inline int insn_length(unsigned char code)
+{
+	return ((((int) code + 64) >> 7) + 1) << 1;	/* yields 2, 4 or 6 */
+}
+
+void show_code(struct pt_regs *regs);
+void print_fn_code(unsigned char *code, unsigned long len);
+int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len);
+struct s390_insn *find_insn(unsigned char *code);
+
+static inline int is_known_insn(unsigned char *code)
+{
+	return !!find_insn(code);
+}
+
+#endif /* __ASM_S390_DIS_H__ */
diff --git a/arch/s390/include/asm/div64.h b/arch/s390/include/asm/div64.h
new file mode 100644
index 000000000..6cd978cef
--- /dev/null
+++ b/arch/s390/include/asm/div64.h
@@ -0,0 +1 @@
+#include <asm-generic/div64.h>
diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h
new file mode 100644
index 000000000..9d395961e
--- /dev/null
+++ b/arch/s390/include/asm/dma-mapping.h
@@ -0,0 +1,90 @@
+#ifndef _ASM_S390_DMA_MAPPING_H
+#define _ASM_S390_DMA_MAPPING_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-attrs.h>
+#include <linux/dma-debug.h>
+#include <linux/io.h>
+
+#define DMA_ERROR_CODE		(~(dma_addr_t) 0x0)
+
+extern struct dma_map_ops s390_dma_ops;
+
+static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+{
+	return &s390_dma_ops;
+}
+
+extern int dma_set_mask(struct device *dev, u64 mask);
+
+static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+				  enum dma_data_direction direction)
+{
+}
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+#include <asm-generic/dma-mapping-common.h>
+
+static inline int dma_supported(struct device *dev, u64 mask)
+{
+	struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+	if (dma_ops->dma_supported == NULL)
+		return 1;
+	return dma_ops->dma_supported(dev, mask);
+}
+
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+	if (!dev->dma_mask)
+		return false;
+	return addr + size - 1 <= *dev->dma_mask;
+}
+
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+	debug_dma_mapping_error(dev, dma_addr);
+	if (dma_ops->mapping_error)
+		return dma_ops->mapping_error(dev, dma_addr);
+	return dma_addr == DMA_ERROR_CODE;
+}
+
+#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+				    dma_addr_t *dma_handle, gfp_t flags,
+				    struct dma_attrs *attrs)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+	void *cpu_addr;
+
+	BUG_ON(!ops);
+
+	cpu_addr = ops->alloc(dev, size, dma_handle, flags, attrs);
+	debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
+
+	return cpu_addr;
+}
+
+#define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL)
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+				  void *cpu_addr, dma_addr_t dma_handle,
+				  struct dma_attrs *attrs)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
+	BUG_ON(!ops);
+
+	debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
+	ops->free(dev, size, cpu_addr, dma_handle, attrs);
+}
+
+#endif /* _ASM_S390_DMA_MAPPING_H */
diff --git a/arch/s390/include/asm/dma.h b/arch/s390/include/asm/dma.h
new file mode 100644
index 000000000..bb9bdcd20
--- /dev/null
+++ b/arch/s390/include/asm/dma.h
@@ -0,0 +1,19 @@
+#ifndef _ASM_S390_DMA_H
+#define _ASM_S390_DMA_H
+
+#include <asm/io.h>
+
+/*
+ * MAX_DMA_ADDRESS is ambiguous because on s390 it is completely unrelated
+ * to DMA. It _is_ used for the s390 memory zone split at 2GB caused
+ * by the 31 bit heritage.
+ */
+#define MAX_DMA_ADDRESS         0x80000000
+
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#else
+#define isa_dma_bridge_buggy	(0)
+#endif
+
+#endif /* _ASM_S390_DMA_H */
diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h
new file mode 100644
index 000000000..67026300c
--- /dev/null
+++ b/arch/s390/include/asm/eadm.h
@@ -0,0 +1,117 @@
+#ifndef _ASM_S390_EADM_H
+#define _ASM_S390_EADM_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+
+struct arqb {
+	u64 data;
+	u16 fmt:4;
+	u16:12;
+	u16 cmd_code;
+	u16:16;
+	u16 msb_count;
+	u32 reserved[12];
+} __packed;
+
+#define ARQB_CMD_MOVE	1
+
+struct arsb {
+	u16 fmt:4;
+	u32:28;
+	u8 ef;
+	u8:8;
+	u8 ecbi;
+	u8:8;
+	u8 fvf;
+	u16:16;
+	u8 eqc;
+	u32:32;
+	u64 fail_msb;
+	u64 fail_aidaw;
+	u64 fail_ms;
+	u64 fail_scm;
+	u32 reserved[4];
+} __packed;
+
+#define EQC_WR_PROHIBIT 22
+
+struct msb {
+	u8 fmt:4;
+	u8 oc:4;
+	u8 flags;
+	u16:12;
+	u16 bs:4;
+	u32 blk_count;
+	u64 data_addr;
+	u64 scm_addr;
+	u64:64;
+} __packed;
+
+struct aidaw {
+	u8 flags;
+	u32 :24;
+	u32 :32;
+	u64 data_addr;
+} __packed;
+
+#define MSB_OC_CLEAR	0
+#define MSB_OC_READ	1
+#define MSB_OC_WRITE	2
+#define MSB_OC_RELEASE	3
+
+#define MSB_FLAG_BNM	0x80
+#define MSB_FLAG_IDA	0x40
+
+#define MSB_BS_4K	0
+#define MSB_BS_1M	1
+
+#define AOB_NR_MSB	124
+
+struct aob {
+	struct arqb request;
+	struct arsb response;
+	struct msb msb[AOB_NR_MSB];
+} __packed __aligned(PAGE_SIZE);
+
+struct aob_rq_header {
+	struct scm_device *scmdev;
+	char data[0];
+};
+
+struct scm_device {
+	u64 address;
+	u64 size;
+	unsigned int nr_max_block;
+	struct device dev;
+	struct {
+		unsigned int persistence:4;
+		unsigned int oper_state:4;
+		unsigned int data_state:4;
+		unsigned int rank:4;
+		unsigned int release:1;
+		unsigned int res_id:8;
+	} __packed attrs;
+};
+
+#define OP_STATE_GOOD		1
+#define OP_STATE_TEMP_ERR	2
+#define OP_STATE_PERM_ERR	3
+
+enum scm_event {SCM_CHANGE, SCM_AVAIL};
+
+struct scm_driver {
+	struct device_driver drv;
+	int (*probe) (struct scm_device *scmdev);
+	int (*remove) (struct scm_device *scmdev);
+	void (*notify) (struct scm_device *scmdev, enum scm_event event);
+	void (*handler) (struct scm_device *scmdev, void *data, int error);
+};
+
+int scm_driver_register(struct scm_driver *scmdrv);
+void scm_driver_unregister(struct scm_driver *scmdrv);
+
+int eadm_start_aob(struct aob *aob);
+void scm_irq_handler(struct aob *aob, int error);
+
+#endif /* _ASM_S390_EADM_H */
diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h
new file mode 100644
index 000000000..c5befc5a3
--- /dev/null
+++ b/arch/s390/include/asm/ebcdic.h
@@ -0,0 +1,48 @@
+/*
+ *    EBCDIC -> ASCII, ASCII -> EBCDIC conversion routines.
+ *
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _EBCDIC_H
+#define _EBCDIC_H
+
+#ifndef _S390_TYPES_H
+#include <types.h>
+#endif
+
+extern __u8 _ascebc_500[256];   /* ASCII -> EBCDIC 500 conversion table */
+extern __u8 _ebcasc_500[256];   /* EBCDIC 500 -> ASCII conversion table */
+extern __u8 _ascebc[256];   /* ASCII -> EBCDIC conversion table */
+extern __u8 _ebcasc[256];   /* EBCDIC -> ASCII conversion table */
+extern __u8 _ebc_tolower[256]; /* EBCDIC -> lowercase */
+extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */
+
+static inline void
+codepage_convert(const __u8 *codepage, volatile __u8 * addr, unsigned long nr)
+{
+	if (nr-- <= 0)		/* nr is unsigned: true only for nr == 0 */
+		return;
+	asm volatile(
+		"	bras	1,1f\n"		/* r1 = address of next insn */
+		"	tr	0(1,%0),0(%2)\n"	/* target of ex below */
+		"0:	tr	0(256,%0),0(%2)\n"	/* 256-byte chunk */
+		"	la	%0,256(%0)\n"		/* addr += 256 */
+		"1:	ahi	%1,-256\n"		/* nr -= 256 */
+		"	jnm	0b\n"			/* loop while nr >= 0 */
+		"	ex	%1,0(1)"		/* translate the rest */
+		: "+&a" (addr), "+&a" (nr)
+		: "a" (codepage) : "cc", "memory", "1");
+}
+
+#define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr)
+#define EBCASC(addr,nr) codepage_convert(_ebcasc, addr, nr)
+#define ASCEBC_500(addr,nr) codepage_convert(_ascebc_500, addr, nr)
+#define EBCASC_500(addr,nr) codepage_convert(_ebcasc_500, addr, nr)
+#define EBC_TOLOWER(addr,nr) codepage_convert(_ebc_tolower, addr, nr)
+#define EBC_TOUPPER(addr,nr) codepage_convert(_ebc_toupper, addr, nr)
+
+#endif
+
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
new file mode 100644
index 000000000..3ad48f22d
--- /dev/null
+++ b/arch/s390/include/asm/elf.h
@@ -0,0 +1,227 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/elf.h"
+ */
+
+#ifndef __ASMS390_ELF_H
+#define __ASMS390_ELF_H
+
+/* s390 relocations defined by the ABIs */
+#define R_390_NONE		0	/* No reloc.  */
+#define R_390_8			1	/* Direct 8 bit.  */
+#define R_390_12		2	/* Direct 12 bit.  */
+#define R_390_16		3	/* Direct 16 bit.  */
+#define R_390_32		4	/* Direct 32 bit.  */
+#define R_390_PC32		5	/* PC relative 32 bit.	*/
+#define R_390_GOT12		6	/* 12 bit GOT offset.  */
+#define R_390_GOT32		7	/* 32 bit GOT offset.  */
+#define R_390_PLT32		8	/* 32 bit PC relative PLT address.  */
+#define R_390_COPY		9	/* Copy symbol at runtime.  */
+#define R_390_GLOB_DAT		10	/* Create GOT entry.  */
+#define R_390_JMP_SLOT		11	/* Create PLT entry.  */
+#define R_390_RELATIVE		12	/* Adjust by program base.  */
+#define R_390_GOTOFF32		13	/* 32 bit offset to GOT.	 */
+#define R_390_GOTPC		14	/* 32 bit PC rel. offset to GOT.  */
+#define R_390_GOT16		15	/* 16 bit GOT offset.  */
+#define R_390_PC16		16	/* PC relative 16 bit.	*/
+#define R_390_PC16DBL		17	/* PC relative 16 bit shifted by 1.  */
+#define R_390_PLT16DBL		18	/* 16 bit PC rel. PLT shifted by 1.  */
+#define R_390_PC32DBL		19	/* PC relative 32 bit shifted by 1.  */
+#define R_390_PLT32DBL		20	/* 32 bit PC rel. PLT shifted by 1.  */
+#define R_390_GOTPCDBL		21	/* 32 bit PC rel. GOT shifted by 1.  */
+#define R_390_64		22	/* Direct 64 bit.  */
+#define R_390_PC64		23	/* PC relative 64 bit.	*/
+#define R_390_GOT64		24	/* 64 bit GOT offset.  */
+#define R_390_PLT64		25	/* 64 bit PC relative PLT address.  */
+#define R_390_GOTENT		26	/* 32 bit PC rel. to GOT entry >> 1. */
+#define R_390_GOTOFF16		27	/* 16 bit offset to GOT. */
+#define R_390_GOTOFF64		28	/* 64 bit offset to GOT. */
+#define R_390_GOTPLT12		29	/* 12 bit offset to jump slot.	*/
+#define R_390_GOTPLT16		30	/* 16 bit offset to jump slot.	*/
+#define R_390_GOTPLT32		31	/* 32 bit offset to jump slot.	*/
+#define R_390_GOTPLT64		32	/* 64 bit offset to jump slot.	*/
+#define R_390_GOTPLTENT		33	/* 32 bit rel. offset to jump slot.  */
+#define R_390_PLTOFF16		34	/* 16 bit offset from GOT to PLT. */
+#define R_390_PLTOFF32		35	/* 32 bit offset from GOT to PLT. */
+#define R_390_PLTOFF64		36	/* 64 bit offset from GOT to PLT. */
+#define R_390_TLS_LOAD		37	/* Tag for load insn in TLS code. */
+#define R_390_TLS_GDCALL	38	/* Tag for function call in general
+                                           dynamic TLS code.  */
+#define R_390_TLS_LDCALL	39	/* Tag for function call in local
+                                           dynamic TLS code.  */
+#define R_390_TLS_GD32		40	/* Direct 32 bit for general dynamic
+                                           thread local data.  */
+#define R_390_TLS_GD64		41	/* Direct 64 bit for general dynamic
+                                           thread local data.  */
+#define R_390_TLS_GOTIE12	42	/* 12 bit GOT offset for static TLS
+                                           block offset.  */
+#define R_390_TLS_GOTIE32	43	/* 32 bit GOT offset for static TLS
+                                           block offset.  */
+#define R_390_TLS_GOTIE64	44	/* 64 bit GOT offset for static TLS
+                                           block offset.  */
+#define R_390_TLS_LDM32		45	/* Direct 32 bit for local dynamic
+                                           thread local data in LD code.  */
+#define R_390_TLS_LDM64		46	/* Direct 64 bit for local dynamic
+                                           thread local data in LD code.  */
+#define R_390_TLS_IE32		47	/* 32 bit address of GOT entry for
+                                           negated static TLS block offset.  */
+#define R_390_TLS_IE64		48	/* 64 bit address of GOT entry for
+                                           negated static TLS block offset.  */
+#define R_390_TLS_IEENT		49	/* 32 bit rel. offset to GOT entry for
+                                           negated static TLS block offset.  */
+#define R_390_TLS_LE32		50	/* 32 bit negated offset relative to
+                                           static TLS block.  */
+#define R_390_TLS_LE64		51	/* 64 bit negated offset relative to
+                                           static TLS block.  */
+#define R_390_TLS_LDO32		52	/* 32 bit offset relative to TLS
+                                           block.  */
+#define R_390_TLS_LDO64		53	/* 64 bit offset relative to TLS
+                                           block.  */
+#define R_390_TLS_DTPMOD	54	/* ID of module containing symbol.  */
+#define R_390_TLS_DTPOFF	55	/* Offset in TLS block.  */
+#define R_390_TLS_TPOFF		56	/* Negated offset in static TLS
+                                           block.  */
+#define R_390_20		57	/* Direct 20 bit.  */
+#define R_390_GOT20		58	/* 20 bit GOT offset.  */
+#define R_390_GOTPLT20		59	/* 20 bit offset to jump slot.  */
+#define R_390_TLS_GOTIE20	60	/* 20 bit GOT offset for static TLS
+					   block offset.  */
+/* Keep this the last entry.  */
+#define R_390_NUM	61
+
+/* Bits present in AT_HWCAP. */
+#define HWCAP_S390_ESAN3	1
+#define HWCAP_S390_ZARCH	2
+#define HWCAP_S390_STFLE	4
+#define HWCAP_S390_MSA		8
+#define HWCAP_S390_LDISP	16
+#define HWCAP_S390_EIMM		32
+#define HWCAP_S390_DFP		64
+#define HWCAP_S390_HPAGE	128
+#define HWCAP_S390_ETF3EH	256
+#define HWCAP_S390_HIGH_GPRS	512
+#define HWCAP_S390_TE		1024
+#define HWCAP_S390_VXRS		2048
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS	ELFCLASS64
+#define ELF_DATA	ELFDATA2MSB
+#define ELF_ARCH	EM_S390
+
+/*
+ * ELF register definitions..
+ */
+
+#include <asm/ptrace.h>
+#include <asm/compat.h>
+#include <asm/syscall.h>
+#include <asm/user.h>
+
+typedef s390_fp_regs elf_fpregset_t;
+typedef s390_regs elf_gregset_t;
+
+typedef s390_fp_regs compat_elf_fpregset_t;
+typedef s390_compat_regs compat_elf_gregset_t;
+
+#include <linux/sched.h>	/* for task_struct */
+#include <asm/mmu_context.h>
+
+#include <asm/vdso.h>
+
+extern unsigned int vdso_enabled;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) \
+	(((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \
+         && (x)->e_ident[EI_CLASS] == ELF_CLASS) 
+#define compat_elf_check_arch(x) \
+	(((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \
+	 && (x)->e_ident[EI_CLASS] == ELF_CLASS)
+#define compat_start_thread	start_thread31
+
+/* For SVR4/S390 the function pointer to be registered with `atexit` is
+   passed in R14. */
+#define ELF_PLAT_INIT(_r, load_addr) \
+	do { \
+		_r->gprs[14] = 0; \
+	} while (0)
+
+#define CORE_DUMP_USE_REGSET
+#define ELF_EXEC_PAGESIZE	4096
+
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+   use of this is to invoke "./ld.so someprog" to test out a new version of
+   the loader.  We need to make sure that it is out of the way of the program
+   that it will "exec", and that there is sufficient room for the brk. 64-bit
+   tasks are aligned to 4GB. */
+#define ELF_ET_DYN_BASE (is_32bit_task() ? \
+				(STACK_TOP / 3 * 2) : \
+				(STACK_TOP / 3 * 2) & ~((1UL << 32) - 1))
+
+/* This yields a mask that user programs can use to figure out what
+   instruction set this CPU supports. */
+
+extern unsigned long elf_hwcap;
+#define ELF_HWCAP (elf_hwcap)
+
+/* This yields a string that ld.so will use to load implementation
+   specific libraries for optimization.  This is more specific in
+   intent than poking at uname or /proc/cpuinfo.
+
+   NOTE: text inherited from asm-i386/elf.h; the original remark about
+   "Intel generations" does not describe s390.  */
+
+#define ELF_PLATFORM_SIZE 8
+extern char elf_platform[];
+#define ELF_PLATFORM (elf_platform)
+
+#ifndef CONFIG_COMPAT
+#define SET_PERSONALITY(ex) \
+do {								\
+	set_personality(PER_LINUX |				\
+		(current->personality & (~PER_MASK)));		\
+	current_thread_info()->sys_call_table = 		\
+		(unsigned long) &sys_call_table;		\
+} while (0)
+#else /* CONFIG_COMPAT */
+#define SET_PERSONALITY(ex)					\
+do {								\
+	if (personality(current->personality) != PER_LINUX32)	\
+		set_personality(PER_LINUX |			\
+			(current->personality & ~PER_MASK));	\
+	if ((ex).e_ident[EI_CLASS] == ELFCLASS32) {		\
+		set_thread_flag(TIF_31BIT);			\
+		current_thread_info()->sys_call_table =		\
+			(unsigned long)	&sys_call_table_emu;	\
+	} else {						\
+		clear_thread_flag(TIF_31BIT);			\
+		current_thread_info()->sys_call_table =		\
+			(unsigned long) &sys_call_table;	\
+	}							\
+} while (0)
+#endif /* CONFIG_COMPAT */
+
+extern unsigned long mmap_rnd_mask;
+
+#define STACK_RND_MASK	(test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask)
+
+#define ARCH_DLINFO							    \
+do {									    \
+	if (vdso_enabled)						    \
+		NEW_AUX_ENT(AT_SYSINFO_EHDR,				    \
+			    (unsigned long)current->mm->context.vdso_base); \
+} while (0)
+
+struct linux_binprm;
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+int arch_setup_additional_pages(struct linux_binprm *, int);
+
+void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs);
+
+#endif
diff --git a/arch/s390/include/asm/emergency-restart.h b/arch/s390/include/asm/emergency-restart.h
new file mode 100644
index 000000000..108d8c48e
--- /dev/null
+++ b/arch/s390/include/asm/emergency-restart.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_EMERGENCY_RESTART_H
+#define _ASM_EMERGENCY_RESTART_H
+
+#include <asm-generic/emergency-restart.h>
+
+#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/s390/include/asm/etr.h b/arch/s390/include/asm/etr.h
new file mode 100644
index 000000000..629b79a93
--- /dev/null
+++ b/arch/s390/include/asm/etr.h
@@ -0,0 +1,256 @@
+/*
+ *  Copyright IBM Corp. 2006
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+#ifndef __S390_ETR_H
+#define __S390_ETR_H
+
+/* ETR attachment control register */
+struct etr_eacr {
+	unsigned int e0		: 1;	/* port 0 stepping control */
+	unsigned int e1		: 1;	/* port 1 stepping control */
+	unsigned int _pad0	: 5;	/* must be 00100 */
+	unsigned int dp		: 1;	/* data port control */
+	unsigned int p0		: 1;	/* port 0 change recognition control */
+	unsigned int p1		: 1;	/* port 1 change recognition control */
+	unsigned int _pad1	: 3;	/* must be 000 */
+	unsigned int ea		: 1;	/* ETR alert control */
+	unsigned int es		: 1;	/* ETR sync check control */
+	unsigned int sl		: 1;	/* switch to local control */
+} __attribute__ ((packed));
+
+/* Port state returned by steai */
+enum etr_psc {
+	etr_psc_operational = 0,
+	etr_psc_semi_operational = 1,
+	etr_psc_protocol_error =  4,
+	etr_psc_no_symbols = 8,
+	etr_psc_no_signal = 12,
+	etr_psc_pps_mode = 13
+};
+
+/* Logical port state returned by stetr */
+enum etr_lpsc {
+	etr_lpsc_operational_step = 0,
+	etr_lpsc_operational_alt = 1,
+	etr_lpsc_semi_operational = 2,
+	etr_lpsc_protocol_error =  4,
+	etr_lpsc_no_symbol_sync = 8,
+	etr_lpsc_no_signal = 12,
+	etr_lpsc_pps_mode = 13
+};
+
+/* ETR status words */
+struct etr_esw {
+	struct etr_eacr eacr;		/* attachment control register */
+	unsigned int y		: 1;	/* stepping mode */
+	unsigned int _pad0	: 5;	/* must be 00000 */
+	unsigned int p		: 1;	/* stepping port number */
+	unsigned int q		: 1;	/* data port number */
+	unsigned int psc0	: 4;	/* port 0 state code */
+	unsigned int psc1	: 4;	/* port 1 state code */
+} __attribute__ ((packed));
+
+/* Second level data register status word */
+struct etr_slsw {
+	unsigned int vv1	: 1;	/* copy of validity bit data frame 1 */
+	unsigned int vv2	: 1;	/* copy of validity bit data frame 2 */
+	unsigned int vv3	: 1;	/* copy of validity bit data frame 3 */
+	unsigned int vv4	: 1;	/* copy of validity bit data frame 4 */
+	unsigned int _pad0	: 19;	/* must be all zeroes */
+	unsigned int n		: 1;	/* EAF port number */
+	unsigned int v1		: 1;	/* validity bit ETR data frame 1 */
+	unsigned int v2		: 1;	/* validity bit ETR data frame 2 */
+	unsigned int v3		: 1;	/* validity bit ETR data frame 3 */
+	unsigned int v4		: 1;	/* validity bit ETR data frame 4 */
+	unsigned int _pad1	: 4;	/* must be 0000 */
+} __attribute__ ((packed));
+
+/* ETR data frames */
+struct etr_edf1 {
+	unsigned int u		: 1;	/* untuned bit */
+	unsigned int _pad0	: 1;	/* must be 0 */
+	unsigned int r		: 1;	/* service request bit */
+	unsigned int _pad1	: 4;	/* must be 0000 */
+	unsigned int a		: 1;	/* time adjustment bit */
+	unsigned int net_id	: 8;	/* ETR network id */
+	unsigned int etr_id	: 8;	/* id of ETR which sends data frames */
+	unsigned int etr_pn	: 8;	/* port number of ETR output port */
+} __attribute__ ((packed));
+
+struct etr_edf2 {
+	unsigned int etv	: 32;	/* Upper 32 bits of TOD. */
+} __attribute__ ((packed));
+
+struct etr_edf3 {
+	unsigned int rc		: 8;	/* failure reason code */
+	unsigned int _pad0	: 3;	/* must be 000 */
+	unsigned int c		: 1;	/* ETR coupled bit */
+	unsigned int tc		: 4;	/* ETR type code */
+	unsigned int blto	: 8;	/* biased local time offset */
+					/* (blto - 128) * 15 = minutes */
+	unsigned int buo	: 8;	/* biased utc offset */
+					/* (buo - 128) = leap seconds */
+} __attribute__ ((packed));
+
+struct etr_edf4 {
+	unsigned int ed		: 8;	/* ETS device dependent data */
+	unsigned int _pad0	: 1;	/* must be 0 */
+	unsigned int buc	: 5;	/* biased ut1 correction */
+					/* (buc - 16) * 0.1 seconds */
+	unsigned int em		: 6;	/* ETS error magnitude */
+	unsigned int dc		: 6;	/* ETS drift code */
+	unsigned int sc		: 6;	/* ETS steering code */
+} __attribute__ ((packed));
+
+/*
+ * ETR attachment information block, two formats
+ * format 1 has 4 reserved words with a size of 64 bytes
+ * format 2 has 16 reserved words with a size of 96 bytes
+ */
+struct etr_aib {
+	struct etr_esw esw;
+	struct etr_slsw slsw;
+	unsigned long long tsp;
+	struct etr_edf1 edf1;
+	struct etr_edf2 edf2;
+	struct etr_edf3 edf3;
+	struct etr_edf4 edf4;
+	unsigned int reserved[16];
+} __attribute__ ((packed,aligned(8)));
+
+/* ETR interruption parameter */
+struct etr_irq_parm {
+	unsigned int _pad0	: 8;
+	unsigned int pc0	: 1;	/* port 0 state change */
+	unsigned int pc1	: 1;	/* port 1 state change */
+	unsigned int _pad1	: 3;
+	unsigned int eai	: 1;	/* ETR alert indication */
+	unsigned int _pad2	: 18;
+} __attribute__ ((packed));
+
+/* Query TOD offset result */
+struct etr_ptff_qto {
+	unsigned long long physical_clock;
+	unsigned long long tod_offset;
+	unsigned long long logical_tod_offset;
+	unsigned long long tod_epoch_difference;
+} __attribute__ ((packed));
+
+/* Inline assembly helper functions */
+static inline int etr_setr(struct etr_eacr *ctrl)
+{
+	int rc = -EOPNOTSUPP;
+
+	asm volatile(
+		"	.insn	s,0xb2160000,%1\n"
+		"0:	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (rc) : "Q" (*ctrl));
+	return rc;
+}
+
+/* Stores a format 1 aib with 64 bytes */
+static inline int etr_stetr(struct etr_aib *aib)
+{
+	int rc = -EOPNOTSUPP;
+
+	asm volatile(
+		"	.insn	s,0xb2170000,%1\n"
+		"0:	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (rc) : "Q" (*aib));
+	return rc;
+}
+
+/* Stores a format 2 aib with 96 bytes for specified port */
+static inline int etr_steai(struct etr_aib *aib, unsigned int func)
+{
+	register unsigned int reg0 asm("0") = func;
+	int rc = -EOPNOTSUPP;
+
+	asm volatile(
+		"	.insn	s,0xb2b30000,%1\n"
+		"0:	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (rc) : "Q" (*aib), "d" (reg0));
+	return rc;
+}
+
+/* Function codes for the steai instruction. */
+#define ETR_STEAI_STEPPING_PORT		0x10
+#define ETR_STEAI_ALTERNATE_PORT	0x11
+#define ETR_STEAI_PORT_0		0x12
+#define ETR_STEAI_PORT_1		0x13
+
+static inline int etr_ptff(void *ptff_block, unsigned int func)
+{
+	register unsigned int reg0 asm("0") = func;
+	register unsigned long reg1 asm("1") = (unsigned long) ptff_block;
+	int rc;	/* condition code, always written by the ipm/srl pair */
+
+	/* "memory" clobber: PTFF accesses the block reg1 points to */
+	asm volatile(
+		"	.word	0x0104\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (rc) : "d" (reg0), "d" (reg1) : "cc", "memory");
+	return rc;
+}
+
+/* Function codes for the ptff instruction. */
+#define ETR_PTFF_QAF	0x00	/* query available functions */
+#define ETR_PTFF_QTO	0x01	/* query tod offset */
+#define ETR_PTFF_QSI	0x02	/* query steering information */
+#define ETR_PTFF_ATO	0x40	/* adjust tod offset */
+#define ETR_PTFF_STO	0x41	/* set tod offset */
+#define ETR_PTFF_SFS	0x42	/* set fine steering rate */
+#define ETR_PTFF_SGS	0x43	/* set gross steering rate */
+
+/* Functions needed by the machine check handler */
+void etr_switch_to_local(void);
+void etr_sync_check(void);
+
+/* STP interruption parameter */
+struct stp_irq_parm {
+	unsigned int _pad0	: 14;
+	unsigned int tsc	: 1;	/* Timing status change */
+	unsigned int lac	: 1;	/* Link availability change */
+	unsigned int tcpc	: 1;	/* Time control parameter change */
+	unsigned int _pad2	: 15;
+} __attribute__ ((packed));
+
+#define STP_OP_SYNC	1
+#define STP_OP_CTRL	3
+
+struct stp_sstpi {
+	unsigned int rsvd0;
+	unsigned int rsvd1 : 8;
+	unsigned int stratum : 8;
+	unsigned int vbits : 16;
+	unsigned int leaps : 16;
+	unsigned int tmd : 4;
+	unsigned int ctn : 4;
+	unsigned int rsvd2 : 3;
+	unsigned int c : 1;
+	unsigned int tst : 4;
+	unsigned int tzo : 16;
+	unsigned int dsto : 16;
+	unsigned int ctrl : 16;
+	unsigned int rsvd3 : 16;
+	unsigned int tto;
+	unsigned int rsvd4;
+	unsigned int ctnid[3];
+	unsigned int rsvd5;
+	unsigned int todoff[4];
+	unsigned int rsvd6[48];
+} __attribute__ ((packed));
+
+/* Functions needed by the machine check handler */
+void stp_sync_check(void);
+void stp_island_check(void);
+
+#endif /* __S390_ETR_H */
diff --git a/arch/s390/include/asm/exec.h b/arch/s390/include/asm/exec.h
new file mode 100644
index 000000000..c4a93d632
--- /dev/null
+++ b/arch/s390/include/asm/exec.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_EXEC_H
+#define __ASM_EXEC_H
+
+extern unsigned long arch_align_stack(unsigned long sp);
+
+#endif /* __ASM_EXEC_H */
diff --git a/arch/s390/include/asm/extmem.h b/arch/s390/include/asm/extmem.h
new file mode 100644
index 000000000..6276002d7
--- /dev/null
+++ b/arch/s390/include/asm/extmem.h
@@ -0,0 +1,31 @@
+/*
+ *  definitions for external memory segment support
+ *  Copyright IBM Corp. 2003
+ */
+
+#ifndef _ASM_S390X_DCSS_H
+#define _ASM_S390X_DCSS_H
+#ifndef __ASSEMBLY__
+
+/* possible values for segment type as returned by segment_info */
+#define SEG_TYPE_SW 0
+#define SEG_TYPE_EW 1
+#define SEG_TYPE_SR 2
+#define SEG_TYPE_ER 3
+#define SEG_TYPE_SN 4
+#define SEG_TYPE_EN 5
+#define SEG_TYPE_SC 6
+#define SEG_TYPE_EWEN 7
+
+#define SEGMENT_SHARED 0
+#define SEGMENT_EXCLUSIVE 1
+
+int segment_load (char *name, int segtype, unsigned long *addr, unsigned long *length);
+void segment_unload(char *name);
+void segment_save(char *name);
+int segment_type (char* name);
+int segment_modify_shared (char *name, int do_nonshared);
+void segment_warning(int rc, char *seg_name);
+
+#endif
+#endif
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
new file mode 100644
index 000000000..0aa6a7ed9
--- /dev/null
+++ b/arch/s390/include/asm/facility.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_FACILITY_H
+#define __ASM_FACILITY_H
+
+#include <linux/string.h>
+#include <linux/preempt.h>
+#include <asm/lowcore.h>
+
+#define MAX_FACILITY_BIT (256*8)	/* stfle_fac_list has 256 bytes */
+
+static inline int __test_facility(unsigned long nr, void *facilities)
+{
+	unsigned char *ptr;
+
+	if (nr >= MAX_FACILITY_BIT)
+		return 0;
+	ptr = (unsigned char *) facilities + (nr >> 3);
+	return (*ptr & (0x80 >> (nr & 7))) != 0;
+}
+
+/*
+ * The test_facility function uses the bit ordering where the MSB is bit 0.
+ * That makes it easier to query facility bits with the bit number as
+ * documented in the Principles of Operation.
+ */
+static inline int test_facility(unsigned long nr)
+{
+	return __test_facility(nr, &S390_lowcore.stfle_fac_list);
+}
+
+/**
+ * stfle - Store facility list extended
+ * @stfle_fac_list: array where facility list can be stored
+ * @size: size of passed in array in double words
+ */
+static inline void stfle(u64 *stfle_fac_list, int size)
+{
+	unsigned long nr;
+
+	preempt_disable();
+	asm volatile(
+		"	.insn s,0xb2b10000,0(0)\n" /* stfl */
+		"0:\n"
+		EX_TABLE(0b, 0b)
+		: "+m" (S390_lowcore.stfl_fac_list));
+	nr = 4; /* bytes stored by stfl */
+	memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4);
+	if (S390_lowcore.stfl_fac_list & 0x01000000) {
+		/* More facility bits available with stfle */
+		register unsigned long reg0 asm("0") = size - 1;
+
+		asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */
+			     : "+d" (reg0) : "a" (stfle_fac_list)
+			     : "memory", "cc");
+		/* reg0 + 1 = doublewords available; at most size were stored */
+		nr = (reg0 < (unsigned long) size ? reg0 + 1 : size) * 8;
+	}
+	memset((char *) stfle_fac_list + nr, 0, size * 8 - nr);
+	preempt_enable();
+}
+
+#endif /* __ASM_FACILITY_H */
diff --git a/arch/s390/include/asm/fb.h b/arch/s390/include/asm/fb.h
new file mode 100644
index 000000000..c7df38030
--- /dev/null
+++ b/arch/s390/include/asm/fb.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_FB_H_
+#define _ASM_FB_H_
+#include <linux/fb.h>
+
+#define fb_pgprotect(...) do {} while (0)
+
+static inline int fb_is_primary_device(struct fb_info *info)
+{
+	return 0;
+}
+
+#endif /* _ASM_FB_H_ */
diff --git a/arch/s390/include/asm/fcx.h b/arch/s390/include/asm/fcx.h
new file mode 100644
index 000000000..7ecb92b46
--- /dev/null
+++ b/arch/s390/include/asm/fcx.h
@@ -0,0 +1,311 @@
+/*
+ *  Functions for assembling fcx enabled I/O control blocks.
+ *
+ *    Copyright IBM Corp. 2008
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_FCX_H
+#define _ASM_S390_FCX_H _ASM_S390_FCX_H
+
+#include <linux/types.h>
+
+#define TCW_FORMAT_DEFAULT		0
+#define TCW_TIDAW_FORMAT_DEFAULT	0
+#define TCW_FLAGS_INPUT_TIDA		(1 << (23 - 5))
+#define TCW_FLAGS_TCCB_TIDA		(1 << (23 - 6))
+#define TCW_FLAGS_OUTPUT_TIDA		(1 << (23 - 7))
+#define TCW_FLAGS_TIDAW_FORMAT(x)	(((x) & 3) << (23 - 9))
+#define TCW_FLAGS_GET_TIDAW_FORMAT(x)	(((x) >> (23 - 9)) & 3)
+
+/**
+ * struct tcw - Transport Control Word (TCW)
+ * @format: TCW format
+ * @flags: TCW flags
+ * @tccbl: Transport-Command-Control-Block Length
+ * @r: Read Operations
+ * @w: Write Operations
+ * @output: Output-Data Address
+ * @input: Input-Data Address
+ * @tsb: Transport-Status-Block Address
+ * @tccb: Transport-Command-Control-Block Address
+ * @output_count: Output Count
+ * @input_count: Input Count
+ * @intrg: Interrogate TCW Address
+ */
+struct tcw {
+	u32 format:2;
+	u32 :6;
+	u32 flags:24;
+	u32 :8;
+	u32 tccbl:6;
+	u32 r:1;
+	u32 w:1;
+	u32 :16;
+	u64 output;
+	u64 input;
+	u64 tsb;
+	u64 tccb;
+	u32 output_count;
+	u32 input_count;
+	u32 :32;
+	u32 :32;
+	u32 :32;
+	u32 intrg;
+} __attribute__ ((packed, aligned(64)));
+
+#define TIDAW_FLAGS_LAST		(1 << (7 - 0))
+#define TIDAW_FLAGS_SKIP		(1 << (7 - 1))
+#define TIDAW_FLAGS_DATA_INT		(1 << (7 - 2))
+#define TIDAW_FLAGS_TTIC		(1 << (7 - 3))
+#define TIDAW_FLAGS_INSERT_CBC		(1 << (7 - 4))
+
+/**
+ * struct tidaw - Transport-Indirect-Addressing Word (TIDAW)
+ * @flags: TIDAW flags. Can be an arithmetic OR of the following constants:
+ * %TIDAW_FLAGS_LAST, %TIDAW_FLAGS_SKIP, %TIDAW_FLAGS_DATA_INT,
+ * %TIDAW_FLAGS_TTIC, %TIDAW_FLAGS_INSERT_CBC
+ * @count: Count
+ * @addr: Address
+ */
+struct tidaw {
+	u32 flags:8;
+	u32 :24;
+	u32 count;
+	u64 addr;
+} __attribute__ ((packed, aligned(16)));
+
+/**
+ * struct tsa_iostat - I/O-Status Transport-Status Area (IO-Stat TSA)
+ * @dev_time: Device Time
+ * @def_time: Defer Time
+ * @queue_time: Queue Time
+ * @dev_busy_time: Device-Busy Time
+ * @dev_act_time: Device-Active-Only Time
+ * @sense: Sense Data (if present)
+ */
+struct tsa_iostat {
+	u32 dev_time;
+	u32 def_time;
+	u32 queue_time;
+	u32 dev_busy_time;
+	u32 dev_act_time;
+	u8 sense[32];
+} __attribute__ ((packed));
+
+/**
+ * struct tsa_ddpc - Device-Detected-Program-Check Transport-Status Area (DDPC TSA)
+ * @rc: Reason Code
+ * @rcq: Reason Code Qualifier
+ * @sense: Sense Data (if present)
+ */
+struct tsa_ddpc {
+	u32 :24;
+	u32 rc:8;
+	u8 rcq[16];
+	u8 sense[32];
+} __attribute__ ((packed));
+
+#define TSA_INTRG_FLAGS_CU_STATE_VALID		(1 << (7 - 0))
+#define TSA_INTRG_FLAGS_DEV_STATE_VALID		(1 << (7 - 1))
+#define TSA_INTRG_FLAGS_OP_STATE_VALID		(1 << (7 - 2))
+
+/**
+ * struct tsa_intrg - Interrogate Transport-Status Area (Intrg. TSA)
+ * @format: Format
+ * @flags: Flags. Can be an arithmetic OR of the following constants:
+ * %TSA_INTRG_FLAGS_CU_STATE_VALID, %TSA_INTRG_FLAGS_DEV_STATE_VALID,
+ * %TSA_INTRG_FLAGS_OP_STATE_VALID
+ * @cu_state: Control-Unit State
+ * @dev_state: Device State
+ * @op_state: Operation State
+ * @sd_info: State-Dependent Information
+ * @dl_id: Device-Level Identifier
+ * @dd_data: Device-Dependent Data
+ */
+struct tsa_intrg {
+	u32 format:8;
+	u32 flags:8;
+	u32 cu_state:8;
+	u32 dev_state:8;
+	u32 op_state:8;
+	u32 :24;
+	u8 sd_info[12];
+	u32 dl_id;
+	u8 dd_data[28];
+} __attribute__ ((packed));
+
+#define TSB_FORMAT_NONE		0
+#define TSB_FORMAT_IOSTAT	1
+#define TSB_FORMAT_DDPC		2
+#define TSB_FORMAT_INTRG	3
+
+#define TSB_FLAGS_DCW_OFFSET_VALID	(1 << (7 - 0))
+#define TSB_FLAGS_COUNT_VALID		(1 << (7 - 1))
+#define TSB_FLAGS_CACHE_MISS		(1 << (7 - 2))
+#define TSB_FLAGS_TIME_VALID		(1 << (7 - 3))
+#define TSB_FLAGS_FORMAT(x)		((x) & 7)
+#define TSB_FORMAT(t)			((t)->flags & 7)
+
+/**
+ * struct tsb - Transport-Status Block (TSB)
+ * @length: Length
+ * @flags: Flags. Can be an arithmetic OR of the following constants:
+ * %TSB_FLAGS_DCW_OFFSET_VALID, %TSB_FLAGS_COUNT_VALID, %TSB_FLAGS_CACHE_MISS,
+ * %TSB_FLAGS_TIME_VALID
+ * @dcw_offset: DCW Offset
+ * @count: Count
+ * @tsa: Transport-Status-Area
+ */
+struct tsb {
+	u32 length:8;
+	u32 flags:8;
+	u32 dcw_offset:16;
+	u32 count;
+	u32 :32;
+	union {
+		struct tsa_iostat iostat;
+		struct tsa_ddpc ddpc;
+		struct tsa_intrg intrg;
+	} __attribute__ ((packed)) tsa;
+} __attribute__ ((packed, aligned(8)));
+
+#define DCW_INTRG_FORMAT_DEFAULT	0
+
+#define DCW_INTRG_RC_UNSPECIFIED	0
+#define DCW_INTRG_RC_TIMEOUT		1
+
+#define DCW_INTRG_RCQ_UNSPECIFIED	0
+#define DCW_INTRG_RCQ_PRIMARY		1
+#define DCW_INTRG_RCQ_SECONDARY		2
+
+#define DCW_INTRG_FLAGS_MPM		(1 << (7 - 0))
+#define DCW_INTRG_FLAGS_PPR		(1 << (7 - 1))
+#define DCW_INTRG_FLAGS_CRIT		(1 << (7 - 2))
+
+/**
+ * struct dcw_intrg_data - Interrogate DCW data
+ * @format: Format. Should be %DCW_INTRG_FORMAT_DEFAULT
+ * @rc: Reason Code. Can be one of %DCW_INTRG_RC_UNSPECIFIED,
+ * %DCW_INTRG_RC_TIMEOUT
+ * @rcq: Reason Code Qualifier: Can be one of %DCW_INTRG_RCQ_UNSPECIFIED,
+ * %DCW_INTRG_RCQ_PRIMARY, %DCW_INTRG_RCQ_SECONDARY
+ * @lpm: Logical-Path Mask
+ * @pam: Path-Available Mask
+ * @pim: Path-Installed Mask
+ * @timeout: Timeout
+ * @flags: Flags. Can be an arithmetic OR of %DCW_INTRG_FLAGS_MPM,
+ * %DCW_INTRG_FLAGS_PPR, %DCW_INTRG_FLAGS_CRIT
+ * @time: Time
+ * @prog_id: Program Identifier
+ * @prog_data: Program-Dependent Data
+ */
+struct dcw_intrg_data {
+	u32 format:8;
+	u32 rc:8;
+	u32 rcq:8;
+	u32 lpm:8;
+	u32 pam:8;
+	u32 pim:8;
+	u32 timeout:16;
+	u32 flags:8;
+	u32 :24;
+	u32 :32;
+	u64 time;
+	u64 prog_id;
+	u8  prog_data[0];
+} __attribute__ ((packed));
+
+#define DCW_FLAGS_CC		(1 << (7 - 1))
+
+#define DCW_CMD_WRITE		0x01
+#define DCW_CMD_READ		0x02
+#define DCW_CMD_CONTROL		0x03
+#define DCW_CMD_SENSE		0x04
+#define DCW_CMD_SENSE_ID	0xe4
+#define DCW_CMD_INTRG		0x40
+
+/**
+ * struct dcw - Device-Command Word (DCW)
+ * @cmd: Command Code. Can be one of %DCW_CMD_WRITE, %DCW_CMD_READ,
+ * %DCW_CMD_CONTROL, %DCW_CMD_SENSE, %DCW_CMD_SENSE_ID, %DCW_CMD_INTRG
+ * @flags: Flags. Can be an arithmetic OR of %DCW_FLAGS_CC
+ * @cd_count: Control-Data Count
+ * @count: Count
+ * @cd: Control Data
+ */
+struct dcw {
+	u32 cmd:8;
+	u32 flags:8;
+	u32 :8;
+	u32 cd_count:8;
+	u32 count;
+	u8 cd[0];
+} __attribute__ ((packed));
+
+#define TCCB_FORMAT_DEFAULT	0x7f
+#define TCCB_MAX_DCW		30
+#define TCCB_MAX_SIZE		(sizeof(struct tccb_tcah) + \
+				 TCCB_MAX_DCW * sizeof(struct dcw) + \
+				 sizeof(struct tccb_tcat))
+#define TCCB_SAC_DEFAULT	0x1ffe
+#define TCCB_SAC_INTRG		0x1fff
+
+/**
+ * struct tccb_tcah - Transport-Command-Area Header (TCAH)
+ * @format: Format. Should be %TCCB_FORMAT_DEFAULT
+ * @tcal: Transport-Command-Area Length
+ * @sac: Service-Action Code. Can be one of %TCCB_SAC_DEFAULT, %TCCB_SAC_INTRG
+ * @prio: Priority
+ */
+struct tccb_tcah {
+	u32 format:8;
+	u32 :24;
+	u32 :24;
+	u32 tcal:8;
+	u32 sac:16;
+	u32 :8;
+	u32 prio:8;
+	u32 :32;
+} __attribute__ ((packed));
+
+/**
+ * struct tccb_tcat - Transport-Command-Area Trailer (TCAT)
+ * @count: Transport Count
+ */
+struct tccb_tcat {
+	u32 :32;
+	u32 count;
+} __attribute__ ((packed));
+
+/**
+ * struct tccb - (partial) Transport-Command-Control Block (TCCB)
+ * @tcah: TCAH
+ * @tca: Transport-Command Area
+ */
+struct tccb {
+	struct tccb_tcah tcah;
+	u8 tca[0];
+} __attribute__ ((packed, aligned(8)));
+
+struct tcw *tcw_get_intrg(struct tcw *tcw);
+void *tcw_get_data(struct tcw *tcw);
+struct tccb *tcw_get_tccb(struct tcw *tcw);
+struct tsb *tcw_get_tsb(struct tcw *tcw);
+
+void tcw_init(struct tcw *tcw, int r, int w);
+void tcw_finalize(struct tcw *tcw, int num_tidaws);
+
+void tcw_set_intrg(struct tcw *tcw, struct tcw *intrg_tcw);
+void tcw_set_data(struct tcw *tcw, void *data, int use_tidal);
+void tcw_set_tccb(struct tcw *tcw, struct tccb *tccb);
+void tcw_set_tsb(struct tcw *tcw, struct tsb *tsb);
+
+void tccb_init(struct tccb *tccb, size_t tccb_size, u32 sac);
+void tsb_init(struct tsb *tsb);
+struct dcw *tccb_add_dcw(struct tccb *tccb, size_t tccb_size, u8 cmd, u8 flags,
+			 void *cd, u8 cd_count, u32 count);
+struct tidaw *tcw_add_tidaw(struct tcw *tcw, int num_tidaws, u8 flags,
+			    void *addr, u32 count);
+
+#endif /* _ASM_S390_FCX_H */
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
new file mode 100644
index 000000000..836c56290
--- /dev/null
+++ b/arch/s390/include/asm/ftrace.h
@@ -0,0 +1,84 @@
+#ifndef _ASM_S390_FTRACE_H
+#define _ASM_S390_FTRACE_H
+
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+
+#ifdef CC_USING_HOTPATCH
+#define MCOUNT_INSN_SIZE	6
+#else
+#define MCOUNT_INSN_SIZE	24
+#define MCOUNT_RETURN_FIXUP	18
+#endif
+
+#ifndef __ASSEMBLY__
+
+#define ftrace_return_address(n) __builtin_return_address(n)
+
+void _mcount(void);
+void ftrace_caller(void);
+
+extern char ftrace_graph_caller_end;
+extern unsigned long ftrace_plt;
+
+struct dyn_arch_ftrace { };
+
+#define MCOUNT_ADDR ((unsigned long)_mcount)
+#define FTRACE_ADDR ((unsigned long)ftrace_caller)
+
+#define KPROBE_ON_FTRACE_NOP	0
+#define KPROBE_ON_FTRACE_CALL	1
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+
+struct ftrace_insn {
+	u16 opc;
+	s32 disp;
+} __packed;
+
+static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CC_USING_HOTPATCH
+	/* brcl 0,0 */
+	insn->opc = 0xc004;
+	insn->disp = 0;
+#else
+	/* jg .+24 */
+	insn->opc = 0xc0f4;
+	insn->disp = MCOUNT_INSN_SIZE / 2;
+#endif
+#endif
+}
+
+static inline int is_ftrace_nop(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CC_USING_HOTPATCH
+	if (insn->disp == 0)
+		return 1;
+#else
+	if (insn->disp == MCOUNT_INSN_SIZE / 2)
+		return 1;
+#endif
+#endif
+	return 0;
+}
+
+static inline void ftrace_generate_call_insn(struct ftrace_insn *insn,
+					     unsigned long ip)
+{
+#ifdef CONFIG_FUNCTION_TRACER
+	unsigned long target;
+
+	/* brasl r0,ftrace_caller */
+	target = is_module_addr((void *) ip) ? ftrace_plt : FTRACE_ADDR;
+	insn->opc = 0xc005;
+	insn->disp = (target - ip) / 2;
+#endif
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_FTRACE_H */
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
new file mode 100644
index 000000000..a4811aa03
--- /dev/null
+++ b/arch/s390/include/asm/futex.h
@@ -0,0 +1,96 @@
+#ifndef _ASM_S390_FUTEX_H
+#define _ASM_S390_FUTEX_H
+
+#include <linux/uaccess.h>
+#include <linux/futex.h>
+#include <asm/mmu_context.h>
+#include <asm/errno.h>
+
+#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg)	\
+	asm volatile(							\
+		"   sacf  256\n"					\
+		"0: l     %1,0(%6)\n"					\
+		"1:"insn						\
+		"2: cs    %1,%2,0(%6)\n"				\
+		"3: jl    1b\n"						\
+		"   lhi   %0,0\n"					\
+		"4: sacf  768\n"					\
+		EX_TABLE(0b,4b) EX_TABLE(2b,4b) EX_TABLE(3b,4b)		\
+		: "=d" (ret), "=&d" (oldval), "=&d" (newval),		\
+		  "=m" (*uaddr)						\
+		: "0" (-EFAULT), "d" (oparg), "a" (uaddr),		\
+		  "m" (*uaddr) : "cc");
+
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	int oparg = (encoded_op << 8) >> 20;
+	int cmparg = (encoded_op << 20) >> 20;
+	int oldval = 0, newval, ret;
+
+	load_kernel_asce();
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	pagefault_disable();
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_atomic_op("lr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op("lr %2,%1\nar %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op("lr %2,%1\nor %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op("lr %2,%1\nnr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op("lr %2,%1\nxr %2,%5\n",
+				  ret, oldval, newval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+	pagefault_enable();
+
+	if (!ret) {
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
+		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
+		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
+		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
+		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
+		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+		default: ret = -ENOSYS;
+		}
+	}
+	return ret;
+}
+
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+						u32 oldval, u32 newval)
+{
+	int ret;
+
+	load_kernel_asce();
+	asm volatile(
+		"   sacf 256\n"
+		"0: cs   %1,%4,0(%5)\n"
+		"1: la   %0,0\n"
+		"2: sacf 768\n"
+		EX_TABLE(0b,2b) EX_TABLE(1b,2b)
+		: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
+		: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
+		: "cc", "memory");
+	*uval = oldval;
+	return ret;
+}
+
+#endif /* _ASM_S390_FUTEX_H */
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
new file mode 100644
index 000000000..b7eabaaef
--- /dev/null
+++ b/arch/s390/include/asm/hardirq.h
@@ -0,0 +1,26 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ *
+ *  Derived from "include/asm-i386/hardirq.h"
+ */
+
+#ifndef __ASM_HARDIRQ_H
+#define __ASM_HARDIRQ_H
+
+#include <asm/lowcore.h>
+
+#define local_softirq_pending() (S390_lowcore.softirq_pending)
+
+#define __ARCH_IRQ_STAT
+#define __ARCH_HAS_DO_SOFTIRQ
+#define __ARCH_IRQ_EXIT_IRQS_DISABLED
+
+static inline void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+}
+
+#endif /* __ASM_HARDIRQ_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
new file mode 100644
index 000000000..11eae5f55
--- /dev/null
+++ b/arch/s390/include/asm/hugetlb.h
@@ -0,0 +1,115 @@
+/*
+ *  IBM System z Huge TLB Page Support for Kernel.
+ *
+ *    Copyright IBM Corp. 2008
+ *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_HUGETLB_H
+#define _ASM_S390_HUGETLB_H
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+
+#define is_hugepage_only_range(mm, addr, len)	0
+#define hugetlb_free_pgd_range			free_pgd_range
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte);
+pte_t huge_ptep_get(pte_t *ptep);
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+			      unsigned long addr, pte_t *ptep);
+
+/*
+ * If the arch doesn't supply something else, assume that hugepage
+ * size aligned regions are ok without further preparation.
+ */
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
+{
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+	if (addr & ~HPAGE_MASK)
+		return -EINVAL;
+	return 0;
+}
+
+#define hugetlb_prefault_arch_hook(mm)		do { } while (0)
+#define arch_clear_hugepage_flags(page)		do { } while (0)
+
+int arch_prepare_hugepage(struct page *page);
+void arch_release_hugepage(struct page *page);
+
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+				  pte_t *ptep)
+{
+	pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY;
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long address, pte_t *ptep)
+{
+	huge_ptep_get_and_clear(vma->vm_mm, address, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	int changed = !pte_same(huge_ptep_get(ptep), pte);
+	if (changed) {
+		huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+		set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+	}
+	return changed;
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep);
+	set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte));
+}
+
+static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
+{
+	return mk_pte(page, pgprot);
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline int huge_pte_write(pte_t pte)
+{
+	return pte_write(pte);
+}
+
+static inline int huge_pte_dirty(pte_t pte)
+{
+	return pte_dirty(pte);
+}
+
+static inline pte_t huge_pte_mkwrite(pte_t pte)
+{
+	return pte_mkwrite(pte);
+}
+
+static inline pte_t huge_pte_mkdirty(pte_t pte)
+{
+	return pte_mkdirty(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
+{
+	return pte_modify(pte, newprot);
+}
+
+#endif /* _ASM_S390_HUGETLB_H */
diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h
new file mode 100644
index 000000000..ee96a8b69
--- /dev/null
+++ b/arch/s390/include/asm/hw_irq.h
@@ -0,0 +1,11 @@
+#ifndef _HW_IRQ_H
+#define _HW_IRQ_H
+
+#include <linux/msi.h>
+#include <linux/pci.h>
+
+void __init init_airq_interrupts(void);
+void __init init_cio_interrupts(void);
+void __init init_ext_interrupts(void);
+
+#endif
diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h
new file mode 100644
index 000000000..a7b2d7504
--- /dev/null
+++ b/arch/s390/include/asm/idals.h
@@ -0,0 +1,232 @@
+/* 
+ * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
+ *		    Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Bugreports.to..: <Linux390@de.ibm.com>
+ * Copyright IBM Corp. 2000
+ *
+ * History of changes
+ * 07/24/00 new file
+ * 05/04/02 code restructuring.
+ */
+
+#ifndef _S390_IDALS_H
+#define _S390_IDALS_H
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/cio.h>
+#include <asm/uaccess.h>
+
+#define IDA_SIZE_LOG 12 /* 11 for 2k , 12 for 4k */
+#define IDA_BLOCK_SIZE (1L<<IDA_SIZE_LOG)
+
+/*
+ * Test if an address/length pair needs an idal list (last byte at or above 2^31).
+ */
+static inline int idal_is_needed(void *vaddr, unsigned int length)
+{
+	unsigned long last = __pa(vaddr) + length - 1;	/* address of last byte */
+
+	return (last >> 31) != 0;
+}
+
+/* Return the number of idal words needed for an address/length pair. */
+static inline unsigned int idal_nr_words(void *vaddr, unsigned int length)
+{
+	unsigned long offset = __pa(vaddr) & (IDA_BLOCK_SIZE-1);
+
+	/* bytes from the start of the first block, rounded up to whole blocks */
+	return (offset + length + (IDA_BLOCK_SIZE-1)) >> IDA_SIZE_LOG;
+}
+
+/*
+ * Create the list of idal words for an address/length pair: the physical
+ * address of vaddr first, then the start of each following IDA block.
+ * At least one word is stored; returns the slot after the last one written.
+ */
+static inline unsigned long *idal_create_words(unsigned long *idaws,
+					       void *vaddr, unsigned int length)
+{
+	unsigned long paddr = __pa(vaddr);
+	unsigned int cidaw = idal_nr_words(vaddr, length);
+
+	*idaws++ = paddr;
+	paddr &= -IDA_BLOCK_SIZE;	/* round down to the block start */
+	/* Post-decrement test: a count of 0 (aligned, empty range) must not wrap. */
+	while (cidaw-- > 1) {
+		paddr += IDA_BLOCK_SIZE;
+		*idaws++ = paddr;
+	}
+	return idaws;
+}
+
+/*
+ * Sets the address of the data in CCW (-EINVAL if CCW_FLAG_IDA is already set).
+ * If idal_nr_words() is nonzero it allocates an IDAL and sets the flag.
+ */
+static inline int
+set_normalized_cda(struct ccw1 * ccw, void *vaddr)
+{
+	unsigned long *words;
+	unsigned int nwords;
+
+	if (ccw->flags & CCW_FLAG_IDA)
+		return -EINVAL;
+	nwords = idal_nr_words(vaddr, ccw->count);
+	if (nwords > 0) {
+		words = kmalloc(nwords * sizeof(unsigned long),
+				GFP_ATOMIC | GFP_DMA);
+		if (!words)
+			return -ENOMEM;
+		idal_create_words(words, vaddr, ccw->count);
+		ccw->flags |= CCW_FLAG_IDA;
+		vaddr = words;
+	}
+	ccw->cda = (__u32)(unsigned long) vaddr;
+	return 0;
+}
+
+/*
+ * Releases any allocated IDAL related to the CCW and resets its data address.
+ */
+static inline void
+clear_normalized_cda(struct ccw1 * ccw)
+{
+	if (ccw->flags & CCW_FLAG_IDA) {	/* assumes cda is an IDAL from set_normalized_cda() */
+		kfree((void *)(unsigned long) ccw->cda);
+		ccw->flags &= ~CCW_FLAG_IDA;
+	}
+	ccw->cda = 0;	/* cleared whether or not an IDAL was attached */
+}
+
+/*
+ * Idal buffer extension: bookkeeping header followed by the pointer list.
+ */
+struct idal_buffer {
+	size_t size;		/* buffer size in bytes */
+	size_t page_order;	/* order of each backing page block */
+	void *data[];		/* one pointer per IDA block (flexible array) */
+};
+
+/*
+ * Allocate an idal buffer: a pointer list for size bytes backed by page blocks of order page_order.
+ */
+static inline struct idal_buffer *
+idal_buffer_alloc(size_t size, int page_order)
+{
+	struct idal_buffer *ib;
+	int nr_chunks, nr_ptrs, i;
+
+	nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;	/* one pointer per IDA block */
+	nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG;	/* pointers per page block */
+	ib = kmalloc(sizeof(struct idal_buffer) + nr_ptrs*sizeof(void *),
+		     GFP_DMA | GFP_KERNEL);
+	if (ib == NULL)
+		return ERR_PTR(-ENOMEM);
+	ib->size = size;
+	ib->page_order = page_order;
+	for (i = 0; i < nr_ptrs; i++) {
+		if ((i & (nr_chunks - 1)) != 0) {	/* not the first pointer of a page block */
+			ib->data[i] = ib->data[i-1] + IDA_BLOCK_SIZE;
+			continue;
+		}
+		ib->data[i] = (void *)
+			__get_free_pages(GFP_KERNEL, page_order);
+		if (ib->data[i] != NULL)
+			continue;
+		/* Not enough memory: free the page blocks obtained so far. */
+		while (i >= nr_chunks) {
+			i -= nr_chunks;	/* step back to the previous block's first pointer */
+			free_pages((unsigned long) ib->data[i],
+				   ib->page_order);
+		}
+		kfree(ib);
+		return ERR_PTR(-ENOMEM);
+	}
+	return ib;
+}
+
+/*
+ * Free an idal buffer: release the page block behind the first pointer of
+ * every chunk, then the descriptor itself.
+ */
+static inline void idal_buffer_free(struct idal_buffer *ib)
+{
+	int ptrs = (ib->size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
+	int step = (4096 << ib->page_order) >> IDA_SIZE_LOG;
+	int idx;
+
+	for (idx = 0; idx < ptrs; idx += step)
+		free_pages((unsigned long) ib->data[idx], ib->page_order);
+	kfree(ib);
+}
+
+/*
+ * Test if an idal list is really needed (more than one chunk, or idal_is_needed()).
+ */
+static inline int __idal_buffer_is_needed(struct idal_buffer *ib)
+{
+	if (ib->size > (4096ul << ib->page_order))
+		return 1;
+	return idal_is_needed(ib->data[0], ib->size) != 0;
+}
+
+/*
+ * Set channel data address and count of the CCW from an idal buffer.
+ */
+static inline void
+idal_buffer_set_cda(struct idal_buffer *ib, struct ccw1 *ccw)
+{
+	ccw->count = ib->size;
+	if (!__idal_buffer_is_needed(ib)) {
+		ccw->cda = (u32)(addr_t) ib->data[0];	/* direct addressing */
+		return;
+	}
+	/* idals needed: point cda at the pointer list and flag the CCW */
+	ccw->cda = (u32)(addr_t) ib->data;
+	ccw->flags |= CCW_FLAG_IDA;
+}
+
+/*
+ * Copy count bytes from an idal buffer to user memory, at most IDA_BLOCK_SIZE per call.
+ */
+static inline size_t
+idal_buffer_to_user(struct idal_buffer *ib, void __user *to, size_t count)
+{
+	void **chunk;
+	size_t uncopied;
+
+	BUG_ON(count > ib->size);
+	for (chunk = ib->data; count > IDA_BLOCK_SIZE; chunk++) {
+		uncopied = copy_to_user(to, *chunk, IDA_BLOCK_SIZE);
+		if (uncopied)	/* report it plus the blocks not yet tried */
+			return uncopied + count - IDA_BLOCK_SIZE;
+		to = (void __user *) to + IDA_BLOCK_SIZE;
+		count -= IDA_BLOCK_SIZE;
+	}
+	return copy_to_user(to, *chunk, count);
+}
+
+/*
+ * Copy count bytes from user memory to an idal buffer, at most IDA_BLOCK_SIZE per call.
+ */
+static inline size_t
+idal_buffer_from_user(struct idal_buffer *ib, const void __user *from, size_t count)
+{
+	void **chunk;
+	size_t uncopied;
+
+	BUG_ON(count > ib->size);
+	for (chunk = ib->data; count > IDA_BLOCK_SIZE; chunk++) {
+		uncopied = copy_from_user(*chunk, from, IDA_BLOCK_SIZE);
+		if (uncopied)	/* report it plus the blocks not yet tried */
+			return uncopied + count - IDA_BLOCK_SIZE;
+		from = (void __user *) from + IDA_BLOCK_SIZE;
+		count -= IDA_BLOCK_SIZE;
+	}
+	return copy_from_user(*chunk, from, count);
+}
+
+#endif
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
new file mode 100644
index 000000000..113cd963d
--- /dev/null
+++ b/arch/s390/include/asm/idle.h
@@ -0,0 +1,27 @@
+/*
+ *  Copyright IBM Corp. 2014
+ *
+ *  Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _S390_IDLE_H
+#define _S390_IDLE_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/seqlock.h>
+
+struct s390_idle_data {
+	seqcount_t seqcount;
+	unsigned long long idle_count;
+	unsigned long long idle_time;
+	unsigned long long clock_idle_enter;
+	unsigned long long clock_idle_exit;
+	unsigned long long timer_idle_enter;
+	unsigned long long timer_idle_exit;
+};
+
+extern struct device_attribute dev_attr_idle_count;
+extern struct device_attribute dev_attr_idle_time_us;
+
+#endif /* _S390_IDLE_H */
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
new file mode 100644
index 000000000..30fd5c846
--- /dev/null
+++ b/arch/s390/include/asm/io.h
@@ -0,0 +1,77 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/io.h"
+ */
+
+#ifndef _S390_IO_H
+#define _S390_IO_H
+
+#include <linux/kernel.h>
+#include <asm/page.h>
+#include <asm/pci_io.h>
+
+#define xlate_dev_mem_ptr xlate_dev_mem_ptr
+void *xlate_dev_mem_ptr(phys_addr_t phys);
+#define unxlate_dev_mem_ptr unxlate_dev_mem_ptr
+void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer (identity here).
+ */
+#define xlate_dev_kmem_ptr(p)	(p)
+
+#define IO_SPACE_LIMIT 0
+
+#ifdef CONFIG_PCI
+
+#define ioremap_nocache(addr, size)	ioremap(addr, size)
+#define ioremap_wc			ioremap_nocache
+
+static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
+{
+	return (void __iomem *) offset;	/* no mapping is set up; size is unused */
+}
+
+static inline void iounmap(volatile void __iomem *addr)
+{
+}
+
+static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+	return NULL;	/* no port mapping is ever produced; both arguments are ignored */
+}
+
+static inline void ioport_unmap(void __iomem *p)
+{
+}
+
+/*
+ * s390 needs a private implementation of pci_iomap since ioremap with its
+ * offset parameter isn't sufficient. That's because BAR spaces are not
+ * disjoint on s390 so we need the bar parameter of pci_iomap to find
+ * the corresponding device and create the mapping cookie.
+ */
+#define pci_iomap pci_iomap
+#define pci_iounmap pci_iounmap
+
+#define memcpy_fromio(dst, src, count)	zpci_memcpy_fromio(dst, src, count)
+#define memcpy_toio(dst, src, count)	zpci_memcpy_toio(dst, src, count)
+#define memset_io(dst, val, count)	zpci_memset_io(dst, val, count)
+
+#define __raw_readb	zpci_read_u8
+#define __raw_readw	zpci_read_u16
+#define __raw_readl	zpci_read_u32
+#define __raw_readq	zpci_read_u64
+#define __raw_writeb	zpci_write_u8
+#define __raw_writew	zpci_write_u16
+#define __raw_writel	zpci_write_u32
+#define __raw_writeq	zpci_write_u64
+
+#endif /* CONFIG_PCI */
+
+#include <asm-generic/io.h>
+
+#endif
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
new file mode 100644
index 000000000..ece606c2e
--- /dev/null
+++ b/arch/s390/include/asm/ipl.h
@@ -0,0 +1,182 @@
+/*
+ * s390 (re)ipl support
+ *
+ * Copyright IBM Corp. 2007
+ */
+
+#ifndef _ASM_S390_IPL_H
+#define _ASM_S390_IPL_H
+
+#include <asm/lowcore.h>
+#include <asm/types.h>
+#include <asm/cio.h>
+#include <asm/setup.h>
+
+#define IPL_PARMBLOCK_ORIGIN	0x2000
+
+#define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \
+			      sizeof(struct ipl_block_fcp))
+
+#define IPL_PARM_BLK0_FCP_LEN (sizeof(struct ipl_block_fcp) + 16)
+
+#define IPL_PARM_BLK_CCW_LEN (sizeof(struct ipl_list_hdr) + \
+			      sizeof(struct ipl_block_ccw))
+
+#define IPL_PARM_BLK0_CCW_LEN (sizeof(struct ipl_block_ccw) + 16)
+
+#define IPL_MAX_SUPPORTED_VERSION (0)
+
+#define IPL_PARMBLOCK_START	((struct ipl_parameter_block *) \
+				 IPL_PARMBLOCK_ORIGIN)
+#define IPL_PARMBLOCK_SIZE	(IPL_PARMBLOCK_START->hdr.len)
+
+struct ipl_list_hdr {
+	u32 len;
+	u8  reserved1[3];
+	u8  version;
+	u32 blk0_len;
+	u8  pbt;
+	u8  flags;
+	u16 reserved2;
+	u8  loadparm[8];
+} __attribute__((packed));
+
+struct ipl_block_fcp {
+	u8  reserved1[305-1];
+	u8  opt;
+	u8  reserved2[3];
+	u16 reserved3;
+	u16 devno;
+	u8  reserved4[4];
+	u64 wwpn;
+	u64 lun;
+	u32 bootprog;
+	u8  reserved5[12];
+	u64 br_lba;
+	u32 scp_data_len;
+	u8  reserved6[260];
+	u8  scp_data[];
+} __attribute__((packed));
+
+#define DIAG308_VMPARM_SIZE	64
+#define DIAG308_SCPDATA_SIZE	(PAGE_SIZE - (sizeof(struct ipl_list_hdr) + \
+				 offsetof(struct ipl_block_fcp, scp_data)))
+
+struct ipl_block_ccw {
+	u8  reserved1[84];
+	u8  reserved2[2];
+	u16 devno;
+	u8  vm_flags;
+	u8  reserved3[3];
+	u32 vm_parm_len;
+	u8  nss_name[8];
+	u8  vm_parm[DIAG308_VMPARM_SIZE];
+	u8  reserved4[8];
+} __attribute__((packed));
+
+struct ipl_parameter_block {
+	struct ipl_list_hdr hdr;
+	union {
+		struct ipl_block_fcp fcp;
+		struct ipl_block_ccw ccw;
+	} ipl_info;
+} __attribute__((packed,aligned(4096)));
+
+/*
+ * IPL validity flags
+ */
+extern u32 ipl_flags;
+extern u32 dump_prefix_page;
+
+struct dump_save_areas {
+	struct save_area_ext **areas;
+	int count;
+};
+
+extern struct dump_save_areas dump_save_areas;
+struct save_area_ext *dump_save_area_create(int cpu);
+
+extern void do_reipl(void);
+extern void do_halt(void);
+extern void do_poff(void);
+extern void ipl_save_parameters(void);
+extern void ipl_update_parameters(void);
+extern size_t append_ipl_vmparm(char *, size_t);
+extern size_t append_ipl_scpdata(char *, size_t);
+
+enum {
+	IPL_DEVNO_VALID		= 1,
+	IPL_PARMBLOCK_VALID	= 2,
+	IPL_NSS_VALID		= 4,
+};
+
+enum ipl_type {
+	IPL_TYPE_UNKNOWN	= 1,
+	IPL_TYPE_CCW		= 2,
+	IPL_TYPE_FCP		= 4,
+	IPL_TYPE_FCP_DUMP	= 8,
+	IPL_TYPE_NSS		= 16,
+};
+
+struct ipl_info
+{
+	enum ipl_type type;
+	union {
+		struct {
+			struct ccw_dev_id dev_id;
+		} ccw;
+		struct {
+			struct ccw_dev_id dev_id;
+			u64 wwpn;
+			u64 lun;
+		} fcp;
+		struct {
+			char name[NSS_NAME_SIZE + 1];
+		} nss;
+	} data;
+};
+
+extern struct ipl_info ipl_info;
+extern void setup_ipl(void);
+
+/*
+ * DIAG 308 support
+ */
+enum diag308_subcode  {
+	DIAG308_REL_HSA	= 2,
+	DIAG308_IPL	= 3,
+	DIAG308_DUMP	= 4,
+	DIAG308_SET	= 5,
+	DIAG308_STORE	= 6,
+};
+
+enum diag308_ipl_type {
+	DIAG308_IPL_TYPE_FCP	= 0,
+	DIAG308_IPL_TYPE_CCW	= 2,
+};
+
+enum diag308_opt {
+	DIAG308_IPL_OPT_IPL	= 0x10,
+	DIAG308_IPL_OPT_DUMP	= 0x20,
+};
+
+enum diag308_flags {
+	DIAG308_FLAGS_LP_VALID	= 0x80,
+};
+
+enum diag308_vm_flags {
+	DIAG308_VM_FLAGS_NSS_VALID	= 0x80,
+	DIAG308_VM_FLAGS_VP_VALID	= 0x40,
+};
+
+enum diag308_rc {
+	DIAG308_RC_OK		= 0x0001,
+	DIAG308_RC_NOCONFIG	= 0x0102,
+};
+
+extern int diag308(unsigned long subcode, void *addr);
+extern void diag308_reset(void);
+extern void store_status(void);
+extern void lgr_info_log(void);
+
+#endif /* _ASM_S390_IPL_H */
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
new file mode 100644
index 000000000..ff95d15a2
--- /dev/null
+++ b/arch/s390/include/asm/irq.h
@@ -0,0 +1,106 @@
+#ifndef _ASM_IRQ_H
+#define _ASM_IRQ_H
+
+#define EXT_INTERRUPT	0
+#define IO_INTERRUPT	1
+#define THIN_INTERRUPT	2
+
+#define NR_IRQS_BASE	3
+
+#ifdef CONFIG_PCI_NR_MSI
+# define NR_IRQS	(NR_IRQS_BASE + CONFIG_PCI_NR_MSI)
+#else
+# define NR_IRQS	NR_IRQS_BASE
+#endif
+
+/* External interruption codes */
+#define EXT_IRQ_INTERRUPT_KEY	0x0040
+#define EXT_IRQ_CLK_COMP	0x1004
+#define EXT_IRQ_CPU_TIMER	0x1005
+#define EXT_IRQ_WARNING_TRACK	0x1007
+#define EXT_IRQ_MALFUNC_ALERT	0x1200
+#define EXT_IRQ_EMERGENCY_SIG	0x1201
+#define EXT_IRQ_EXTERNAL_CALL	0x1202
+#define EXT_IRQ_TIMING_ALERT	0x1406
+#define EXT_IRQ_MEASURE_ALERT	0x1407
+#define EXT_IRQ_SERVICE_SIG	0x2401
+#define EXT_IRQ_CP_SERVICE	0x2603
+#define EXT_IRQ_IUCV		0x4000
+
+#ifndef __ASSEMBLY__
+
+#include <linux/hardirq.h>
+#include <linux/percpu.h>
+#include <linux/cache.h>
+#include <linux/types.h>
+
+enum interruption_class {
+	IRQEXT_CLK,
+	IRQEXT_EXC,
+	IRQEXT_EMS,
+	IRQEXT_TMR,
+	IRQEXT_TLA,
+	IRQEXT_PFL,
+	IRQEXT_DSD,
+	IRQEXT_VRT,
+	IRQEXT_SCP,
+	IRQEXT_IUC,
+	IRQEXT_CMS,
+	IRQEXT_CMC,
+	IRQEXT_CMR,
+	IRQEXT_FTP,
+	IRQIO_CIO,
+	IRQIO_QAI,
+	IRQIO_DAS,
+	IRQIO_C15,
+	IRQIO_C70,
+	IRQIO_TAP,
+	IRQIO_VMR,
+	IRQIO_LCS,
+	IRQIO_CTC,
+	IRQIO_APB,
+	IRQIO_ADM,
+	IRQIO_CSC,
+	IRQIO_PCI,
+	IRQIO_MSI,
+	IRQIO_VIR,
+	IRQIO_VAI,
+	NMI_NMI,
+	CPU_RST,
+	NR_ARCH_IRQS
+};
+
+struct irq_stat {
+	unsigned int irqs[NR_ARCH_IRQS];
+};
+
+DECLARE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
+
+static __always_inline void inc_irq_stat(enum interruption_class irq)
+{
+	__this_cpu_inc(irq_stat.irqs[irq]);	/* per-cpu irq_stat counter for this class */
+}
+
+struct ext_code {
+	unsigned short subcode;
+	unsigned short code;
+};
+
+typedef void (*ext_int_handler_t)(struct ext_code, unsigned int, unsigned long);
+
+int register_external_irq(u16 code, ext_int_handler_t handler);
+int unregister_external_irq(u16 code, ext_int_handler_t handler);
+
+enum irq_subclass {
+	IRQ_SUBCLASS_MEASUREMENT_ALERT = 5,
+	IRQ_SUBCLASS_SERVICE_SIGNAL = 9,
+};
+
+void irq_subclass_register(enum irq_subclass subclass);
+void irq_subclass_unregister(enum irq_subclass subclass);
+
+#define irq_canonicalize(irq)  (irq)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_IRQ_H */
diff --git a/arch/s390/include/asm/irq_regs.h b/arch/s390/include/asm/irq_regs.h
new file mode 100644
index 000000000..3dd9c0b70
--- /dev/null
+++ b/arch/s390/include/asm/irq_regs.h
@@ -0,0 +1 @@
+#include <asm-generic/irq_regs.h>
diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h
new file mode 100644
index 000000000..16aa0c779
--- /dev/null
+++ b/arch/s390/include/asm/irqflags.h
@@ -0,0 +1,72 @@
+/*
+ *    Copyright IBM Corp. 2006, 2010
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_IRQFLAGS_H
+#define __ASM_IRQFLAGS_H
+
+#include <linux/types.h>
+
+/* store then OR system mask; evaluates to what the instruction stored in __mask. */
+#define __arch_local_irq_stosm(__or)					\
+({									\
+	unsigned long __mask;						\
+	asm volatile(							\
+		"	stosm	%0,%1"					\
+		: "=Q" (__mask) : "i" (__or) : "memory");		\
+	__mask;								\
+})
+
+/* store then AND system mask; evaluates to what the instruction stored in __mask. */
+#define __arch_local_irq_stnsm(__and)					\
+({									\
+	unsigned long __mask;						\
+	asm volatile(							\
+		"	stnsm	%0,%1"					\
+		: "=Q" (__mask) : "i" (__and) : "memory");		\
+	__mask;								\
+})
+
+/* set system mask from a flags value held in memory ("Q" operand). */
+static inline notrace void __arch_local_irq_ssm(unsigned long flags)
+{
+	asm volatile("ssm   %0" : : "Q" (flags) : "memory");
+}
+
+static inline notrace unsigned long arch_local_save_flags(void)
+{
+	return __arch_local_irq_stnsm(0xff);	/* AND with 0xff changes no mask bit */
+}
+
+static inline notrace unsigned long arch_local_irq_save(void)
+{
+	return __arch_local_irq_stnsm(0xfc);	/* AND with 0xfc clears the two low mask bits */
+}
+
+static inline notrace void arch_local_irq_disable(void)
+{
+	arch_local_irq_save();	/* same operation, returned flags are dropped */
+}
+
+static inline notrace void arch_local_irq_enable(void)
+{
+	__arch_local_irq_stosm(0x03);	/* OR in the two bits that irq_save masks out */
+}
+
+static inline notrace void arch_local_irq_restore(unsigned long flags)
+{
+	__arch_local_irq_ssm(flags);	/* load the system mask from the saved flags */
+}
+
+static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
+{
+	return (flags & (3UL << (BITS_PER_LONG - 8))) == 0;	/* both bits off */
+}
+
+static inline notrace bool arch_irqs_disabled(void)
+{
+	return arch_irqs_disabled_flags(arch_local_save_flags());	/* test the current mask */
+}
+
+#endif /* __ASM_IRQFLAGS_H */
diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h
new file mode 100644
index 000000000..68d7d6830
--- /dev/null
+++ b/arch/s390/include/asm/isc.h
@@ -0,0 +1,28 @@
+#ifndef _ASM_S390_ISC_H
+#define _ASM_S390_ISC_H
+
+#include <linux/types.h>
+
+/*
+ * I/O interruption subclasses used by drivers.
+ * Please add all used iscs here so that it is possible to distribute
+ * isc usage between drivers.
+ * Reminder: 0 is highest priority, 7 lowest.
+ */
+#define MAX_ISC 7
+
+/* Regular I/O interrupts. */
+#define IO_SCH_ISC 3			/* regular I/O subchannels */
+#define CONSOLE_ISC 1			/* console I/O subchannel */
+#define EADM_SCH_ISC 4			/* EADM subchannels */
+#define CHSC_SCH_ISC 7			/* CHSC subchannels */
+/* Adapter interrupts. */
+#define QDIO_AIRQ_ISC IO_SCH_ISC	/* I/O subchannel in qdio mode */
+#define PCI_ISC 2			/* PCI I/O subchannels */
+#define AP_ISC 6			/* adjunct processor (crypto) devices */
+
+/* Functions for registration of I/O interruption subclasses */
+void isc_register(unsigned int isc);
+void isc_unregister(unsigned int isc);
+
+#endif /* _ASM_S390_ISC_H */
diff --git a/arch/s390/include/asm/itcw.h b/arch/s390/include/asm/itcw.h
new file mode 100644
index 000000000..fb1bedd3d
--- /dev/null
+++ b/arch/s390/include/asm/itcw.h
@@ -0,0 +1,30 @@
+/*
+ *  Functions for incremental construction of fcx enabled I/O control blocks.
+ *
+ *    Copyright IBM Corp. 2008
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_ITCW_H
+#define _ASM_S390_ITCW_H
+
+#include <linux/types.h>
+#include <asm/fcx.h>
+
+#define ITCW_OP_READ	0
+#define ITCW_OP_WRITE	1
+
+struct itcw;
+
+struct tcw *itcw_get_tcw(struct itcw *itcw);
+size_t itcw_calc_size(int intrg, int max_tidaws, int intrg_max_tidaws);
+struct itcw *itcw_init(void *buffer, size_t size, int op, int intrg,
+		       int max_tidaws, int intrg_max_tidaws);
+struct dcw *itcw_add_dcw(struct itcw *itcw, u8 cmd, u8 flags, void *cd,
+			 u8 cd_count, u32 count);
+struct tidaw *itcw_add_tidaw(struct itcw *itcw, u8 flags, void *addr,
+			     u32 count);
+void itcw_set_data(struct itcw *itcw, void *addr, int use_tidal);
+void itcw_finalize(struct itcw *itcw);
+
+#endif /* _ASM_S390_ITCW_H */
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
new file mode 100644
index 000000000..69972b795
--- /dev/null
+++ b/arch/s390/include/asm/jump_label.h
@@ -0,0 +1,37 @@
+#ifndef _ASM_S390_JUMP_LABEL_H
+#define _ASM_S390_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+#define JUMP_LABEL_NOP_SIZE 6
+#define JUMP_LABEL_NOP_OFFSET 2
+
+/*
+ * A "brcl 0,2" is emitted for jump labels at compile time so it can be told apart from a
+ * hotpatch generated instruction; its address, the label and the key go into __jump_table.
+ */
+static __always_inline bool arch_static_branch(struct static_key *key)
+{
+	asm_volatile_goto("0:	brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
+		".pushsection __jump_table, \"aw\"\n"
+		".balign 8\n"
+		".quad 0b, %l[label], %0\n"
+		".popsection\n"
+		: : "X" (key) : : label);
+	return false;	/* straight-line path after the asm */
+label:
+	return true;	/* reached only through the asm goto label */
+}
+
+typedef unsigned long jump_label_t;
+
+struct jump_entry {
+	jump_label_t code;	/* 1st .quad of arch_static_branch(): "0b", the brcl */
+	jump_label_t target;	/* 2nd .quad: the branch label */
+	jump_label_t key;	/* 3rd .quad: the key operand */
+};
+
+#endif  /* __ASSEMBLY__ */
+#endif
diff --git a/arch/s390/include/asm/kdebug.h b/arch/s390/include/asm/kdebug.h
new file mode 100644
index 000000000..5c1abd476
--- /dev/null
+++ b/arch/s390/include/asm/kdebug.h
@@ -0,0 +1,27 @@
+#ifndef _S390_KDEBUG_H
+#define _S390_KDEBUG_H
+
+/*
+ * Feb 2006 Ported to s390 <grundym@us.ibm.com>
+ */
+
+struct pt_regs;
+
+enum die_val {
+	DIE_OOPS = 1,
+	DIE_BPT,
+	DIE_SSTEP,
+	DIE_PANIC,
+	DIE_NMI,
+	DIE_DIE,
+	DIE_NMIWATCHDOG,
+	DIE_KERNELDEBUG,
+	DIE_TRAP,
+	DIE_GPF,
+	DIE_CALL,
+	DIE_NMI_IPI,
+};
+
+extern void die(struct pt_regs *, const char *);
+
+#endif
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
new file mode 100644
index 000000000..2f924bc30
--- /dev/null
+++ b/arch/s390/include/asm/kexec.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright IBM Corp. 2005
+ *
+ * Author(s): Rolf Adelsberger <adelsberger@de.ibm.com>
+ *
+ */
+
+#ifndef _S390_KEXEC_H
+#define _S390_KEXEC_H
+
+#include <asm/processor.h>
+#include <asm/page.h>
+/*
+ * KEXEC_SOURCE_MEMORY_LIMIT is the maximum page get_free_page can return,
+ * i.e. the maximum page that is mapped directly into kernel memory,
+ * so that kmap is not required.
+ */
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control pages */
+/* Not more than 2GB */
+#define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31)
+
+/* Allocate control page with GFP_DMA */
+#define KEXEC_CONTROL_MEMORY_GFP GFP_DMA
+
+/* Maximum address we can use for the crash control pages */
+#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL)
+
+/* Allocate one page for the pdp and the second for the code */
+#define KEXEC_CONTROL_PAGE_SIZE 4096
+
+/* Alignment of crashkernel memory */
+#define KEXEC_CRASH_MEM_ALIGN HPAGE_SIZE
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_S390
+
+/*
+ * Size for s390x ELF notes per CPU
+ *
+ * Seven notes plus zero note at the end: prstatus, fpregset, timer,
+ * tod_cmp, tod_reg, control regs, and prefix
+ */
+#define KEXEC_NOTE_BYTES \
+	(ALIGN(sizeof(struct elf_note), 4) * 8 + \
+	 ALIGN(sizeof("CORE"), 4) * 7 + \
+	 ALIGN(sizeof(struct elf_prstatus), 4) + \
+	 ALIGN(sizeof(elf_fpregset_t), 4) + \
+	 ALIGN(sizeof(u64), 4) + \
+	 ALIGN(sizeof(u64), 4) + \
+	 ALIGN(sizeof(u32), 4) + \
+	 ALIGN(sizeof(u64) * 16, 4) + \
+	 ALIGN(sizeof(u32), 4) \
+	)
+
+/* Provide a dummy definition to avoid build failures. */
+static inline void crash_setup_regs(struct pt_regs *newregs,
+					struct pt_regs *oldregs) { }
+
+#endif /*_S390_KEXEC_H */
diff --git a/arch/s390/include/asm/kmap_types.h b/arch/s390/include/asm/kmap_types.h
new file mode 100644
index 000000000..0a8862233
--- /dev/null
+++ b/arch/s390/include/asm/kmap_types.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_KMAP_TYPES_H
+#define _ASM_KMAP_TYPES_H
+
+#include <asm-generic/kmap_types.h>
+
+#endif
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
new file mode 100644
index 000000000..b47ad3b64
--- /dev/null
+++ b/arch/s390/include/asm/kprobes.h
@@ -0,0 +1,94 @@
+#ifndef _ASM_S390_KPROBES_H
+#define _ASM_S390_KPROBES_H
+/*
+ *  Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corp. 2002, 2006
+ *
+ * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ *		Probes initial implementation ( includes suggestions from
+ *		Rusty Russell).
+ * 2004-Nov	Modified for PPC64 by Ananth N Mavinakayanahalli
+ *		<ananth@in.ibm.com>
+ * 2005-Dec	Used as a template for s390 by Mike Grundy
+ *		<grundym@us.ibm.com>
+ */
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+
+struct pt_regs;
+struct kprobe;
+
+typedef u16 kprobe_opcode_t;
+#define BREAKPOINT_INSTRUCTION	0x0002
+
+/* Maximum instruction size is 3 (16bit) halfwords: */
+#define MAX_INSN_SIZE		0x0003
+#define MAX_STACK_SIZE		64
+#define MIN_STACK_SIZE(ADDR) (((MAX_STACK_SIZE) < \
+	(((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) \
+	? (MAX_STACK_SIZE) \
+	: (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
+
+#define kretprobe_blacklist_size 0
+
+#define KPROBE_SWAP_INST	0x10
+
+#define FIXUP_PSW_NORMAL	0x08
+#define FIXUP_BRANCH_NOT_TAKEN	0x04
+#define FIXUP_RETURN_REGISTER	0x02
+#define FIXUP_NOT_REQUIRED	0x01
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+	/* copy of original instruction */
+	kprobe_opcode_t *insn;
+	unsigned int is_ftrace_insn : 1;
+};
+
+struct prev_kprobe {
+	struct kprobe *kp;
+	unsigned long status;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+	unsigned long kprobe_status;
+	unsigned long kprobe_saved_imask;
+	unsigned long kprobe_saved_ctl[3];
+	struct prev_kprobe prev_kprobe;
+	struct pt_regs jprobe_saved_regs;
+	kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
+};
+
+void arch_remove_kprobe(struct kprobe *p);
+void kretprobe_trampoline(void);
+
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+int kprobe_exceptions_notify(struct notifier_block *self,
+	unsigned long val, void *data);
+
+int probe_is_prohibited_opcode(u16 *insn);
+int probe_get_fixup_type(u16 *insn);
+int probe_is_insn_relative_long(u16 *insn);
+
+#define flush_insn_slot(p)	do { } while (0)
+
+#endif	/* _ASM_S390_KPROBES_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
new file mode 100644
index 000000000..d01fc588b
--- /dev/null
+++ b/arch/s390/include/asm/kvm_host.h
@@ -0,0 +1,642 @@
+/*
+ * definition for kernel virtual machines on s390
+ *
+ * Copyright IBM Corp. 2008, 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+
+#ifndef ASM_KVM_HOST_H
+#define ASM_KVM_HOST_H
+
+#include <linux/types.h>
+#include <linux/hrtimer.h>
+#include <linux/interrupt.h>
+#include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <asm/debug.h>
+#include <asm/cpu.h>
+#include <asm/isc.h>
+
+#define KVM_MAX_VCPUS 64
+#define KVM_USER_MEM_SLOTS 32
+
+/*
+ * These seem to be used for allocating ->chip in the routing table,
+ * which we don't use. 4096 is an out-of-thin-air value. If we need
+ * to look at ->chip later on, we'll need to revisit this.
+ */
+#define KVM_NR_IRQCHIPS 1
+#define KVM_IRQCHIP_NUM_PINS 4096
+
+#define SIGP_CTRL_C		0x80
+#define SIGP_CTRL_SCN_MASK	0x3f
+
+struct sca_entry {
+	__u8	reserved0;
+	__u8	sigp_ctrl;
+	__u16	reserved[3];
+	__u64	sda;
+	__u64	reserved2[2];
+} __attribute__((packed));
+
+union ipte_control {
+	unsigned long val;
+	struct {
+		unsigned long k  : 1;
+		unsigned long kh : 31;
+		unsigned long kg : 32;
+	};
+};
+
+struct sca_block {
+	union ipte_control ipte_control;
+	__u64	reserved[5];
+	__u64	mcn;
+	__u64	reserved2;
+	struct sca_entry cpu[64];
+} __attribute__((packed));
+
+#define CPUSTAT_STOPPED    0x80000000
+#define CPUSTAT_WAIT       0x10000000
+#define CPUSTAT_ECALL_PEND 0x08000000
+#define CPUSTAT_STOP_INT   0x04000000
+#define CPUSTAT_IO_INT     0x02000000
+#define CPUSTAT_EXT_INT    0x01000000
+#define CPUSTAT_RUNNING    0x00800000
+#define CPUSTAT_RETAINED   0x00400000
+#define CPUSTAT_TIMING_SUB 0x00020000
+#define CPUSTAT_SIE_SUB    0x00010000
+#define CPUSTAT_RRF        0x00008000
+#define CPUSTAT_SLSV       0x00004000
+#define CPUSTAT_SLSR       0x00002000
+#define CPUSTAT_ZARCH      0x00000800
+#define CPUSTAT_MCDS       0x00000100
+#define CPUSTAT_SM         0x00000080
+#define CPUSTAT_IBS        0x00000040
+#define CPUSTAT_G          0x00000008
+#define CPUSTAT_GED        0x00000004
+#define CPUSTAT_J          0x00000002
+#define CPUSTAT_P          0x00000001
+
+struct kvm_s390_sie_block {
+	atomic_t cpuflags;		/* 0x0000 */
+	__u32 : 1;			/* 0x0004 */
+	__u32 prefix : 18;
+	__u32 : 1;
+	__u32 ibc : 12;
+	__u8	reserved08[4];		/* 0x0008 */
+#define PROG_IN_SIE (1<<0)
+	__u32	prog0c;			/* 0x000c */
+	__u8	reserved10[16];		/* 0x0010 */
+#define PROG_BLOCK_SIE 0x00000001
+	atomic_t prog20;		/* 0x0020 */
+	__u8	reserved24[4];		/* 0x0024 */
+	__u64	cputm;			/* 0x0028 */
+	__u64	ckc;			/* 0x0030 */
+	__u64	epoch;			/* 0x0038 */
+	__u8	reserved40[4];		/* 0x0040 */
+#define LCTL_CR0	0x8000
+#define LCTL_CR6	0x0200
+#define LCTL_CR9	0x0040
+#define LCTL_CR10	0x0020
+#define LCTL_CR11	0x0010
+#define LCTL_CR14	0x0002
+	__u16   lctl;			/* 0x0044 */
+	__s16	icpua;			/* 0x0046 */
+#define ICTL_PINT	0x20000000
+#define ICTL_LPSW	0x00400000
+#define ICTL_STCTL	0x00040000
+#define ICTL_ISKE	0x00004000
+#define ICTL_SSKE	0x00002000
+#define ICTL_RRBE	0x00001000
+#define ICTL_TPROT	0x00000200
+	__u32	ictl;			/* 0x0048 */
+	__u32	eca;			/* 0x004c */
+#define ICPT_INST	0x04
+#define ICPT_PROGI	0x08
+#define ICPT_INSTPROGI	0x0C
+#define ICPT_OPEREXC	0x2C
+#define ICPT_PARTEXEC	0x38
+#define ICPT_IOINST	0x40
+	__u8	icptcode;		/* 0x0050 */
+	__u8	icptstatus;		/* 0x0051 */
+	__u16	ihcpu;			/* 0x0052 */
+	__u8	reserved54[2];		/* 0x0054 */
+	__u16	ipa;			/* 0x0056 */
+	__u32	ipb;			/* 0x0058 */
+	__u32	scaoh;			/* 0x005c */
+	__u8	reserved60;		/* 0x0060 */
+	__u8	ecb;			/* 0x0061 */
+	__u8    ecb2;                   /* 0x0062 */
+#define ECB3_AES 0x04
+#define ECB3_DEA 0x08
+	__u8    ecb3;			/* 0x0063 */
+	__u32	scaol;			/* 0x0064 */
+	__u8	reserved68[4];		/* 0x0068 */
+	__u32	todpr;			/* 0x006c */
+	__u8	reserved70[32];		/* 0x0070 */
+	psw_t	gpsw;			/* 0x0090 */
+	__u64	gg14;			/* 0x00a0 */
+	__u64	gg15;			/* 0x00a8 */
+	__u8	reservedb0[20];		/* 0x00b0 */
+	__u16	extcpuaddr;		/* 0x00c4 */
+	__u16	eic;			/* 0x00c6 */
+	__u32	reservedc8;		/* 0x00c8 */
+	__u16	pgmilc;			/* 0x00cc */
+	__u16	iprcc;			/* 0x00ce */
+	__u32	dxc;			/* 0x00d0 */
+	__u16	mcn;			/* 0x00d4 */
+	__u8	perc;			/* 0x00d6 */
+	__u8	peratmid;		/* 0x00d7 */
+	__u64	peraddr;		/* 0x00d8 */
+	__u8	eai;			/* 0x00e0 */
+	__u8	peraid;			/* 0x00e1 */
+	__u8	oai;			/* 0x00e2 */
+	__u8	armid;			/* 0x00e3 */
+	__u8	reservede4[4];		/* 0x00e4 */
+	__u64	tecmc;			/* 0x00e8 */
+	__u8	reservedf0[12];		/* 0x00f0 */
+#define CRYCB_FORMAT1 0x00000001
+#define CRYCB_FORMAT2 0x00000003
+	__u32	crycbd;			/* 0x00fc */
+	__u64	gcr[16];		/* 0x0100 */
+	__u64	gbea;			/* 0x0180 */
+	__u8	reserved188[24];	/* 0x0188 */
+	__u32	fac;			/* 0x01a0 */
+	__u8	reserved1a4[20];	/* 0x01a4 */
+	__u64	cbrlo;			/* 0x01b8 */
+	__u8	reserved1c0[8];		/* 0x01c0 */
+	__u32	ecd;			/* 0x01c8 */
+	__u8	reserved1cc[18];	/* 0x01cc */
+	__u64	pp;			/* 0x01de */
+	__u8	reserved1e6[2];		/* 0x01e6 */
+	__u64	itdba;			/* 0x01e8 */
+	__u8	reserved1f0[16];	/* 0x01f0 */
+} __attribute__((packed));
+
+struct kvm_s390_itdb {
+	__u8	data[256];
+} __packed;
+
+struct kvm_s390_vregs {
+	__vector128 vrs[32];
+	__u8	reserved200[512];	/* for future vector expansion */
+} __packed;
+
+struct sie_page {
+	struct kvm_s390_sie_block sie_block;
+	__u8 reserved200[1024];		/* 0x0200 */
+	struct kvm_s390_itdb itdb;	/* 0x0600 */
+	__u8 reserved700[1280];		/* 0x0700 */
+	struct kvm_s390_vregs vregs;	/* 0x0c00 */
+} __packed;
+
+struct kvm_vcpu_stat {
+	u32 exit_userspace;
+	u32 exit_null;
+	u32 exit_external_request;
+	u32 exit_external_interrupt;
+	u32 exit_stop_request;
+	u32 exit_validity;
+	u32 exit_instruction;
+	u32 halt_successful_poll;
+	u32 halt_wakeup;
+	u32 instruction_lctl;
+	u32 instruction_lctlg;
+	u32 instruction_stctl;
+	u32 instruction_stctg;
+	u32 exit_program_interruption;
+	u32 exit_instr_and_program;
+	u32 deliver_external_call;
+	u32 deliver_emergency_signal;
+	u32 deliver_service_signal;
+	u32 deliver_virtio_interrupt;
+	u32 deliver_stop_signal;
+	u32 deliver_prefix_signal;
+	u32 deliver_restart_signal;
+	u32 deliver_program_int;
+	u32 deliver_io_int;
+	u32 exit_wait_state;
+	u32 instruction_pfmf;
+	u32 instruction_stidp;
+	u32 instruction_spx;
+	u32 instruction_stpx;
+	u32 instruction_stap;
+	u32 instruction_storage_key;
+	u32 instruction_ipte_interlock;
+	u32 instruction_stsch;
+	u32 instruction_chsc;
+	u32 instruction_stsi;
+	u32 instruction_stfl;
+	u32 instruction_tprot;
+	u32 instruction_essa;
+	u32 instruction_sigp_sense;
+	u32 instruction_sigp_sense_running;
+	u32 instruction_sigp_external_call;
+	u32 instruction_sigp_emergency;
+	u32 instruction_sigp_cond_emergency;
+	u32 instruction_sigp_start;
+	u32 instruction_sigp_stop;
+	u32 instruction_sigp_stop_store_status;
+	u32 instruction_sigp_store_status;
+	u32 instruction_sigp_store_adtl_status;
+	u32 instruction_sigp_arch;
+	u32 instruction_sigp_prefix;
+	u32 instruction_sigp_restart;
+	u32 instruction_sigp_init_cpu_reset;
+	u32 instruction_sigp_cpu_reset;
+	u32 instruction_sigp_unknown;
+	u32 diagnose_10;
+	u32 diagnose_44;
+	u32 diagnose_9c;
+};
+
+#define PGM_OPERATION			0x01
+#define PGM_PRIVILEGED_OP		0x02
+#define PGM_EXECUTE			0x03
+#define PGM_PROTECTION			0x04
+#define PGM_ADDRESSING			0x05
+#define PGM_SPECIFICATION		0x06
+#define PGM_DATA			0x07
+#define PGM_FIXED_POINT_OVERFLOW	0x08
+#define PGM_FIXED_POINT_DIVIDE		0x09
+#define PGM_DECIMAL_OVERFLOW		0x0a
+#define PGM_DECIMAL_DIVIDE		0x0b
+#define PGM_HFP_EXPONENT_OVERFLOW	0x0c
+#define PGM_HFP_EXPONENT_UNDERFLOW	0x0d
+#define PGM_HFP_SIGNIFICANCE		0x0e
+#define PGM_HFP_DIVIDE			0x0f
+#define PGM_SEGMENT_TRANSLATION		0x10
+#define PGM_PAGE_TRANSLATION		0x11
+#define PGM_TRANSLATION_SPEC		0x12
+#define PGM_SPECIAL_OPERATION		0x13
+#define PGM_OPERAND			0x15
+#define PGM_TRACE_TABEL			0x16
+#define PGM_VECTOR_PROCESSING		0x1b
+#define PGM_SPACE_SWITCH		0x1c
+#define PGM_HFP_SQUARE_ROOT		0x1d
+#define PGM_PC_TRANSLATION_SPEC		0x1f
+#define PGM_AFX_TRANSLATION		0x20
+#define PGM_ASX_TRANSLATION		0x21
+#define PGM_LX_TRANSLATION		0x22
+#define PGM_EX_TRANSLATION		0x23
+#define PGM_PRIMARY_AUTHORITY		0x24
+#define PGM_SECONDARY_AUTHORITY		0x25
+#define PGM_LFX_TRANSLATION		0x26
+#define PGM_LSX_TRANSLATION		0x27
+#define PGM_ALET_SPECIFICATION		0x28
+#define PGM_ALEN_TRANSLATION		0x29
+#define PGM_ALE_SEQUENCE		0x2a
+#define PGM_ASTE_VALIDITY		0x2b
+#define PGM_ASTE_SEQUENCE		0x2c
+#define PGM_EXTENDED_AUTHORITY		0x2d
+#define PGM_LSTE_SEQUENCE		0x2e
+#define PGM_ASTE_INSTANCE		0x2f
+#define PGM_STACK_FULL			0x30
+#define PGM_STACK_EMPTY			0x31
+#define PGM_STACK_SPECIFICATION		0x32
+#define PGM_STACK_TYPE			0x33
+#define PGM_STACK_OPERATION		0x34
+#define PGM_ASCE_TYPE			0x38
+#define PGM_REGION_FIRST_TRANS		0x39
+#define PGM_REGION_SECOND_TRANS		0x3a
+#define PGM_REGION_THIRD_TRANS		0x3b
+#define PGM_MONITOR			0x40
+#define PGM_PER				0x80
+#define PGM_CRYPTO_OPERATION		0x119
+
+/* irq types in order of priority */
+enum irq_types {
+	IRQ_PEND_MCHK_EX = 0,
+	IRQ_PEND_SVC,
+	IRQ_PEND_PROG,
+	IRQ_PEND_MCHK_REP,
+	IRQ_PEND_EXT_IRQ_KEY,
+	IRQ_PEND_EXT_MALFUNC,
+	IRQ_PEND_EXT_EMERGENCY,
+	IRQ_PEND_EXT_EXTERNAL,
+	IRQ_PEND_EXT_CLOCK_COMP,
+	IRQ_PEND_EXT_CPU_TIMER,
+	IRQ_PEND_EXT_TIMING,
+	IRQ_PEND_EXT_SERVICE,
+	IRQ_PEND_EXT_HOST,
+	IRQ_PEND_PFAULT_INIT,
+	IRQ_PEND_PFAULT_DONE,
+	IRQ_PEND_VIRTIO,
+	IRQ_PEND_IO_ISC_0,
+	IRQ_PEND_IO_ISC_1,
+	IRQ_PEND_IO_ISC_2,
+	IRQ_PEND_IO_ISC_3,
+	IRQ_PEND_IO_ISC_4,
+	IRQ_PEND_IO_ISC_5,
+	IRQ_PEND_IO_ISC_6,
+	IRQ_PEND_IO_ISC_7,
+	IRQ_PEND_SIGP_STOP,
+	IRQ_PEND_RESTART,
+	IRQ_PEND_SET_PREFIX,
+	IRQ_PEND_COUNT
+};
+
+/* We have 2M for virtio device descriptor pages. Smallest amount of
+ * memory per page is 24 bytes (1 queue), so (2048*1024) / 24 = 87381
+ */
+#define KVM_S390_MAX_VIRTIO_IRQS 87381
+
+/*
+ * Repressible (non-floating) machine check interrupts
+ * subclass bits in MCIC
+ */
+#define MCHK_EXTD_BIT 58
+#define MCHK_DEGR_BIT 56
+#define MCHK_WARN_BIT 55
+#define MCHK_REP_MASK ((1UL << MCHK_DEGR_BIT) | \
+		       (1UL << MCHK_EXTD_BIT) | \
+		       (1UL << MCHK_WARN_BIT))
+
+/* Exigent machine check interrupts subclass bits in MCIC */
+#define MCHK_SD_BIT 63
+#define MCHK_PD_BIT 62
+#define MCHK_EX_MASK ((1UL << MCHK_SD_BIT) | (1UL << MCHK_PD_BIT))
+
+#define IRQ_PEND_EXT_MASK ((1UL << IRQ_PEND_EXT_IRQ_KEY)    | \
+			   (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
+			   (1UL << IRQ_PEND_EXT_CPU_TIMER)  | \
+			   (1UL << IRQ_PEND_EXT_MALFUNC)    | \
+			   (1UL << IRQ_PEND_EXT_EMERGENCY)  | \
+			   (1UL << IRQ_PEND_EXT_EXTERNAL)   | \
+			   (1UL << IRQ_PEND_EXT_TIMING)     | \
+			   (1UL << IRQ_PEND_EXT_HOST)       | \
+			   (1UL << IRQ_PEND_EXT_SERVICE)    | \
+			   (1UL << IRQ_PEND_VIRTIO)         | \
+			   (1UL << IRQ_PEND_PFAULT_INIT)    | \
+			   (1UL << IRQ_PEND_PFAULT_DONE))
+
+#define IRQ_PEND_IO_MASK ((1UL << IRQ_PEND_IO_ISC_0) | \
+			  (1UL << IRQ_PEND_IO_ISC_1) | \
+			  (1UL << IRQ_PEND_IO_ISC_2) | \
+			  (1UL << IRQ_PEND_IO_ISC_3) | \
+			  (1UL << IRQ_PEND_IO_ISC_4) | \
+			  (1UL << IRQ_PEND_IO_ISC_5) | \
+			  (1UL << IRQ_PEND_IO_ISC_6) | \
+			  (1UL << IRQ_PEND_IO_ISC_7))
+
+#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
+			    (1UL << IRQ_PEND_MCHK_EX))
+
+struct kvm_s390_interrupt_info {
+	struct list_head list;
+	u64	type;
+	union {
+		struct kvm_s390_io_info io;
+		struct kvm_s390_ext_info ext;
+		struct kvm_s390_pgm_info pgm;
+		struct kvm_s390_emerg_info emerg;
+		struct kvm_s390_extcall_info extcall;
+		struct kvm_s390_prefix_info prefix;
+		struct kvm_s390_stop_info stop;
+		struct kvm_s390_mchk_info mchk;
+	};
+};
+
+struct kvm_s390_irq_payload {
+	struct kvm_s390_io_info io;
+	struct kvm_s390_ext_info ext;
+	struct kvm_s390_pgm_info pgm;
+	struct kvm_s390_emerg_info emerg;
+	struct kvm_s390_extcall_info extcall;
+	struct kvm_s390_prefix_info prefix;
+	struct kvm_s390_stop_info stop;
+	struct kvm_s390_mchk_info mchk;
+};
+
+struct kvm_s390_local_interrupt {
+	spinlock_t lock;
+	struct kvm_s390_float_interrupt *float_int;
+	wait_queue_head_t *wq;
+	atomic_t *cpuflags;
+	DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
+	struct kvm_s390_irq_payload irq;
+	unsigned long pending_irqs;
+};
+
+#define FIRQ_LIST_IO_ISC_0 0
+#define FIRQ_LIST_IO_ISC_1 1
+#define FIRQ_LIST_IO_ISC_2 2
+#define FIRQ_LIST_IO_ISC_3 3
+#define FIRQ_LIST_IO_ISC_4 4
+#define FIRQ_LIST_IO_ISC_5 5
+#define FIRQ_LIST_IO_ISC_6 6
+#define FIRQ_LIST_IO_ISC_7 7
+#define FIRQ_LIST_PFAULT   8
+#define FIRQ_LIST_VIRTIO   9
+#define FIRQ_LIST_COUNT   10
+#define FIRQ_CNTR_IO       0
+#define FIRQ_CNTR_SERVICE  1
+#define FIRQ_CNTR_VIRTIO   2
+#define FIRQ_CNTR_PFAULT   3
+#define FIRQ_MAX_COUNT     4
+
+struct kvm_s390_float_interrupt {
+	unsigned long pending_irqs;
+	spinlock_t lock;
+	struct list_head lists[FIRQ_LIST_COUNT];
+	int counters[FIRQ_MAX_COUNT];
+	struct kvm_s390_mchk_info mchk;
+	struct kvm_s390_ext_info srv_signal;
+	int next_rr_cpu;
+	unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+};
+
+struct kvm_hw_wp_info_arch {
+	unsigned long addr;
+	unsigned long phys_addr;
+	int len;
+	char *old_data;
+};
+
+struct kvm_hw_bp_info_arch {
+	unsigned long addr;
+	int len;
+};
+
+/*
+ * Only the upper 16 bits of kvm_guest_debug->control are arch specific.
+ * Further KVM_GUESTDBG flags which can be used from userspace can be found in
+ * arch/s390/include/uapi/asm/kvm.h
+ */
+#define KVM_GUESTDBG_EXIT_PENDING 0x10000000
+/* Guest-debug state tests; vcpu may be any pointer-valued expression. */
+#define guestdbg_enabled(vcpu) \
+		((vcpu)->guest_debug & KVM_GUESTDBG_ENABLE)
+#define guestdbg_sstep_enabled(vcpu) \
+		((vcpu)->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+#define guestdbg_hw_bp_enabled(vcpu) \
+		((vcpu)->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+#define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \
+		((vcpu)->guest_debug & KVM_GUESTDBG_EXIT_PENDING))
+
+struct kvm_guestdbg_info_arch {
+	unsigned long cr0;
+	unsigned long cr9;
+	unsigned long cr10;
+	unsigned long cr11;
+	struct kvm_hw_bp_info_arch *hw_bp_info;
+	struct kvm_hw_wp_info_arch *hw_wp_info;
+	int nr_hw_bp;
+	int nr_hw_wp;
+	unsigned long last_bp;
+};
+
+struct kvm_vcpu_arch {
+	struct kvm_s390_sie_block *sie_block;
+	s390_fp_regs      host_fpregs;
+	unsigned int      host_acrs[NUM_ACRS];
+	s390_fp_regs      guest_fpregs;
+	struct kvm_s390_vregs	*host_vregs;
+	struct kvm_s390_local_interrupt local_int;
+	struct hrtimer    ckc_timer;
+	struct kvm_s390_pgm_info pgm;
+	union  {
+		struct cpuid	cpu_id;
+		u64		stidp_data;
+	};
+	struct gmap *gmap;
+	struct kvm_guestdbg_info_arch guestdbg;
+	unsigned long pfault_token;
+	unsigned long pfault_select;
+	unsigned long pfault_compare;
+};
+
+struct kvm_vm_stat {
+	u32 remote_tlb_flush;
+};
+
+struct kvm_arch_memory_slot {
+};
+
+struct s390_map_info {
+	struct list_head list;
+	__u64 guest_addr;
+	__u64 addr;
+	struct page *page;
+};
+
+struct s390_io_adapter {
+	unsigned int id;
+	int isc;
+	bool maskable;
+	bool masked;
+	bool swap;
+	struct rw_semaphore maps_lock;
+	struct list_head maps;
+	atomic_t nr_maps;
+};
+
+#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
+#define MAX_S390_ADAPTER_MAPS 256
+
+/* maximum size of facilities and facility mask is 2k bytes */
+#define S390_ARCH_FAC_LIST_SIZE_BYTE (1<<11)
+#define S390_ARCH_FAC_LIST_SIZE_U64 \
+	(S390_ARCH_FAC_LIST_SIZE_BYTE / sizeof(u64))
+#define S390_ARCH_FAC_MASK_SIZE_BYTE S390_ARCH_FAC_LIST_SIZE_BYTE
+#define S390_ARCH_FAC_MASK_SIZE_U64 \
+	(S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))
+
+struct kvm_s390_fac {
+	/* facility list requested by guest */
+	__u64 list[S390_ARCH_FAC_LIST_SIZE_U64];
+	/* facility mask supported by kvm & hosting machine */
+	__u64 mask[S390_ARCH_FAC_MASK_SIZE_U64];
+};
+
+struct kvm_s390_cpu_model {
+	struct kvm_s390_fac *fac;
+	struct cpuid cpu_id;
+	unsigned short ibc;
+};
+
+struct kvm_s390_crypto {
+	struct kvm_s390_crypto_cb *crycb;
+	__u32 crycbd;
+	__u8 aes_kw;
+	__u8 dea_kw;
+};
+
+struct kvm_s390_crypto_cb {
+	__u8    reserved00[72];                 /* 0x0000 */
+	__u8    dea_wrapping_key_mask[24];      /* 0x0048 */
+	__u8    aes_wrapping_key_mask[32];      /* 0x0060 */
+	__u8    reserved80[128];                /* 0x0080 */
+};
+
+struct kvm_arch{
+	struct sca_block *sca;
+	debug_info_t *dbf;
+	struct kvm_s390_float_interrupt float_int;
+	struct kvm_device *flic;
+	struct gmap *gmap;
+	int css_support;
+	int use_irqchip;
+	int use_cmma;
+	int user_cpu_state_ctrl;
+	int user_sigp;
+	int user_stsi;
+	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
+	wait_queue_head_t ipte_wq;
+	int ipte_lock_count;
+	struct mutex ipte_mutex;
+	spinlock_t start_stop_lock;
+	struct kvm_s390_cpu_model model;
+	struct kvm_s390_crypto crypto;
+	u64 epoch;
+};
+
+#define KVM_HVA_ERR_BAD		(-1UL)
+#define KVM_HVA_ERR_RO_BAD	(-2UL)
+/* Classify addr as an error HVA by delegating to IS_ERR_VALUE(). */
+static inline bool kvm_is_error_hva(unsigned long addr)
+{
+	return IS_ERR_VALUE(addr);
+}
+
+#define ASYNC_PF_PER_VCPU	64
+struct kvm_arch_async_pf {
+	unsigned long pfault_token;
+};
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+			       struct kvm_async_pf *work);
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+				     struct kvm_async_pf *work);
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+				 struct kvm_async_pf *work);
+
+extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+extern char sie_exit;
+
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_check_processor_compat(void *rtn) {}
+static inline void kvm_arch_exit(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+		struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+		struct kvm_memory_slot *slot) {}
+
+#endif
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
new file mode 100644
index 000000000..e0f842308
--- /dev/null
+++ b/arch/s390/include/asm/kvm_para.h
@@ -0,0 +1,157 @@
+/*
+ * definition for paravirtual devices on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+/*
+ * Hypercalls for KVM on s390. The calling convention is similar to the
+ * s390 ABI, so we use R2-R6 for parameters 1-5. In addition we use R1
+ * as hypercall number and R7 as parameter 6. The return value is
+ * written to R2. We use the diagnose instruction as hypercall. To avoid
+ * conflicts with existing diagnoses for LPAR and z/VM, we do not use
+ * the instruction encoded number, but specify the number in R1 and
+ * use 0x500 as KVM hypercall
+ *
+ * Copyright IBM Corp. 2007,2008
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#ifndef __S390_KVM_PARA_H
+#define __S390_KVM_PARA_H
+
+#include <uapi/asm/kvm_para.h>
+
+
+/* KVM hypercall @nr, no parameters: number in R1, result read from R2. */
+static inline long kvm_hypercall0(unsigned long nr)
+{
+	register unsigned long __nr asm("1") = nr;
+	register long __rc asm("2");
+
+	asm volatile ("diag 2,4,0x500\n"
+		      : "=d" (__rc) : "d" (__nr): "memory", "cc");
+	return __rc;
+}
+/* KVM hypercall @nr with one parameter: R2 holds p1 in, result out. */
+static inline long kvm_hypercall1(unsigned long nr, unsigned long p1)
+{
+	register unsigned long __nr asm("1") = nr;
+	register unsigned long __p1 asm("2") = p1;
+	register long __rc asm("2");
+
+	asm volatile ("diag 2,4,0x500\n"
+		      : "=d" (__rc) : "d" (__nr), "0" (__p1) : "memory", "cc");
+	return __rc;
+}
+/* KVM hypercall @nr with two parameters in R2-R3; result read from R2. */
+static inline long kvm_hypercall2(unsigned long nr, unsigned long p1,
+			       unsigned long p2)
+{
+	register unsigned long __nr asm("1") = nr;
+	register unsigned long __p1 asm("2") = p1;
+	register unsigned long __p2 asm("3") = p2;
+	register long __rc asm("2");
+
+	asm volatile ("diag 2,4,0x500\n"
+		      : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2)
+		      : "memory", "cc");
+	return __rc;
+}
+/* KVM hypercall @nr with three parameters in R2-R4; result read from R2. */
+static inline long kvm_hypercall3(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3)
+{
+	register unsigned long __nr asm("1") = nr;
+	register unsigned long __p1 asm("2") = p1;
+	register unsigned long __p2 asm("3") = p2;
+	register unsigned long __p3 asm("4") = p3;
+	register long __rc asm("2");
+
+	asm volatile ("diag 2,4,0x500\n"
+		      : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
+			"d" (__p3) : "memory", "cc");
+	return __rc;
+}
+
+/* KVM hypercall @nr with four parameters in R2-R5; result read from R2. */
+static inline long kvm_hypercall4(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3,
+			       unsigned long p4)
+{
+	register unsigned long __nr asm("1") = nr;
+	register unsigned long __p1 asm("2") = p1;
+	register unsigned long __p2 asm("3") = p2;
+	register unsigned long __p3 asm("4") = p3;
+	register unsigned long __p4 asm("5") = p4;
+	register long __rc asm("2");
+
+	asm volatile ("diag 2,4,0x500\n"
+		      : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
+			"d" (__p3), "d" (__p4) : "memory", "cc");
+	return __rc;
+}
+/* KVM hypercall @nr with five parameters in R2-R6; result read from R2. */
+static inline long kvm_hypercall5(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3,
+			       unsigned long p4, unsigned long p5)
+{
+	register unsigned long __nr asm("1") = nr;
+	register unsigned long __p1 asm("2") = p1;
+	register unsigned long __p2 asm("3") = p2;
+	register unsigned long __p3 asm("4") = p3;
+	register unsigned long __p4 asm("5") = p4;
+	register unsigned long __p5 asm("6") = p5;
+	register long __rc asm("2");
+
+	asm volatile ("diag 2,4,0x500\n"
+		      : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
+			"d" (__p3), "d" (__p4), "d" (__p5)  : "memory", "cc");
+	return __rc;
+}
+/* KVM hypercall @nr with six parameters in R2-R7; result read from R2. */
+static inline long kvm_hypercall6(unsigned long nr, unsigned long p1,
+			       unsigned long p2, unsigned long p3,
+			       unsigned long p4, unsigned long p5,
+			       unsigned long p6)
+{
+	register unsigned long __nr asm("1") = nr;
+	register unsigned long __p1 asm("2") = p1;
+	register unsigned long __p2 asm("3") = p2;
+	register unsigned long __p3 asm("4") = p3;
+	register unsigned long __p4 asm("5") = p4;
+	register unsigned long __p5 asm("6") = p5;
+	register unsigned long __p6 asm("7") = p6;
+	register long __rc asm("2");
+
+	asm volatile ("diag 2,4,0x500\n"
+		      : "=d" (__rc) : "d" (__nr), "0" (__p1), "d" (__p2),
+			"d" (__p3), "d" (__p4), "d" (__p5), "d" (__p6)
+		      : "memory", "cc");
+	return __rc;
+}
+
+/* kvm on s390 is always paravirtualization enabled */
+static inline int kvm_para_available(void)
+{
+	return 1;
+}
+
+/* No feature bits are currently assigned for kvm on s390 */
+static inline unsigned int kvm_arch_para_features(void)
+{
+	return 0;
+}
+/* This s390 header provides a constant stub: it always reports false. */
+static inline bool kvm_check_and_clear_guest_paused(void)
+{
+	return false;
+}
+
+#endif /* __S390_KVM_PARA_H */
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
new file mode 100644
index 000000000..fc8a82847
--- /dev/null
+++ b/arch/s390/include/asm/linkage.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#include <linux/stringify.h>
+
+#define __ALIGN .align 4, 0x07
+#define __ALIGN_STR __stringify(__ALIGN)
+
+#endif
diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h
new file mode 100644
index 000000000..7aa799134
--- /dev/null
+++ b/arch/s390/include/asm/livepatch.h
@@ -0,0 +1,43 @@
+/*
+ * livepatch.h - s390-specific Kernel Live Patching Core
+ *
+ *  Copyright (c) 2013-2015 SUSE
+ *   Authors: Jiri Kosina
+ *	      Vojtech Pavlik
+ *	      Jiri Slaby
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef ASM_LIVEPATCH_H
+#define ASM_LIVEPATCH_H
+
+#include <linux/module.h>
+
+#ifdef CONFIG_LIVEPATCH
+static inline int klp_check_compiler_support(void)
+{
+	return 0;
+}
+
+static inline int klp_write_module_reloc(struct module *mod, unsigned long
+		type, unsigned long loc, unsigned long value)
+{
+	/* not supported yet */
+	return -ENOSYS;
+}
+/* Set the PSW address recorded in @regs to @ip. */
+static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+	regs->psw.addr = ip;
+}
+#else
+#error Live patching support is disabled; check CONFIG_LIVEPATCH
+#endif
+
+#endif
diff --git a/arch/s390/include/asm/local.h b/arch/s390/include/asm/local.h
new file mode 100644
index 000000000..c11c530f7
--- /dev/null
+++ b/arch/s390/include/asm/local.h
@@ -0,0 +1 @@
+#include <asm-generic/local.h>
diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h
new file mode 100644
index 000000000..36c93b5cc
--- /dev/null
+++ b/arch/s390/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
new file mode 100644
index 000000000..663f23e37
--- /dev/null
+++ b/arch/s390/include/asm/lowcore.h
@@ -0,0 +1,217 @@
+/*
+ *    Copyright IBM Corp. 1999, 2012
+ *    Author(s): Hartmut Penner <hp@de.ibm.com>,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Denis Joseph Barrow,
+ */
+
+#ifndef _ASM_S390_LOWCORE_H
+#define _ASM_S390_LOWCORE_H
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+#include <asm/cpu.h>
+#include <asm/types.h>
+
+#define LC_ORDER 1
+#define LC_PAGES 2
+
+struct save_area {
+	u64	fp_regs[16];
+	u64	gp_regs[16];
+	u8	psw[16];
+	u8	pad1[8];
+	u32	pref_reg;
+	u32	fp_ctrl_reg;
+	u8	pad2[4];
+	u32	tod_reg;
+	u64	timer;
+	u64	clk_cmp;
+	u8	pad3[8];
+	u32	acc_regs[16];
+	u64	ctrl_regs[16];
+} __packed;
+
+struct save_area_ext {
+	struct save_area	sa;
+	__vector128		vx_regs[32];
+};
+
+struct _lowcore {
+	__u8	pad_0x0000[0x0014-0x0000];	/* 0x0000 */
+	__u32	ipl_parmblock_ptr;		/* 0x0014 */
+	__u8	pad_0x0018[0x0080-0x0018];	/* 0x0018 */
+	__u32	ext_params;			/* 0x0080 */
+	__u16	ext_cpu_addr;			/* 0x0084 */
+	__u16	ext_int_code;			/* 0x0086 */
+	__u16	svc_ilc;			/* 0x0088 */
+	__u16	svc_code;			/* 0x008a */
+	__u16	pgm_ilc;			/* 0x008c */
+	__u16	pgm_code;			/* 0x008e */
+	__u32	data_exc_code;			/* 0x0090 */
+	__u16	mon_class_num;			/* 0x0094 */
+	__u8	per_code;			/* 0x0096 */
+	__u8	per_atmid;			/* 0x0097 */
+	__u64	per_address;			/* 0x0098 */
+	__u8	exc_access_id;			/* 0x00a0 */
+	__u8	per_access_id;			/* 0x00a1 */
+	__u8	op_access_id;			/* 0x00a2 */
+	__u8	ar_mode_id;			/* 0x00a3 */
+	__u8	pad_0x00a4[0x00a8-0x00a4];	/* 0x00a4 */
+	__u64	trans_exc_code;			/* 0x00a8 */
+	__u64	monitor_code;			/* 0x00b0 */
+	__u16	subchannel_id;			/* 0x00b8 */
+	__u16	subchannel_nr;			/* 0x00ba */
+	__u32	io_int_parm;			/* 0x00bc */
+	__u32	io_int_word;			/* 0x00c0 */
+	__u8	pad_0x00c4[0x00c8-0x00c4];	/* 0x00c4 */
+	__u32	stfl_fac_list;			/* 0x00c8 */
+	__u8	pad_0x00cc[0x00e8-0x00cc];	/* 0x00cc */
+	__u32	mcck_interruption_code[2];	/* 0x00e8 */
+	__u8	pad_0x00f0[0x00f4-0x00f0];	/* 0x00f0 */
+	__u32	external_damage_code;		/* 0x00f4 */
+	__u64	failing_storage_address;	/* 0x00f8 */
+	__u8	pad_0x0100[0x0110-0x0100];	/* 0x0100 */
+	__u64	breaking_event_addr;		/* 0x0110 */
+	__u8	pad_0x0118[0x0120-0x0118];	/* 0x0118 */
+	psw_t	restart_old_psw;		/* 0x0120 */
+	psw_t	external_old_psw;		/* 0x0130 */
+	psw_t	svc_old_psw;			/* 0x0140 */
+	psw_t	program_old_psw;		/* 0x0150 */
+	psw_t	mcck_old_psw;			/* 0x0160 */
+	psw_t	io_old_psw;			/* 0x0170 */
+	__u8	pad_0x0180[0x01a0-0x0180];	/* 0x0180 */
+	psw_t	restart_psw;			/* 0x01a0 */
+	psw_t	external_new_psw;		/* 0x01b0 */
+	psw_t	svc_new_psw;			/* 0x01c0 */
+	psw_t	program_new_psw;		/* 0x01d0 */
+	psw_t	mcck_new_psw;			/* 0x01e0 */
+	psw_t	io_new_psw;			/* 0x01f0 */
+
+	/* Save areas. */
+	__u64	save_area_sync[8];		/* 0x0200 */
+	__u64	save_area_async[8];		/* 0x0240 */
+	__u64	save_area_restart[1];		/* 0x0280 */
+
+	/* CPU flags. */
+	__u64	cpu_flags;			/* 0x0288 */
+
+	/* Return psws. */
+	psw_t	return_psw;			/* 0x0290 */
+	psw_t	return_mcck_psw;		/* 0x02a0 */
+
+	/* CPU accounting and timing values. */
+	__u64	sync_enter_timer;		/* 0x02b0 */
+	__u64	async_enter_timer;		/* 0x02b8 */
+	__u64	mcck_enter_timer;		/* 0x02c0 */
+	__u64	exit_timer;			/* 0x02c8 */
+	__u64	user_timer;			/* 0x02d0 */
+	__u64	system_timer;			/* 0x02d8 */
+	__u64	steal_timer;			/* 0x02e0 */
+	__u64	last_update_timer;		/* 0x02e8 */
+	__u64	last_update_clock;		/* 0x02f0 */
+	__u64	int_clock;			/* 0x02f8 */
+	__u64	mcck_clock;			/* 0x0300 */
+	__u64	clock_comparator;		/* 0x0308 */
+
+	/* Current process. */
+	__u64	current_task;			/* 0x0310 */
+	__u64	thread_info;			/* 0x0318 */
+	__u64	kernel_stack;			/* 0x0320 */
+
+	/* Interrupt, panic and restart stack. */
+	__u64	async_stack;			/* 0x0328 */
+	__u64	panic_stack;			/* 0x0330 */
+	__u64	restart_stack;			/* 0x0338 */
+
+	/* Restart function and parameter. */
+	__u64	restart_fn;			/* 0x0340 */
+	__u64	restart_data;			/* 0x0348 */
+	__u64	restart_source;			/* 0x0350 */
+
+	/* Address space pointer. */
+	__u64	kernel_asce;			/* 0x0358 */
+	__u64	user_asce;			/* 0x0360 */
+	__u64	current_pid;			/* 0x0368 */
+
+	/* SMP info area */
+	__u32	cpu_nr;				/* 0x0370 */
+	__u32	softirq_pending;		/* 0x0374 */
+	__u64	percpu_offset;			/* 0x0378 */
+	__u64	vdso_per_cpu_data;		/* 0x0380 */
+	__u64	machine_flags;			/* 0x0388 */
+	__u8	pad_0x0390[0x0398-0x0390];	/* 0x0390 */
+	__u64	gmap;				/* 0x0398 */
+	__u32	spinlock_lockval;		/* 0x03a0 */
+	__u8	pad_0x03a0[0x0400-0x03a4];	/* 0x03a4 */
+
+	/* Per cpu primary space access list */
+	__u32	paste[16];			/* 0x0400 */
+
+	__u8	pad_0x04c0[0x0e00-0x0440];	/* 0x0440 */
+
+	/*
+	 * 0xe00 contains the address of the IPL Parameter Information
+	 * block. Dump tools need IPIB for IPL after dump.
+	 * Note: do not change the position of any fields in 0x0e00-0x0f00
+	 */
+	__u64	ipib;				/* 0x0e00 */
+	__u32	ipib_checksum;			/* 0x0e08 */
+	__u64	vmcore_info;			/* 0x0e0c */
+	__u8	pad_0x0e14[0x0e18-0x0e14];	/* 0x0e14 */
+	__u64	os_info;			/* 0x0e18 */
+	__u8	pad_0x0e20[0x0f00-0x0e20];	/* 0x0e20 */
+
+	/* Extended facility list */
+	__u64	stfle_fac_list[32];		/* 0x0f00 */
+	__u8	pad_0x1000[0x11b0-0x1000];	/* 0x1000 */
+
+	/* Pointer to vector register save area */
+	__u64	vector_save_area_addr;		/* 0x11b0 */
+
+	/* 64 bit extparam used for pfault/diag 250: defined by architecture */
+	__u64	ext_params2;			/* 0x11B8 */
+	__u8	pad_0x11c0[0x1200-0x11C0];	/* 0x11C0 */
+
+	/* CPU register save area: defined by architecture */
+	__u64	floating_pt_save_area[16];	/* 0x1200 */
+	__u64	gpregs_save_area[16];		/* 0x1280 */
+	psw_t	psw_save_area;			/* 0x1300 */
+	__u8	pad_0x1310[0x1318-0x1310];	/* 0x1310 */
+	__u32	prefixreg_save_area;		/* 0x1318 */
+	__u32	fpt_creg_save_area;		/* 0x131c */
+	__u8	pad_0x1320[0x1324-0x1320];	/* 0x1320 */
+	__u32	tod_progreg_save_area;		/* 0x1324 */
+	__u32	cpu_timer_save_area[2];		/* 0x1328 */
+	__u32	clock_comp_save_area[2];	/* 0x1330 */
+	__u8	pad_0x1338[0x1340-0x1338];	/* 0x1338 */
+	__u32	access_regs_save_area[16];	/* 0x1340 */
+	__u64	cregs_save_area[16];		/* 0x1380 */
+	__u8	pad_0x1400[0x1800-0x1400];	/* 0x1400 */
+
+	/* Transaction abort diagnostic block */
+	__u8	pgm_tdb[256];			/* 0x1800 */
+	__u8	pad_0x1900[0x1c00-0x1900];	/* 0x1900 */
+
+	/* Software defined save area for vector registers */
+	__u8	vector_save_area[1024];		/* 0x1c00 */
+} __packed;
+
+#define S390_lowcore (*((struct _lowcore *) 0))
+
+extern struct _lowcore *lowcore_ptr[];
+/* Run the SPX instruction with @address as its 32-bit memory operand. */
+static inline void set_prefix(__u32 address)
+{
+	asm volatile("spx %0" : : "m" (address) : "memory");
+}
+/* Run STPX into a local 32-bit word and hand that value back. */
+static inline __u32 store_prefix(void)
+{
+	__u32 address;
+
+	asm volatile("stpx %0" : "=m" (address));
+	return address;
+}
+
+#endif /* _ASM_S390_LOWCORE_H */
diff --git a/arch/s390/include/asm/mathemu.h b/arch/s390/include/asm/mathemu.h
new file mode 100644
index 000000000..614dfaf47
--- /dev/null
+++ b/arch/s390/include/asm/mathemu.h
@@ -0,0 +1,28 @@
+/*
+ *    IEEE floating point emulation.
+ *
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#ifndef __MATHEMU__
+#define __MATHEMU__
+
+extern int math_emu_b3(__u8 *, struct pt_regs *);
+extern int math_emu_ed(__u8 *, struct pt_regs *);
+extern int math_emu_ldr(__u8 *);
+extern int math_emu_ler(__u8 *);
+extern int math_emu_std(__u8 *, struct pt_regs *);
+extern int math_emu_ld(__u8 *, struct pt_regs *);
+extern int math_emu_ste(__u8 *, struct pt_regs *);
+extern int math_emu_le(__u8 *, struct pt_regs *);
+extern int math_emu_lfpc(__u8 *, struct pt_regs *);
+extern int math_emu_stfpc(__u8 *, struct pt_regs *);
+extern int math_emu_srnm(__u8 *, struct pt_regs *);
+
+#endif                                 /* __MATHEMU__                      */
+
+
+
+
diff --git a/arch/s390/include/asm/mman.h b/arch/s390/include/asm/mman.h
new file mode 100644
index 000000000..b55a59e1d
--- /dev/null
+++ b/arch/s390/include/asm/mman.h
@@ -0,0 +1,15 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/mman.h"
+ */
+#ifndef __S390_MMAN_H__
+#define __S390_MMAN_H__
+
+#include <uapi/asm/mman.h>
+
+#ifndef __ASSEMBLY__
+int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags);
+#define arch_mmap_check(addr, len, flags) s390_mmap_check(addr, len, flags)
+#endif
+#endif /* __S390_MMAN_H__ */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
new file mode 100644
index 000000000..d29ad9545
--- /dev/null
+++ b/arch/s390/include/asm/mmu.h
@@ -0,0 +1,44 @@
+#ifndef __MMU_H
+#define __MMU_H
+
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+
+typedef struct {
+	cpumask_t cpu_attach_mask;
+	atomic_t attach_count;
+	unsigned int flush_mm;
+	spinlock_t list_lock;
+	struct list_head pgtable_list;
+	struct list_head gmap_list;
+	unsigned long asce_bits;
+	unsigned long asce_limit;
+	unsigned long vdso_base;
+	/* The mmu context allocates 4K page tables. */
+	unsigned int alloc_pgste:1;
+	/* The mmu context uses extended page tables. */
+	unsigned int has_pgste:1;
+	/* The mmu context uses storage keys. */
+	unsigned int use_skey:1;
+} mm_context_t;
+
+#define INIT_MM_CONTEXT(name)						      \
+	.context.list_lock    = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \
+	.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list),    \
+	.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
+
+static inline int tprot(unsigned long addr)
+{
+	int rc = -EFAULT;	/* result if the EX_TABLE fixup skips ipm/srl */
+
+	asm volatile(
+		"	tprot	0(%1),0\n"
+		"0:	ipm	%0\n"
+		"	srl	%0,28\n"
+		"1:\n"
+		EX_TABLE(0b,1b)	/* NOTE(review): presumably resumes at 1: on a fault at 0: */
+		: "+d" (rc) : "a" (addr) : "cc");
+	return rc;	/* otherwise the ipm output shifted right by 28 */
+}
+
+#endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
new file mode 100644
index 000000000..fb1b93ea3
--- /dev/null
+++ b/arch/s390/include/asm/mmu_context.h
@@ -0,0 +1,133 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/mmu_context.h"
+ */
+
+#ifndef __S390_MMU_CONTEXT_H
+#define __S390_MMU_CONTEXT_H
+
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/tlbflush.h>
+#include <asm/ctl_reg.h>
+
+static inline int init_new_context(struct task_struct *tsk,
+				   struct mm_struct *mm)
+{
+	cpumask_clear(&mm->context.cpu_attach_mask);
+	atomic_set(&mm->context.attach_count, 0);
+	mm->context.flush_mm = 0;
+	mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
+	mm->context.asce_bits |= _ASCE_TYPE_REGION3;
+#ifdef CONFIG_PGSTE
+	mm->context.alloc_pgste = page_table_allocate_pgste;
+	mm->context.has_pgste = 0;
+	mm->context.use_skey = 0;
+#endif
+	mm->context.asce_limit = STACK_TOP_MAX;
+	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
+	return 0;
+}
+
+#define destroy_context(mm)             do { } while (0)
+
+static inline void set_user_asce(struct mm_struct *mm)
+{
+	S390_lowcore.user_asce = mm->context.asce_bits | __pa(mm->pgd);
+	if (current->thread.mm_segment.ar4)
+		__ctl_load(S390_lowcore.user_asce, 7, 7);
+	set_cpu_flag(CIF_ASCE);
+}
+
+static inline void clear_user_asce(void)
+{
+	S390_lowcore.user_asce = S390_lowcore.kernel_asce;
+
+	__ctl_load(S390_lowcore.user_asce, 1, 1);
+	__ctl_load(S390_lowcore.user_asce, 7, 7);
+}
+
+static inline void load_kernel_asce(void)
+{
+	unsigned long asce;	/* value fetched by __ctl_store below */
+
+	__ctl_store(asce, 1, 1);	/* NOTE(review): presumably reads control register 1 */
+	if (asce != S390_lowcore.kernel_asce)
+		__ctl_load(S390_lowcore.kernel_asce, 1, 1);	/* reload only on mismatch */
+	set_cpu_flag(CIF_ASCE);	/* set unconditionally, reload or not */
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	int cpu = smp_processor_id();
+
+	S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd);
+	if (prev == next)
+		return;	/* same mm: only the lowcore user_asce was refreshed */
+	if (MACHINE_HAS_TLB_LC)
+		cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
+	/* Clear old ASCE by loading the kernel ASCE (both the 1 and 7 loads). */
+	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
+	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
+	atomic_inc(&next->context.attach_count);	/* count next before ... */
+	atomic_dec(&prev->context.attach_count);	/* ... dropping prev */
+	if (MACHINE_HAS_TLB_LC)
+		cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
+}
+
+#define finish_arch_post_lock_switch finish_arch_post_lock_switch
+static inline void finish_arch_post_lock_switch(void)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->mm;
+
+	load_kernel_asce();
+	if (mm) {	/* tasks without an mm skip the cpumask/TLB work */
+		preempt_disable();
+		while (atomic_read(&mm->context.attach_count) >> 16)
+			cpu_relax();	/* spin while attach_count has bits above 15 set */
+
+		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+		if (mm->context.flush_mm)
+			__tlb_flush_mm(mm);	/* only when flush_mm is non-zero */
+		preempt_enable();
+	}
+	set_fs(current->thread.mm_segment);	/* runs with or without an mm */
+}
+
+#define enter_lazy_tlb(mm,tsk)	do { } while (0)
+#define deactivate_mm(tsk,mm)	do { } while (0)
+
+static inline void activate_mm(struct mm_struct *prev,
+                               struct mm_struct *next)
+{
+	switch_mm(prev, next, current);
+	cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+	set_user_asce(next);
+}
+
+static inline void arch_dup_mmap(struct mm_struct *oldmm,
+				 struct mm_struct *mm)
+{
+	if (oldmm->context.asce_limit < mm->context.asce_limit)
+		crst_table_downgrade(mm, oldmm->context.asce_limit);
+}
+
+static inline void arch_exit_mmap(struct mm_struct *mm)
+{
+}
+
+static inline void arch_unmap(struct mm_struct *mm,
+			struct vm_area_struct *vma,
+			unsigned long start, unsigned long end)
+{
+}
+
+static inline void arch_bprm_mm_init(struct mm_struct *mm,
+				     struct vm_area_struct *vma)
+{
+}
+
+#endif /* __S390_MMU_CONTEXT_H */
diff --git a/arch/s390/include/asm/module.h b/arch/s390/include/asm/module.h
new file mode 100644
index 000000000..df1f861a8
--- /dev/null
+++ b/arch/s390/include/asm/module.h
@@ -0,0 +1,34 @@
+#ifndef _ASM_S390_MODULE_H
+#define _ASM_S390_MODULE_H
+
+#include <asm-generic/module.h>
+
+/*
+ * This file contains the s390 architecture specific module code.
+ */
+
+struct mod_arch_syminfo
+{
+	unsigned long got_offset;
+	unsigned long plt_offset;
+	int got_initialized;
+	int plt_initialized;
+};
+
+struct mod_arch_specific
+{
+	/* Starting offset of got in the module core memory. */
+	unsigned long got_offset;
+	/* Starting offset of plt in the module core memory. */
+	unsigned long plt_offset;
+	/* Size of the got. */
+	unsigned long got_size;
+	/* Size of the plt. */
+	unsigned long plt_size;
+	/* Number of symbols in syminfo. */
+	int nsyms;
+	/* Additional symbol information (got and plt offsets). */
+	struct mod_arch_syminfo *syminfo;
+};
+
+#endif /* _ASM_S390_MODULE_H */
diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h
new file mode 100644
index 000000000..458c1f7fb
--- /dev/null
+++ b/arch/s390/include/asm/mutex.h
@@ -0,0 +1,9 @@
+/*
+ * Pull in the generic implementation for the mutex fastpath.
+ *
+ * TODO: implement optimized primitives instead, or leave the generic
+ * implementation in place, or pick the atomic_xchg() based generic
+ * implementation. (see asm-generic/mutex-xchg.h for details)
+ */
+
+#include <asm-generic/mutex-dec.h>
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
new file mode 100644
index 000000000..3027a5a72
--- /dev/null
+++ b/arch/s390/include/asm/nmi.h
@@ -0,0 +1,66 @@
+/*
+ *   Machine check handler definitions
+ *
+ *    Copyright IBM Corp. 2000, 2009
+ *    Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Cornelia Huck <cornelia.huck@de.ibm.com>,
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#ifndef _ASM_S390_NMI_H
+#define _ASM_S390_NMI_H
+
+#include <linux/types.h>
+
+struct mci {
+	__u32 sd :  1; /* 00 system damage */
+	__u32 pd :  1; /* 01 instruction-processing damage */
+	__u32 sr :  1; /* 02 system recovery */
+	__u32	 :  1; /* 03 */
+	__u32 cd :  1; /* 04 timing-facility damage */
+	__u32 ed :  1; /* 05 external damage */
+	__u32	 :  1; /* 06 */
+	__u32 dg :  1; /* 07 degradation */
+	__u32 w  :  1; /* 08 warning pending */
+	__u32 cp :  1; /* 09 channel-report pending */
+	__u32 sp :  1; /* 10 service-processor damage */
+	__u32 ck :  1; /* 11 channel-subsystem damage */
+	__u32	 :  2; /* 12-13 */
+	__u32 b  :  1; /* 14 backed up */
+	__u32	 :  1; /* 15 */
+	__u32 se :  1; /* 16 storage error uncorrected */
+	__u32 sc :  1; /* 17 storage error corrected */
+	__u32 ke :  1; /* 18 storage-key error uncorrected */
+	__u32 ds :  1; /* 19 storage degradation */
+	__u32 wp :  1; /* 20 psw mwp validity */
+	__u32 ms :  1; /* 21 psw mask and key validity */
+	__u32 pm :  1; /* 22 psw program mask and cc validity */
+	__u32 ia :  1; /* 23 psw instruction address validity */
+	__u32 fa :  1; /* 24 failing storage address validity */
+	__u32 vr :  1; /* 25 vector register validity */
+	__u32 ec :  1; /* 26 external damage code validity */
+	__u32 fp :  1; /* 27 floating point register validity */
+	__u32 gr :  1; /* 28 general register validity */
+	__u32 cr :  1; /* 29 control register validity */
+	__u32	 :  1; /* 30 */
+	__u32 st :  1; /* 31 storage logical validity */
+	__u32 ie :  1; /* 32 indirect storage error */
+	__u32 ar :  1; /* 33 access register validity */
+	__u32 da :  1; /* 34 delayed access exception */
+	__u32	 :  7; /* 35-41 */
+	__u32 pr :  1; /* 42 tod programmable register validity */
+	__u32 fc :  1; /* 43 fp control register validity */
+	__u32 ap :  1; /* 44 ancillary report */
+	__u32	 :  1; /* 45 */
+	__u32 ct :  1; /* 46 cpu timer validity */
+	__u32 cc :  1; /* 47 clock comparator validity */
+	__u32	 : 16; /* 48-63 */
+};
+
+struct pt_regs;
+
+extern void s390_handle_mcck(void);
+extern void s390_do_machine_check(struct pt_regs *regs);
+
+#endif /* _ASM_S390_NMI_H */
diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
new file mode 100644
index 000000000..295f2c4f1
--- /dev/null
+++ b/arch/s390/include/asm/os_info.h
@@ -0,0 +1,49 @@
+/*
+ * OS info memory interface
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+#ifndef _ASM_S390_OS_INFO_H
+#define _ASM_S390_OS_INFO_H
+
+#define OS_INFO_VERSION_MAJOR	1
+#define OS_INFO_VERSION_MINOR	1
+#define OS_INFO_MAGIC		0x4f53494e464f535aULL /* OSINFOSZ */
+
+#define OS_INFO_VMCOREINFO	0
+#define OS_INFO_REIPL_BLOCK	1
+
+struct os_info_entry {
+	u64	addr;
+	u64	size;
+	u32	csum;
+} __packed;
+
+struct os_info {
+	u64	magic;
+	u32	csum;
+	u16	version_major;
+	u16	version_minor;
+	u64	crashkernel_addr;
+	u64	crashkernel_size;
+	struct os_info_entry entry[2];
+	u8	reserved[4024];
+} __packed;
+
+void os_info_init(void);
+void os_info_entry_add(int nr, void *ptr, u64 len);
+void os_info_crashkernel_add(unsigned long base, unsigned long size);
+u32 os_info_csum(struct os_info *os_info);
+
+#ifdef CONFIG_CRASH_DUMP
+void *os_info_old_entry(int nr, unsigned long *size);
+int copy_from_oldmem(void *dest, void *src, size_t count);
+#else
+static inline void *os_info_old_entry(int nr, unsigned long *size)
+{
+	return NULL;
+}
+#endif
+
+#endif /* _ASM_S390_OS_INFO_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
new file mode 100644
index 000000000..53eacbd4f
--- /dev/null
+++ b/arch/s390/include/asm/page.h
@@ -0,0 +1,156 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Hartmut Penner (hp@de.ibm.com)
+ */
+
+#ifndef _S390_PAGE_H
+#define _S390_PAGE_H
+
+#include <linux/const.h>
+#include <asm/types.h>
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT      12
+#define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_MASK       (~(PAGE_SIZE-1))
+#define PAGE_DEFAULT_ACC	0
+#define PAGE_DEFAULT_KEY	(PAGE_DEFAULT_ACC << 4)
+
+#define HPAGE_SHIFT	20
+#define HPAGE_SIZE	(1UL << HPAGE_SHIFT)
+#define HPAGE_MASK	(~(HPAGE_SIZE - 1))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+
+#define ARCH_HAS_SETCLEAR_HUGE_PTE
+#define ARCH_HAS_HUGE_PTE_TYPE
+#define ARCH_HAS_PREPARE_HUGEPAGE
+#define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH
+
+#include <asm/setup.h>
+#ifndef __ASSEMBLY__
+
+static inline void storage_key_init_range(unsigned long start, unsigned long end)
+{
+#if PAGE_DEFAULT_KEY
+	__storage_key_init_range(start, end);
+#endif
+}
+
+#define clear_page(page)	memset((page), 0, PAGE_SIZE)
+
+/*
+ * copy_page uses the mvcl instruction with 0xb0 padding byte in order to
+ * bypass caches when copying a page. Especially when copying huge pages
+ * this keeps L1 and L2 data caches alive.
+ */
+static inline void copy_page(void *to, void *from)
+{
+	register void *reg2 asm ("2") = to;
+	register unsigned long reg3 asm ("3") = 0x1000;
+	register void *reg4 asm ("4") = from;
+	register unsigned long reg5 asm ("5") = 0xb0001000;
+	asm volatile(
+		"	mvcl	2,4"
+		: "+d" (reg2), "+d" (reg3), "+d" (reg4), "+d" (reg5)
+		: : "memory", "cc");
+}
+
+#define clear_user_page(page, vaddr, pg)	clear_page(page)
+#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+
+#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
+	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+
+/*
+ * These are used to make use of C type-checking..
+ */
+
+typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct { unsigned long pgste; } pgste_t;
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef pte_t *pgtable_t;
+
+#define pgprot_val(x)	((x).pgprot)
+#define pgste_val(x)	((x).pgste)
+#define pte_val(x)	((x).pte)
+#define pmd_val(x)	((x).pmd)
+#define pud_val(x)	((x).pud)
+#define pgd_val(x)      ((x).pgd)
+
+#define __pgste(x)	((pgste_t) { (x) } )
+#define __pte(x)        ((pte_t) { (x) } )
+#define __pmd(x)        ((pmd_t) { (x) } )
+#define __pud(x)	((pud_t) { (x) } )
+#define __pgd(x)        ((pgd_t) { (x) } )
+#define __pgprot(x)     ((pgprot_t) { (x) } )
+
+static inline void page_set_storage_key(unsigned long addr,
+					unsigned char skey, int mapped)
+{
+	if (!mapped)
+		asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0"
+			     : : "d" (skey), "a" (addr));
+	else
+		asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
+}
+
+static inline unsigned char page_get_storage_key(unsigned long addr)
+{
+	unsigned char skey;
+
+	asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr));
+	return skey;
+}
+
+static inline int page_reset_referenced(unsigned long addr)
+{
+	unsigned int ipm;	/* receives the output of the ipm instruction */
+
+	asm volatile(
+		"	rrbe	0,%1\n"
+		"	ipm	%0\n"
+		: "=d" (ipm) : "a" (addr) : "cc");
+	return !!(ipm & 0x20000000);	/* NOTE(review): presumably the upper condition-code bit left by rrbe - confirm */
+}
+
+/* Bits in the storage key */
+#define _PAGE_CHANGED		0x02	/* HW changed bit		*/
+#define _PAGE_REFERENCED	0x04	/* HW referenced bit		*/
+#define _PAGE_FP_BIT		0x08	/* HW fetch protection bit	*/
+#define _PAGE_ACC_BITS		0xf0	/* HW access control bits	*/
+
+struct page;
+void arch_free_page(struct page *page, int order);
+void arch_alloc_page(struct page *page, int order);
+void arch_set_page_states(int make_stable);
+
+static inline int devmem_is_allowed(unsigned long pfn)
+{
+	return 0;
+}
+
+#define HAVE_ARCH_FREE_PAGE
+#define HAVE_ARCH_ALLOC_PAGE
+
+#endif /* !__ASSEMBLY__ */
+
+#define __PAGE_OFFSET           0x0UL
+#define PAGE_OFFSET             0x0UL
+#define __pa(x)                 ((unsigned long)(x))	/* plain cast; whole expansion parenthesized */
+#define __va(x)                 ((void *)(unsigned long)(x))	/* plain cast; whole expansion parenthesized */
+#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
+#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/getorder.h>
+
+#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
new file mode 100644
index 000000000..a648338c4
--- /dev/null
+++ b/arch/s390/include/asm/pci.h
@@ -0,0 +1,191 @@
+#ifndef __ASM_S390_PCI_H
+#define __ASM_S390_PCI_H
+
+/* must be set before including asm-generic/pci.h */
+#define PCI_DMA_BUS_IS_PHYS (0)
+/* must be set before including pci_clp.h */
+#define PCI_BAR_COUNT	6
+
+#include <linux/pci.h>
+#include <linux/mutex.h>
+#include <asm-generic/pci.h>
+#include <asm-generic/pci-dma-compat.h>
+#include <asm/pci_clp.h>
+#include <asm/pci_debug.h>
+
+#define PCIBIOS_MIN_IO		0x1000
+#define PCIBIOS_MIN_MEM		0x10000000
+
+#define pcibios_assign_all_busses()	(0)
+
+void __iomem *pci_iomap(struct pci_dev *, int, unsigned long);
+void pci_iounmap(struct pci_dev *, void __iomem *);
+int pci_domain_nr(struct pci_bus *);
+int pci_proc_domain(struct pci_bus *);
+
+#define ZPCI_BUS_NR			0	/* default bus number */
+#define ZPCI_DEVFN			0	/* default device number */
+
+/* PCI Function Controls */
+#define ZPCI_FC_FN_ENABLED		0x80
+#define ZPCI_FC_ERROR			0x40
+#define ZPCI_FC_BLOCKED			0x20
+#define ZPCI_FC_DMA_ENABLED		0x10
+
+struct zpci_fmb {
+	u32 format	:  8;
+	u32 dma_valid	:  1;
+	u32		: 23;
+	u32 samples;
+	u64 last_update;
+	/* hardware counters */
+	u64 ld_ops;
+	u64 st_ops;
+	u64 stb_ops;
+	u64 rpcit_ops;
+	u64 dma_rbytes;
+	u64 dma_wbytes;
+} __packed __aligned(16);
+
+enum zpci_state {
+	ZPCI_FN_STATE_RESERVED,
+	ZPCI_FN_STATE_STANDBY,
+	ZPCI_FN_STATE_CONFIGURED,
+	ZPCI_FN_STATE_ONLINE,
+	NR_ZPCI_FN_STATES,
+};
+
+struct zpci_bar_struct {
+	struct resource *res;		/* bus resource */
+	u32		val;		/* bar start & 3 flag bits */
+	u16		map_idx;	/* index into bar mapping array */
+	u8		size;		/* order 2 exponent */
+};
+
+/* Private data per function */
+struct zpci_dev {
+	struct pci_dev	*pdev;
+	struct pci_bus	*bus;
+	struct list_head entry;		/* list of all zpci_devices, needed for hotplug, etc. */
+
+	enum zpci_state state;
+	u32		fid;		/* function ID, used by sclp */
+	u32		fh;		/* function handle, used by insn's */
+	u16		vfn;		/* virtual function number */
+	u16		pchid;		/* physical channel ID */
+	u8		pfgid;		/* function group ID */
+	u8		pft;		/* pci function type */
+	u16		domain;
+
+	struct mutex lock;
+	u8 pfip[CLP_PFIP_NR_SEGMENTS];	/* pci function internal path */
+	u32 uid;			/* user defined id */
+	u8 util_str[CLP_UTIL_STR_LEN];	/* utility string */
+
+	/* IRQ stuff */
+	u64		msi_addr;	/* MSI address */
+	unsigned int	max_msi;	/* maximum number of MSI's */
+	struct airq_iv *aibv;		/* adapter interrupt bit vector */
+	unsigned int	aisb;		/* number of the summary bit */
+
+	/* DMA stuff */
+	unsigned long	*dma_table;
+	spinlock_t	dma_table_lock;
+	int		tlb_refresh;
+
+	spinlock_t	iommu_bitmap_lock;
+	unsigned long	*iommu_bitmap;
+	unsigned long	iommu_size;
+	unsigned long	iommu_pages;
+	unsigned int	next_bit;
+
+	char res_name[16];
+	struct zpci_bar_struct bars[PCI_BAR_COUNT];
+
+	u64		start_dma;	/* Start of available DMA addresses */
+	u64		end_dma;	/* End of available DMA addresses */
+	u64		dma_mask;	/* DMA address space mask */
+
+	/* Function measurement block */
+	struct zpci_fmb *fmb;
+	u16		fmb_update;	/* update interval */
+	/* software counters */
+	atomic64_t allocated_pages;
+	atomic64_t mapped_pages;
+	atomic64_t unmapped_pages;
+
+	enum pci_bus_speed max_bus_speed;
+
+	struct dentry	*debugfs_dev;
+	struct dentry	*debugfs_perf;
+};
+
+static inline bool zdev_enabled(struct zpci_dev *zdev)
+{
+	return (zdev->fh & (1UL << 31)) != 0;	/* bit 31 of the function handle */
+}
+
+extern const struct attribute_group *zpci_attr_groups[];
+
+/* -----------------------------------------------------------------------------
+  Prototypes
+----------------------------------------------------------------------------- */
+/* Base stuff */
+int zpci_create_device(struct zpci_dev *);
+int zpci_enable_device(struct zpci_dev *);
+int zpci_disable_device(struct zpci_dev *);
+void zpci_stop_device(struct zpci_dev *);
+int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
+int zpci_unregister_ioat(struct zpci_dev *, u8);
+
+/* CLP */
+int clp_scan_pci_devices(void);
+int clp_rescan_pci_devices(void);
+int clp_rescan_pci_devices_simple(void);
+int clp_add_pci_device(u32, u32, int);
+int clp_enable_fh(struct zpci_dev *, u8);
+int clp_disable_fh(struct zpci_dev *);
+
+#ifdef CONFIG_PCI
+/* Error handling and recovery */
+void zpci_event_error(void *);
+void zpci_event_availability(void *);
+void zpci_rescan(void);
+bool zpci_is_enabled(void);
+#else /* CONFIG_PCI */
+static inline void zpci_event_error(void *e) {}
+static inline void zpci_event_availability(void *e) {}
+static inline void zpci_rescan(void) {}
+#endif /* CONFIG_PCI */
+
+#ifdef CONFIG_HOTPLUG_PCI_S390
+int zpci_init_slot(struct zpci_dev *);
+void zpci_exit_slot(struct zpci_dev *);
+#else /* CONFIG_HOTPLUG_PCI_S390 */
+static inline int zpci_init_slot(struct zpci_dev *zdev)
+{
+	return 0;
+}
+static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
+#endif /* CONFIG_HOTPLUG_PCI_S390 */
+
+/* Helpers */
+struct zpci_dev *get_zdev(struct pci_dev *);
+struct zpci_dev *get_zdev_by_fid(u32);
+
+/* DMA */
+int zpci_dma_init(void);
+void zpci_dma_exit(void);
+
+/* FMB */
+int zpci_fmb_enable_device(struct zpci_dev *);
+int zpci_fmb_disable_device(struct zpci_dev *);
+
+/* Debug */
+int zpci_debug_init(void);
+void zpci_debug_exit(void);
+void zpci_debug_init_device(struct zpci_dev *);
+void zpci_debug_exit_device(struct zpci_dev *);
+void zpci_debug_info(struct zpci_dev *, struct seq_file *);
+
+#endif
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
new file mode 100644
index 000000000..dd78f92f1
--- /dev/null
+++ b/arch/s390/include/asm/pci_clp.h
@@ -0,0 +1,186 @@
+#ifndef _ASM_S390_PCI_CLP_H
+#define _ASM_S390_PCI_CLP_H
+
+#include <asm/clp.h>
+
+/*
+ * Call Logical Processor - Command Codes
+ */
+#define CLP_LIST_PCI		0x0002
+#define CLP_QUERY_PCI_FN	0x0003
+#define CLP_QUERY_PCI_FNGRP	0x0004
+#define CLP_SET_PCI_FN		0x0005
+
+/* PCI function handle list entry */
+struct clp_fh_list_entry {
+	u16 device_id;
+	u16 vendor_id;
+	u32 config_state :  1;
+	u32		 : 31;
+	u32 fid;		/* PCI function id */
+	u32 fh;			/* PCI function handle */
+} __packed;
+
+#define CLP_RC_SETPCIFN_FH	0x0101	/* Invalid PCI fn handle */
+#define CLP_RC_SETPCIFN_FHOP	0x0102	/* Fn handle not valid for op */
+#define CLP_RC_SETPCIFN_DMAAS	0x0103	/* Invalid DMA addr space */
+#define CLP_RC_SETPCIFN_RES	0x0104	/* Insufficient resources */
+#define CLP_RC_SETPCIFN_ALRDY	0x0105	/* Fn already in requested state */
+#define CLP_RC_SETPCIFN_ERR	0x0106	/* Fn in permanent error state */
+#define CLP_RC_SETPCIFN_RECPND	0x0107	/* Error recovery pending */
+#define CLP_RC_SETPCIFN_BUSY	0x0108	/* Fn busy */
+#define CLP_RC_LISTPCI_BADRT	0x010a	/* Resume token not recognized */
+#define CLP_RC_QUERYPCIFG_PFGID	0x010b	/* Unrecognized PFGID */
+
+/* request or response block header length */
+#define LIST_PCI_HDR_LEN	32
+
+/* Number of function handles fitting in response block */
+#define CLP_FH_LIST_NR_ENTRIES				\
+	((CLP_BLK_SIZE - 2 * LIST_PCI_HDR_LEN)		\
+		/ sizeof(struct clp_fh_list_entry))
+
+#define CLP_SET_ENABLE_PCI_FN	0	/* Yes, 0 enables it */
+#define CLP_SET_DISABLE_PCI_FN	1	/* Yes, 1 disables it */
+
+#define CLP_UTIL_STR_LEN	64
+#define CLP_PFIP_NR_SEGMENTS	4
+
+/* List PCI functions request */
+struct clp_req_list_pci {
+	struct clp_req_hdr hdr;
+	u32 fmt			:  4;	/* cmd request block format */
+	u32			: 28;
+	u64 reserved1;
+	u64 resume_token;
+	u64 reserved2;
+} __packed;
+
+/* List PCI functions response */
+struct clp_rsp_list_pci {
+	struct clp_rsp_hdr hdr;
+	u32 fmt			:  4;	/* cmd request block format */
+	u32			: 28;
+	u64 reserved1;
+	u64 resume_token;
+	u32 reserved2;
+	u16 max_fn;
+	u8 reserved3;
+	u8 entry_size;
+	struct clp_fh_list_entry fh_list[CLP_FH_LIST_NR_ENTRIES];
+} __packed;
+
+/* Query PCI function request */
+struct clp_req_query_pci {
+	struct clp_req_hdr hdr;
+	u32 fmt			:  4;	/* cmd request block format */
+	u32			: 28;
+	u64 reserved1;
+	u32 fh;				/* function handle */
+	u32 reserved2;
+	u64 reserved3;
+} __packed;
+
+/* Query PCI function response */
+struct clp_rsp_query_pci {
+	struct clp_rsp_hdr hdr;
+	u32 fmt			:  4;	/* cmd request block format */
+	u32			: 28;
+	u64			: 64;
+	u16 vfn;			/* virtual fn number */
+	u16			:  7;
+	u16 util_str_avail	:  1;	/* utility string available? */
+	u16 pfgid		:  8;	/* pci function group id */
+	u32 fid;			/* pci function id */
+	u8 bar_size[PCI_BAR_COUNT];
+	u16 pchid;
+	u32 bar[PCI_BAR_COUNT];
+	u8 pfip[CLP_PFIP_NR_SEGMENTS];	/* pci function internal path */
+	u32			: 24;
+	u8 pft;				/* pci function type */
+	u64 sdma;			/* start dma as */
+	u64 edma;			/* end dma as */
+	u32 reserved[11];
+	u32 uid;			/* user defined id */
+	u8 util_str[CLP_UTIL_STR_LEN];	/* utility string */
+} __packed;
+
+/* Query PCI function group request */
+struct clp_req_query_pci_grp {
+	struct clp_req_hdr hdr;
+	u32 fmt			:  4;	/* cmd request block format */
+	u32			: 28;
+	u64 reserved1;
+	u32			: 24;
+	u32 pfgid		:  8;	/* function group id */
+	u32 reserved2;
+	u64 reserved3;
+} __packed;
+
+/* Query PCI function group response */
+struct clp_rsp_query_pci_grp {
+	struct clp_rsp_hdr hdr;
+	u32 fmt			:  4;	/* cmd request block format */
+	u32			: 28;
+	u64 reserved1;
+	u16			:  4;
+	u16 noi			: 12;	/* number of interrupts */
+	u8 version;
+	u8			:  6;
+	u8 frame		:  1;
+	u8 refresh		:  1;	/* TLB refresh mode */
+	u16 reserved2;
+	u16 mui;
+	u64 reserved3;
+	u64 dasm;			/* dma address space mask */
+	u64 msia;			/* MSI address */
+	u64 reserved4;
+	u64 reserved5;
+} __packed;
+
+/* Set PCI function request */
+struct clp_req_set_pci {
+	struct clp_req_hdr hdr;
+	u32 fmt			:  4;	/* cmd request block format */
+	u32			: 28;
+	u64 reserved1;
+	u32 fh;				/* function handle */
+	u16 reserved2;
+	u8 oc;				/* operation controls */
+	u8 ndas;			/* number of dma spaces */
+	u64 reserved3;
+} __packed;
+
+/* Set PCI function response */
+struct clp_rsp_set_pci {
+	struct clp_rsp_hdr hdr;
+	u32 fmt			:  4;	/* cmd request block format */
+	u32			: 28;
+	u64 reserved1;
+	u32 fh;				/* function handle */
+	u32 reserved3;
+	u64 reserved4;
+} __packed;
+
+/* Combined request/response block structures used by clp insn */
+struct clp_req_rsp_list_pci {
+	struct clp_req_list_pci request;
+	struct clp_rsp_list_pci response;
+} __packed;
+
+struct clp_req_rsp_set_pci {
+	struct clp_req_set_pci request;
+	struct clp_rsp_set_pci response;
+} __packed;
+
+struct clp_req_rsp_query_pci {
+	struct clp_req_query_pci request;
+	struct clp_rsp_query_pci response;
+} __packed;
+
+struct clp_req_rsp_query_pci_grp {
+	struct clp_req_query_pci_grp request;
+	struct clp_rsp_query_pci_grp response;
+} __packed;
+
+#endif
diff --git a/arch/s390/include/asm/pci_debug.h b/arch/s390/include/asm/pci_debug.h
new file mode 100644
index 000000000..ac24b26fc
--- /dev/null
+++ b/arch/s390/include/asm/pci_debug.h
@@ -0,0 +1,28 @@
+#ifndef _S390_ASM_PCI_DEBUG_H
+#define _S390_ASM_PCI_DEBUG_H
+
+#include <asm/debug.h>
+
+extern debug_info_t *pci_debug_msg_id;
+extern debug_info_t *pci_debug_err_id;
+
+#define zpci_dbg(imp, fmt, args...)				\
+	debug_sprintf_event(pci_debug_msg_id, imp, fmt, ##args)
+
+#define zpci_err(text...)							\
+	do {									\
+		char debug_buffer[16];	/* output truncated to fit, NUL included */	\
+		snprintf(debug_buffer, sizeof(debug_buffer), text);		\
+		debug_text_event(pci_debug_err_id, 0, debug_buffer);		\
+	} while (0)
+
+static inline void zpci_err_hex(void *addr, int len)
+{
+	while (len > 0) {	/* one debug_event call per buf_size-sized step */
+		debug_event(pci_debug_err_id, 0, (void *) addr, len);	/* len = bytes still remaining */
+		len -= pci_debug_err_id->buf_size;
+		addr += pci_debug_err_id->buf_size;	/* arithmetic on void * (GNU C extension) */
+	}
+}
+
+#endif
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
new file mode 100644
index 000000000..30b4c179c
--- /dev/null
+++ b/arch/s390/include/asm/pci_dma.h
@@ -0,0 +1,196 @@
+#ifndef _ASM_S390_PCI_DMA_H
+#define _ASM_S390_PCI_DMA_H
+
+/* I/O Translation Anchor (IOTA) */
+enum zpci_ioat_dtype {
+	ZPCI_IOTA_STO = 0,
+	ZPCI_IOTA_RTTO = 1,
+	ZPCI_IOTA_RSTO = 2,
+	ZPCI_IOTA_RFTO = 3,
+	ZPCI_IOTA_PFAA = 4,
+	ZPCI_IOTA_IOPFAA = 5,
+	ZPCI_IOTA_IOPTO = 7
+};
+
+#define ZPCI_IOTA_IOT_ENABLED		0x800UL
+#define ZPCI_IOTA_DT_ST			(ZPCI_IOTA_STO	<< 2)
+#define ZPCI_IOTA_DT_RT			(ZPCI_IOTA_RTTO << 2)
+#define ZPCI_IOTA_DT_RS			(ZPCI_IOTA_RSTO << 2)
+#define ZPCI_IOTA_DT_RF			(ZPCI_IOTA_RFTO << 2)
+#define ZPCI_IOTA_DT_PF			(ZPCI_IOTA_PFAA << 2)
+#define ZPCI_IOTA_FS_4K			0
+#define ZPCI_IOTA_FS_1M			1
+#define ZPCI_IOTA_FS_2G			2
+#define ZPCI_KEY			(PAGE_DEFAULT_KEY << 5)
+
+#define ZPCI_IOTA_STO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
+#define ZPCI_IOTA_RTTO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
+#define ZPCI_IOTA_RSTO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RS)
+#define ZPCI_IOTA_RFTO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RF)
+#define ZPCI_IOTA_RFAA_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_PF | ZPCI_IOTA_FS_2G)
+
+/* I/O Region and segment tables */
+#define ZPCI_INDEX_MASK			0x7ffUL
+
+#define ZPCI_TABLE_TYPE_MASK		0xc
+#define ZPCI_TABLE_TYPE_RFX		0xc
+#define ZPCI_TABLE_TYPE_RSX		0x8
+#define ZPCI_TABLE_TYPE_RTX		0x4
+#define ZPCI_TABLE_TYPE_SX		0x0
+
+#define ZPCI_TABLE_LEN_RFX		0x3
+#define ZPCI_TABLE_LEN_RSX		0x3
+#define ZPCI_TABLE_LEN_RTX		0x3
+
+#define ZPCI_TABLE_OFFSET_MASK		0xc0
+#define ZPCI_TABLE_SIZE			0x4000
+#define ZPCI_TABLE_ALIGN		ZPCI_TABLE_SIZE
+#define ZPCI_TABLE_ENTRY_SIZE		(sizeof(unsigned long))
+#define ZPCI_TABLE_ENTRIES		(ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+
+#define ZPCI_TABLE_BITS			11
+#define ZPCI_PT_BITS			8
+#define ZPCI_ST_SHIFT			(ZPCI_PT_BITS + PAGE_SHIFT)
+#define ZPCI_RT_SHIFT			(ZPCI_ST_SHIFT + ZPCI_TABLE_BITS)
+
+#define ZPCI_RTE_FLAG_MASK		0x3fffUL
+#define ZPCI_RTE_ADDR_MASK		(~ZPCI_RTE_FLAG_MASK)
+#define ZPCI_STE_FLAG_MASK		0x7ffUL
+#define ZPCI_STE_ADDR_MASK		(~ZPCI_STE_FLAG_MASK)
+
+/* I/O Page tables */
+#define ZPCI_PTE_VALID_MASK		0x400
+#define ZPCI_PTE_INVALID		0x400
+#define ZPCI_PTE_VALID			0x000
+#define ZPCI_PT_SIZE			0x800
+#define ZPCI_PT_ALIGN			ZPCI_PT_SIZE
+#define ZPCI_PT_ENTRIES			(ZPCI_PT_SIZE / ZPCI_TABLE_ENTRY_SIZE)
+#define ZPCI_PT_MASK			(ZPCI_PT_ENTRIES - 1)
+
+#define ZPCI_PTE_FLAG_MASK		0xfffUL
+#define ZPCI_PTE_ADDR_MASK		(~ZPCI_PTE_FLAG_MASK)
+
+/* Shared bits */
+#define ZPCI_TABLE_VALID		0x00
+#define ZPCI_TABLE_INVALID		0x20
+#define ZPCI_TABLE_PROTECTED		0x200
+#define ZPCI_TABLE_UNPROTECTED		0x000
+
+#define ZPCI_TABLE_VALID_MASK		0x20
+#define ZPCI_TABLE_PROT_MASK		0x200
+
+static inline unsigned int calc_rtx(dma_addr_t ptr)
+{
+	return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+static inline unsigned int calc_sx(dma_addr_t ptr)
+{
+	return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+static inline unsigned int calc_px(dma_addr_t ptr)
+{
+	return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
+}
+
+static inline void set_pt_pfaa(unsigned long *entry, void *pfaa)
+{
+	*entry = (*entry & ZPCI_PTE_FLAG_MASK) |	/* keep only the flag bits */
+		 ((unsigned long) pfaa & ZPCI_PTE_ADDR_MASK);
+}
+
+static inline void set_rt_sto(unsigned long *entry, void *sto)
+{
+	*entry = (*entry & ZPCI_RTE_FLAG_MASK) |	/* keep only the flag bits */
+		 ((unsigned long) sto & ZPCI_RTE_ADDR_MASK) |
+		 ZPCI_TABLE_TYPE_RTX;	/* then OR in the RTX type bits */
+}
+
+static inline void set_st_pto(unsigned long *entry, void *pto)
+{
+	*entry = (*entry & ZPCI_STE_FLAG_MASK) |	/* keep only the flag bits */
+		 ((unsigned long) pto & ZPCI_STE_ADDR_MASK) |
+		 ZPCI_TABLE_TYPE_SX;	/* then OR in the SX type bits */
+}
+
+static inline void validate_rt_entry(unsigned long *entry)
+{
+	*entry &= ~ZPCI_TABLE_VALID_MASK;
+	*entry &= ~ZPCI_TABLE_OFFSET_MASK;
+	*entry |= ZPCI_TABLE_VALID;
+	*entry |= ZPCI_TABLE_LEN_RTX;
+}
+
+static inline void validate_st_entry(unsigned long *entry)
+{
+	*entry &= ~ZPCI_TABLE_VALID_MASK;
+	*entry |= ZPCI_TABLE_VALID;
+}
+
+static inline void invalidate_table_entry(unsigned long *entry)
+{
+	*entry &= ~ZPCI_TABLE_VALID_MASK;
+	*entry |= ZPCI_TABLE_INVALID;
+}
+
+static inline void invalidate_pt_entry(unsigned long *entry)
+{
+	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
+	*entry &= ~ZPCI_PTE_VALID_MASK;
+	*entry |= ZPCI_PTE_INVALID;
+}
+
+static inline void validate_pt_entry(unsigned long *entry)
+{
+	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
+	*entry &= ~ZPCI_PTE_VALID_MASK;
+	*entry |= ZPCI_PTE_VALID;
+}
+
+static inline void entry_set_protected(unsigned long *entry)
+{
+	*entry &= ~ZPCI_TABLE_PROT_MASK;
+	*entry |= ZPCI_TABLE_PROTECTED;
+}
+
+static inline void entry_clr_protected(unsigned long *entry)
+{
+	*entry &= ~ZPCI_TABLE_PROT_MASK;
+	*entry |= ZPCI_TABLE_UNPROTECTED;
+}
+
+static inline int reg_entry_isvalid(unsigned long entry)
+{
+	return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
+}
+
+static inline int pt_entry_isvalid(unsigned long entry)
+{
+	return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
+}
+
+static inline int entry_isprotected(unsigned long entry)
+{
+	return (entry & ZPCI_TABLE_PROT_MASK) == ZPCI_TABLE_PROTECTED;
+}
+
+static inline unsigned long *get_rt_sto(unsigned long entry)
+{
+	if ((entry & ZPCI_TABLE_TYPE_MASK) != ZPCI_TABLE_TYPE_RTX)
+		return NULL;	/* type bits are not RTX */
+	return (unsigned long *) (entry & ZPCI_RTE_ADDR_MASK);
+}
+
+static inline unsigned long *get_st_pto(unsigned long entry)
+{
+	if ((entry & ZPCI_TABLE_TYPE_MASK) != ZPCI_TABLE_TYPE_SX)
+		return NULL;	/* type bits are not SX */
+	return (unsigned long *) (entry & ZPCI_STE_ADDR_MASK);
+}
+
+/* Prototypes */
+int zpci_dma_init_device(struct zpci_dev *);
+void zpci_dma_exit_device(struct zpci_dev *);
+
+#endif
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
new file mode 100644
index 000000000..649eb62c5
--- /dev/null
+++ b/arch/s390/include/asm/pci_insn.h
@@ -0,0 +1,80 @@
+#ifndef _ASM_S390_PCI_INSN_H
+#define _ASM_S390_PCI_INSN_H
+
+/* Load/Store status codes */
+#define ZPCI_PCI_ST_FUNC_NOT_ENABLED		4
+#define ZPCI_PCI_ST_FUNC_IN_ERR			8
+#define ZPCI_PCI_ST_BLOCKED			12
+#define ZPCI_PCI_ST_INSUF_RES			16
+#define ZPCI_PCI_ST_INVAL_AS			20
+#define ZPCI_PCI_ST_FUNC_ALREADY_ENABLED	24
+#define ZPCI_PCI_ST_DMA_AS_NOT_ENABLED		28
+#define ZPCI_PCI_ST_2ND_OP_IN_INV_AS		36
+#define ZPCI_PCI_ST_FUNC_NOT_AVAIL		40
+#define ZPCI_PCI_ST_ALREADY_IN_RQ_STATE		44
+
+/* Load/Store return codes */
+#define ZPCI_PCI_LS_OK				0
+#define ZPCI_PCI_LS_ERR				1
+#define ZPCI_PCI_LS_BUSY			2
+#define ZPCI_PCI_LS_INVAL_HANDLE		3
+
+/* Load/Store address space identifiers */
+#define ZPCI_PCIAS_MEMIO_0			0
+#define ZPCI_PCIAS_MEMIO_1			1
+#define ZPCI_PCIAS_MEMIO_2			2
+#define ZPCI_PCIAS_MEMIO_3			3
+#define ZPCI_PCIAS_MEMIO_4			4
+#define ZPCI_PCIAS_MEMIO_5			5
+#define ZPCI_PCIAS_CFGSPC			15
+
+/* Modify PCI Function Controls */
+#define ZPCI_MOD_FC_REG_INT	2
+#define ZPCI_MOD_FC_DEREG_INT	3
+#define ZPCI_MOD_FC_REG_IOAT	4
+#define ZPCI_MOD_FC_DEREG_IOAT	5
+#define ZPCI_MOD_FC_REREG_IOAT	6
+#define ZPCI_MOD_FC_RESET_ERROR	7
+#define ZPCI_MOD_FC_RESET_BLOCK	9
+#define ZPCI_MOD_FC_SET_MEASURE	10
+
+/* FIB function controls */
+#define ZPCI_FIB_FC_ENABLED	0x80
+#define ZPCI_FIB_FC_ERROR	0x40
+#define ZPCI_FIB_FC_LS_BLOCKED	0x20
+#define ZPCI_FIB_FC_DMAAS_REG	0x10
+
+/* Function Information Block; the type zpci_mod_fc() takes a pointer to */
+struct zpci_fib {
+	u32 fmt		:  8;	/* format */
+	u32		: 24;	/* unnamed bit-fields only pad the layout */
+	u32		: 32;
+	u8 fc;			/* function controls (presumably ZPCI_FIB_FC_*) */
+	u64		: 56;
+	u64 pba;		/* PCI base address */
+	u64 pal;		/* PCI address limit */
+	u64 iota;		/* I/O Translation Anchor */
+	u32		:  1;
+	u32 isc		:  3;	/* Interrupt subclass */
+	u32 noi		: 12;	/* Number of interrupts */
+	u32		:  2;
+	u32 aibvo	:  6;	/* Adapter interrupt bit vector offset */
+	u32 sum		:  1;	/* Adapter int summary bit enabled */
+	u32		:  1;
+	u32 aisbo	:  6;	/* Adapter int summary bit offset */
+	u32		: 32;
+	u64 aibv;		/* Adapter int bit vector address */
+	u64 aisb;		/* Adapter int summary bit address */
+	u64 fmb_addr;		/* Function measurement block address and key */
+	u32		: 32;
+	u32 gd;			/* NOTE(review): meaning not documented here */
+} __packed __aligned(8);
+
+int zpci_mod_fc(u64 req, struct zpci_fib *fib);
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
+int zpci_load(u64 *data, u64 req, u64 offset);
+int zpci_store(u64 data, u64 req, u64 offset);
+int zpci_store_block(const u64 *data, u64 req, u64 offset);
+void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc);
+
+#endif
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
new file mode 100644
index 000000000..1a9a98de5
--- /dev/null
+++ b/arch/s390/include/asm/pci_io.h
@@ -0,0 +1,201 @@
+#ifndef _ASM_S390_PCI_IO_H
+#define _ASM_S390_PCI_IO_H
+
+#ifdef CONFIG_PCI
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <asm/pci_insn.h>
+
+/* I/O Map */
+#define ZPCI_IOMAP_MAX_ENTRIES		0x7fff
+#define ZPCI_IOMAP_ADDR_BASE		0x8000000000000000ULL
+#define ZPCI_IOMAP_ADDR_IDX_MASK	0x7fff000000000000ULL
+#define ZPCI_IOMAP_ADDR_OFF_MASK	0x0000ffffffffffffULL
+
+struct zpci_iomap_entry {
+	u32 fh;		/* used as the handle part of a load/store request */
+	u8 bar;		/* used as the address-space part of a request */
+	u16 count;	/* NOTE(review): not used in this header - confirm use */
+};
+
+extern struct zpci_iomap_entry *zpci_iomap_start;
+
+#define ZPCI_IDX(addr)								\
+	(((__force u64) (addr) & ZPCI_IOMAP_ADDR_IDX_MASK) >> 48)
+#define ZPCI_OFFSET(addr)							\
+	((__force u64) (addr) & ZPCI_IOMAP_ADDR_OFF_MASK)
+/* Request word: handle << 32 | space << 16 | len; any expression may be passed */
+#define ZPCI_CREATE_REQ(handle, space, len)					\
+	((u64) (handle) << 32 | (space) << 16 | (len))
+
+#define zpci_read(LENGTH, RETTYPE)						\
+static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr)	\
+{										\
+	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];	\
+	u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH);		\
+	u64 data;								\
+	int rc;									\
+	/* a failed load is handed to the caller as all ones */			\
+	rc = zpci_load(&data, req, ZPCI_OFFSET(addr));				\
+	if (rc)									\
+		data = -1ULL;							\
+	return (RETTYPE) data;							\
+}
+
+#define zpci_write(LENGTH, VALTYPE)						\
+static inline void zpci_write_##VALTYPE(VALTYPE val,				\
+					const volatile void __iomem *addr)	\
+{										\
+	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];	\
+	u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH);		\
+	u64 data = (VALTYPE) val;						\
+	/* the status returned by zpci_store() is dropped */			\
+	zpci_store(data, req, ZPCI_OFFSET(addr));				\
+}
+
+zpci_read(8, u64)
+zpci_read(4, u32)
+zpci_read(2, u16)
+zpci_read(1, u8)
+zpci_write(8, u64)
+zpci_write(4, u32)
+zpci_write(2, u16)
+zpci_write(1, u8)
+
+static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len)
+{
+	u64 val = 0;		/* kept for bad lengths: let FW report error */
+
+	/*
+	 * Fetch exactly len bytes from the source and widen them to u64.
+	 */
+	switch (len) {
+	case 1:
+		val = *(const u8 *) data;
+		break;
+	case 2:
+		val = *(const u16 *) data;
+		break;
+	case 4:
+		val = *(const u32 *) data;
+		break;
+	case 8:
+		val = *data;
+		break;
+	}
+	return zpci_store(val, req, offset);
+}
+
+static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
+{
+	u64 data;
+	int cc = zpci_load(&data, req, offset);
+
+	/* On a failed load dst is left untouched and the code is passed on. */
+	if (cc)
+		return cc;
+
+	/* Store only the len bytes that were requested; other len: no store. */
+	switch (len) {
+	case 1:
+		*((u8 *) dst) = (u8) data;
+		break;
+	case 2:
+		*((u16 *) dst) = (u16) data;
+		break;
+	case 4:
+		*((u32 *) dst) = (u32) data;
+		break;
+	case 8:
+		*((u64 *) dst) = (u64) data;
+		break;
+	}
+	return 0;
+}
+
+static inline int zpci_write_block(u64 req, const u64 *data, u64 offset)
+{
+	return zpci_store_block(data, req, offset);	/* note: data goes first */
+}
+
+static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max)
+{
+	u64 addr_bits = src | dst;	/* a bit is clear only if clear in both */
+	int limit = len < max ? len : max;
+	int size;
+
+	/* Double while both addresses stay aligned and size fits the limit. */
+	for (size = 1; !(addr_bits & 0x1) && (size << 1) <= limit; size <<= 1)
+		addr_bits >>= 1;
+	return size;
+}
+
+static inline int zpci_memcpy_fromio(void *dst,
+				     const volatile void __iomem *src,
+				     unsigned long n)
+{
+	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(src)];
+	u64 req, offset = ZPCI_OFFSET(src);
+	int size, rc = 0;
+
+	while (n > 0) {
+		/* size by the running offset: src itself never advances */
+		size = zpci_get_max_write_size(offset, (u64) dst, n, 8);
+		req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
+		rc = zpci_read_single(req, dst, offset, size);
+		if (rc)
+			break;		/* first failing load ends the copy */
+		offset += size;
+		dst += size;
+		n -= size;
+	}
+	return rc;			/* 0, or the code of the failed load */
+}
+
+static inline int zpci_memcpy_toio(volatile void __iomem *dst,
+				   const void *src, unsigned long n)
+{
+	struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(dst)];
+	u64 req, offset = ZPCI_OFFSET(dst);
+	int size, rc = 0;
+
+	if (!src)
+		return -EINVAL;
+
+	while (n > 0) {
+		/* size by the running offset: dst itself never advances */
+		size = zpci_get_max_write_size(offset, (u64) src, n, 128);
+		req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
+
+		if (size > 8) /* main path */
+			rc = zpci_write_block(req, src, offset);
+		else
+			rc = zpci_write_single(req, src, offset, size);
+		if (rc)
+			break;		/* first failing store ends the copy */
+		offset += size;
+		src += size;
+		n -= size;
+	}
+	return rc;			/* 0, or the code of the failed store */
+}
+
+static inline int zpci_memset_io(volatile void __iomem *dst,
+				 unsigned char val, size_t count)
+{
+	u8 *fill = kmalloc(count, GFP_KERNEL);
+	int rc = -ENOMEM;
+
+	/* Stage the pattern in a temporary buffer, then copy it out. */
+	if (fill != NULL) {
+		memset(fill, val, count);
+		rc = zpci_memcpy_toio(dst, fill, count);
+		kfree(fill);
+	}
+	return rc;
+}
+
+#endif /* CONFIG_PCI */
+
+#endif /* _ASM_S390_PCI_IO_H */
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
new file mode 100644
index 000000000..6d6556ca2
--- /dev/null
+++ b/arch/s390/include/asm/percpu.h
@@ -0,0 +1,186 @@
+#ifndef __ARCH_S390_PERCPU__
+#define __ARCH_S390_PERCPU__
+
+#include <linux/preempt.h>
+#include <asm/cmpxchg.h>
+
+/*
+ * s390 uses its own implementation for per cpu data, the offset of
+ * the cpu local data area is cached in the cpu's lowcore memory.
+ */
+#define __my_cpu_offset S390_lowcore.percpu_offset
+
+/*
+ * For 64 bit module code, the module may be more than 4G above the
+ * per cpu area, use weak definitions to force the compiler to
+ * generate external references.
+ */
+#if defined(CONFIG_SMP) && defined(MODULE)
+#define ARCH_NEEDS_WEAK_PER_CPU
+#endif
+
+/*
+ * We use a compare-and-swap loop since that uses less cpu cycles than
+ * disabling and enabling interrupts like the generic variant would do.
+ */
+#define arch_this_cpu_to_op_simple(pcp, val, op)			\
+({									\
+	typedef typeof(pcp) pcp_op_T__;					\
+	pcp_op_T__ old__, new__, prev__;				\
+	pcp_op_T__ *ptr__;						\
+	preempt_disable();	/* no preemption while ptr__ is in use */ \
+	ptr__ = raw_cpu_ptr(&(pcp));					\
+	prev__ = *ptr__;						\
+	do {								\
+		old__ = prev__;						\
+		new__ = old__ op (val);					\
+		prev__ = cmpxchg(ptr__, old__, new__);			\
+	} while (prev__ != old__);	/* retry if *ptr__ had changed */ \
+	preempt_enable();						\
+	new__;			/* the macro evaluates to the new value */ \
+})
+
+#define this_cpu_add_1(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_2(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_1(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_2(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_and_1(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_and_2(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_or_1(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
+#define this_cpu_or_2(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
+
+#ifndef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define this_cpu_add_4(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_8(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_4(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_add_return_8(pcp, val) arch_this_cpu_to_op_simple(pcp, val, +)
+#define this_cpu_and_4(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_and_8(pcp, val)	arch_this_cpu_to_op_simple(pcp, val, &)
+#define this_cpu_or_4(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
+#define this_cpu_or_8(pcp, val)		arch_this_cpu_to_op_simple(pcp, val, |)
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define arch_this_cpu_add(pcp, val, op1, op2, szcast)			\
+{									\
+	typedef typeof(pcp) pcp_op_T__; 				\
+	pcp_op_T__ val__ = (val);					\
+	pcp_op_T__ old__, *ptr__;					\
+	preempt_disable();						\
+	ptr__ = raw_cpu_ptr(&(pcp)); 				\
+	if (__builtin_constant_p(val__) &&				\
+	    ((szcast)val__ > -129) && ((szcast)val__ < 128)) {		\
+		asm volatile(	/* constant in [-128,127]: op2, immediate */ \
+			op2 "   %[ptr__],%[val__]\n"			\
+			: [ptr__] "+Q" (*ptr__) 			\
+			: [val__] "i" ((szcast)val__)			\
+			: "cc");					\
+	} else {							\
+		asm volatile(	/* otherwise: op1 with val__ in a register */ \
+			op1 "   %[old__],%[val__],%[ptr__]\n"		\
+			: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__)	\
+			: [val__] "d" (val__)				\
+			: "cc");					\
+	}								\
+	preempt_enable();						\
+}
+
+#define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int)
+#define this_cpu_add_8(pcp, val) arch_this_cpu_add(pcp, val, "laag", "agsi", long)
+
+#define arch_this_cpu_add_return(pcp, val, op)				\
+({									\
+	typedef typeof(pcp) pcp_op_T__; 				\
+	pcp_op_T__ val__ = (val);					\
+	pcp_op_T__ old__, *ptr__;					\
+	preempt_disable();						\
+	ptr__ = raw_cpu_ptr(&(pcp));	 				\
+	asm volatile(		/* old__ is the asm's output operand */	\
+		op "    %[old__],%[val__],%[ptr__]\n"			\
+		: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__)		\
+		: [val__] "d" (val__)					\
+		: "cc");						\
+	preempt_enable();						\
+	old__ + val__;		/* value the macro evaluates to */	\
+})
+
+#define this_cpu_add_return_4(pcp, val) arch_this_cpu_add_return(pcp, val, "laa")
+#define this_cpu_add_return_8(pcp, val) arch_this_cpu_add_return(pcp, val, "laag")
+
+#define arch_this_cpu_to_op(pcp, val, op)				\
+{									\
+	typedef typeof(pcp) pcp_op_T__; 				\
+	pcp_op_T__ val__ = (val);					\
+	pcp_op_T__ old__, *ptr__;					\
+	preempt_disable();						\
+	ptr__ = raw_cpu_ptr(&(pcp));	 				\
+	asm volatile(		/* old__ is written here but never read */ \
+		op "    %[old__],%[val__],%[ptr__]\n"			\
+		: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__)		\
+		: [val__] "d" (val__)					\
+		: "cc");						\
+	preempt_enable();						\
+}
+
+#define this_cpu_and_4(pcp, val)	arch_this_cpu_to_op(pcp, val, "lan")
+#define this_cpu_and_8(pcp, val)	arch_this_cpu_to_op(pcp, val, "lang")
+#define this_cpu_or_4(pcp, val)		arch_this_cpu_to_op(pcp, val, "lao")
+#define this_cpu_or_8(pcp, val)		arch_this_cpu_to_op(pcp, val, "laog")
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define arch_this_cpu_cmpxchg(pcp, oval, nval)				\
+({									\
+	typedef typeof(pcp) pcp_op_T__;					\
+	pcp_op_T__ ret__;						\
+	pcp_op_T__ *ptr__;						\
+	preempt_disable();	/* lookup + cmpxchg run unpreempted */	\
+	ptr__ = raw_cpu_ptr(&(pcp));					\
+	ret__ = cmpxchg(ptr__, oval, nval);				\
+	preempt_enable();						\
+	ret__;			/* whatever cmpxchg() returned */	\
+})
+
+#define this_cpu_cmpxchg_1(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) arch_this_cpu_cmpxchg(pcp, oval, nval)
+
+#define arch_this_cpu_xchg(pcp, nval)					\
+({									\
+	typeof(pcp) *ptr__;						\
+	typeof(pcp) ret__;						\
+	preempt_disable();	/* lookup + xchg run unpreempted */	\
+	ptr__ = raw_cpu_ptr(&(pcp));					\
+	ret__ = xchg(ptr__, nval);					\
+	preempt_enable();						\
+	ret__;			/* whatever xchg() returned */		\
+})
+
+#define this_cpu_xchg_1(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+#define this_cpu_xchg_8(pcp, nval) arch_this_cpu_xchg(pcp, nval)
+
+#define arch_this_cpu_cmpxchg_double(pcp1, pcp2, o1, o2, n1, n2)	\
+({									\
+	typeof(pcp1) o1__ = (o1), n1__ = (n1);				\
+	typeof(pcp2) o2__ = (o2), n2__ = (n2);				\
+	typeof(pcp1) *p1__;						\
+	typeof(pcp2) *p2__;						\
+	int ret__;							\
+	preempt_disable();	/* both lookups + the op run unpreempted */ \
+	p1__ = raw_cpu_ptr(&(pcp1));					\
+	p2__ = raw_cpu_ptr(&(pcp2));					\
+	ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__);	\
+	preempt_enable();						\
+	ret__;			/* result of __cmpxchg_double() */	\
+})
+
+#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double
+#define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ARCH_S390_PERCPU__ */
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
new file mode 100644
index 000000000..4cb19fe76
--- /dev/null
+++ b/arch/s390/include/asm/perf_event.h
@@ -0,0 +1,93 @@
+/*
+ * Performance event support - s390 specific definitions.
+ *
+ * Copyright IBM Corp. 2009, 2013
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_S390_PERF_EVENT_H
+#define _ASM_S390_PERF_EVENT_H
+
+#include <linux/perf_event.h>
+#include <linux/device.h>
+#include <asm/cpu_mf.h>
+
+/* Per-CPU flags for PMU states */
+#define PMU_F_RESERVED			0x1000
+#define PMU_F_ENABLED			0x2000
+#define PMU_F_IN_USE			0x4000
+#define PMU_F_ERR_IBE			0x0100
+#define PMU_F_ERR_LSDA			0x0200
+#define PMU_F_ERR_MASK			(PMU_F_ERR_IBE|PMU_F_ERR_LSDA)
+
+/* Perf definitions for PMU event attributes in sysfs */
+extern __init const struct attribute_group **cpumf_cf_event_group(void);
+extern ssize_t cpumf_events_sysfs_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *page);
+#define EVENT_VAR(_cat, _name)		event_attr_##_cat##_##_name
+#define EVENT_PTR(_cat, _name)		(&EVENT_VAR(_cat, _name).attr.attr)
+
+#define CPUMF_EVENT_ATTR(cat, name, id)			\
+	PMU_EVENT_ATTR(name, EVENT_VAR(cat, name), id, cpumf_events_sysfs_show)
+#define CPUMF_EVENT_PTR(cat, name)	EVENT_PTR(cat, name)
+
+
+/* Perf callbacks */
+struct pt_regs;
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs) perf_misc_flags(regs)
+
+/* Perf pt_regs extension for sample-data-entry indicators */
+struct perf_sf_sde_regs {
+	unsigned char in_guest:1;	  /* guest sample */
+	unsigned long reserved:63;	  /* reserved */
+};
+
+/* Perf PMU definitions for the counter facility */
+#define PERF_CPUM_CF_MAX_CTR		256
+
+/* Perf PMU definitions for the sampling facility */
+#define PERF_CPUM_SF_MAX_CTR		2
+#define PERF_EVENT_CPUM_SF		0xB0000UL /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG		0xBD000UL /* Event: Combined-sampling */
+#define PERF_CPUM_SF_BASIC_MODE		0x0001	  /* Basic-sampling flag */
+#define PERF_CPUM_SF_DIAG_MODE		0x0002	  /* Diagnostic-sampling flag */
+#define PERF_CPUM_SF_MODE_MASK		(PERF_CPUM_SF_BASIC_MODE| \
+					 PERF_CPUM_SF_DIAG_MODE)
+#define PERF_CPUM_SF_FULL_BLOCKS	0x0004	  /* Process full SDBs only */
+
+#define REG_NONE		0
+#define REG_OVERFLOW		1
+#define OVERFLOW_REG(hwc)	((hwc)->extra_reg.config)
+#define SFB_ALLOC_REG(hwc)	((hwc)->extra_reg.alloc)
+#define RAWSAMPLE_REG(hwc)	((hwc)->config)
+#define TEAR_REG(hwc)		((hwc)->last_tag)
+#define SAMPL_RATE(hwc)		((hwc)->event_base)
+#define SAMPL_FLAGS(hwc)	((hwc)->config_base)
+#define SAMPL_DIAG_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE)
+#define SDB_FULL_BLOCKS(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
+
+/* Structure for sampling data entries to be passed as perf raw sample data
+ * to user space.  Note that raw sample data must be aligned and, thus, might
+ * be padded with zeros.
+ */
+struct sf_raw_sample {
+#define SF_RAW_SAMPLE_BASIC	PERF_CPUM_SF_BASIC_MODE
+#define SF_RAW_SAMPLE_DIAG	PERF_CPUM_SF_DIAG_MODE
+	u64			format;
+	u32			 size;	  /* Size of sf_raw_sample */
+	u16			bsdes;	  /* Basic-sampling data entry size */
+	u16			dsdes;	  /* Diagnostic-sampling data entry size */
+	struct hws_basic_entry	basic;	  /* Basic-sampling data entry */
+	struct hws_diag_entry	 diag;	  /* Diagnostic-sampling data entry */
+	u8		    padding[];	  /* Padding to next multiple of 8 */
+} __packed;
+
+/* Perf hardware reserve and release functions */
+int perf_reserve_sampling(void);
+void perf_release_sampling(void);
+
+#endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
new file mode 100644
index 000000000..7b7858f15
--- /dev/null
+++ b/arch/s390/include/asm/pgalloc.h
@@ -0,0 +1,132 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Hartmut Penner (hp@de.ibm.com)
+ *               Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/pgalloc.h"
+ *    Copyright (C) 1994  Linus Torvalds
+ */
+
+#ifndef _S390_PGALLOC_H
+#define _S390_PGALLOC_H
+
+#include <linux/threads.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+
+unsigned long *crst_table_alloc(struct mm_struct *);
+void crst_table_free(struct mm_struct *, unsigned long *);
+
+unsigned long *page_table_alloc(struct mm_struct *);
+void page_table_free(struct mm_struct *, unsigned long *);
+void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
+extern int page_table_allocate_pgste;
+
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			  unsigned long key, bool nq);
+unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
+
+static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
+{
+	typedef struct { char _[n]; } addrtype;	/* n-byte type for "m" operands */
+
+	*s = val;		/* seed entry the mvc copies start from */
+	n = (n / 256) - 1;	/* brct count; NOTE(review): assumes n >= 512 */
+	asm volatile(
+		"	mvc	8(248,%0),0(%0)\n"
+		"0:	mvc	256(256,%0),0(%0)\n"
+		"	la	%0,256(%0)\n"
+		"	brct	%1,0b\n"
+		: "+a" (s), "+d" (n), "=m" (*(addrtype *) s)
+		: "m" (*(addrtype *) s));
+}
+
+static inline void crst_table_init(unsigned long *crst, unsigned long entry)
+{
+	clear_table(crst, entry, 2048 * sizeof(*crst));	/* 2048 entries */
+}
+
+static inline unsigned long pgd_entry_type(struct mm_struct *mm)
+{
+	unsigned long limit = mm->context.asce_limit;
+
+	if (limit > (1UL << 42))
+		return _REGION2_ENTRY_EMPTY;
+	return limit > (1UL << 31) ? _REGION3_ENTRY_EMPTY : _SEGMENT_ENTRY_EMPTY;
+}
+
+int crst_table_upgrade(struct mm_struct *, unsigned long limit);
+void crst_table_downgrade(struct mm_struct *, unsigned long limit);
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	unsigned long *crst = crst_table_alloc(mm);
+	if (crst != NULL)	/* a NULL table is handed straight back */
+		crst_table_init(crst, _REGION3_ENTRY_EMPTY);
+	return (pud_t *) crst;
+}
+#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) pud)
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
+{
+	unsigned long *crst = crst_table_alloc(mm);
+
+	if (crst) {
+		crst_table_init(crst, _SEGMENT_ENTRY_EMPTY);
+		/* The table is released again if the pmd page ctor fails. */
+		if (pgtable_pmd_page_ctor(virt_to_page(crst)))
+			return (pmd_t *) crst;
+		crst_table_free(mm, crst);
+	}
+	return NULL;
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	pgtable_pmd_page_dtor(virt_to_page(pmd)); /* ctor ran in pmd_alloc_one() */
+	crst_table_free(mm, (unsigned long *) pmd);
+}
+
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+	pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud);	/* mm is not used */
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	pud_val(*pud) = _REGION3_ENTRY | __pa(pmd);	/* mm is not used */
+}
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	spin_lock_init(&mm->context.list_lock);
+	INIT_LIST_HEAD(&mm->context.pgtable_list);
+	INIT_LIST_HEAD(&mm->context.gmap_list);
+	return (pgd_t *) crst_table_alloc(mm);	/* returned unchecked */
+}
+#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
+
+static inline void pmd_populate(struct mm_struct *mm,
+				pmd_t *pmd, pgtable_t pte)
+{
+	pmd_val(*pmd) = _SEGMENT_ENTRY | __pa(pte);	/* _SEGMENT_ENTRY is 0 */
+}
+
+#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
+
+#define pmd_pgtable(pmd) \
+	(pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)
+
+/*
+ * page table entry allocation/free routines.
+ */
+#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+
+#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
+#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
+
+extern void rcu_table_freelist_finish(void);
+
+#endif /* _S390_PGALLOC_H */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
new file mode 100644
index 000000000..ef24a212e
--- /dev/null
+++ b/arch/s390/include/asm/pgtable.h
@@ -0,0 +1,1637 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Hartmut Penner (hp@de.ibm.com)
+ *               Ulrich Weigand (weigand@de.ibm.com)
+ *               Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/pgtable.h"
+ */
+
+#ifndef _ASM_S390_PGTABLE_H
+#define _ASM_S390_PGTABLE_H
+
+/*
+ * The Linux memory management assumes a three-level page table setup.
+ * For s390 64 bit we use up to four of the five levels the hardware
+ * provides (region first tables are not used).
+ *
+ * The "pgd_xxx()" functions are trivial for a folded two-level
+ * setup: the pgd is never bad, and a pmd always exists (as it's folded
+ * into the pgd entry)
+ *
+ * This file contains the functions and defines necessary to modify and use
+ * the S390 page table tree.
+ */
+#ifndef __ASSEMBLY__
+#include <linux/sched.h>
+#include <linux/mm_types.h>
+#include <linux/page-flags.h>
+#include <linux/radix-tree.h>
+#include <asm/bug.h>
+#include <asm/page.h>
+
+extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096)));
+extern void paging_init(void);
+extern void vmem_map_init(void);
+
+/*
+ * The S390 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+#define update_mmu_cache(vma, address, ptep)     do { } while (0)
+#define update_mmu_cache_pmd(vma, address, ptep) do { } while (0)
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero; used
+ * for zero-mapped memory areas etc..
+ */
+
+extern unsigned long empty_zero_page;
+extern unsigned long zero_page_mask;
+
+#define ZERO_PAGE(vaddr) \
+	(virt_to_page((void *)(empty_zero_page + \
+	 (((unsigned long)(vaddr)) &zero_page_mask))))
+#define __HAVE_COLOR_ZERO_PAGE
+
+/* TODO: s390 cannot support io_remap_pfn_range... */
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * PMD_SHIFT determines the size of the area a second-level page
+ * table can map
+ * PGDIR_SHIFT determines what a third-level page table entry can map
+ */
+#define PMD_SHIFT	20
+#define PUD_SHIFT	31
+#define PGDIR_SHIFT	42
+
+#define PMD_SIZE        (1UL << PMD_SHIFT)
+#define PMD_MASK        (~(PMD_SIZE-1))
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/*
+ * entries per page directory level: the S390 is two-level, so
+ * we don't really have any PMD directory physically.
+ * for S390 segment-table entries are combined to one PGD
+ * that leads to 1024 pte per pgd
+ */
+#define PTRS_PER_PTE	256
+#define PTRS_PER_PMD	2048
+#define PTRS_PER_PUD	2048
+#define PTRS_PER_PGD	2048
+
+#define FIRST_USER_ADDRESS  0UL
+
+#define pte_ERROR(e) \
+	printk("%s:%d: bad pte %p.\n", __FILE__, __LINE__, (void *) pte_val(e))
+#define pmd_ERROR(e) \
+	printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e))
+#define pud_ERROR(e) \
+	printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e))
+#define pgd_ERROR(e) \
+	printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
+
+#ifndef __ASSEMBLY__
+/*
+ * The vmalloc and module area will always be on the topmost area of the
+ * kernel mapping. We reserve 128GB (64bit) for vmalloc and modules.
+ * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where
+ * modules will reside. That makes sure that inter module branches always
+ * happen without trampolines and in addition the placement within a 2GB frame
+ * is branch prediction unit friendly.
+ */
+extern unsigned long VMALLOC_START;
+extern unsigned long VMALLOC_END;
+extern struct page *vmemmap;
+
+#define VMEM_MAX_PHYS ((unsigned long) vmemmap)
+
+extern unsigned long MODULES_VADDR;
+extern unsigned long MODULES_END;
+#define MODULES_VADDR	MODULES_VADDR
+#define MODULES_END	MODULES_END
+#define MODULES_LEN	(1UL << 31)
+
+static inline int is_module_addr(void *addr)
+{
+	BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
+	/*
+	 * Both bounds are inclusive: the result is 1 exactly when
+	 * MODULES_VADDR <= addr <= MODULES_END, and 0 otherwise.
+	 */
+	return addr >= (void *)MODULES_VADDR && addr <= (void *)MODULES_END;
+}
+
+/*
+ * A 64 bit pagetable entry of S390 has following format:
+ * |			 PFRA			      |0IPC|  OS  |
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * I Page-Invalid Bit:    Page is not available for address-translation
+ * P Page-Protection Bit: Store access not possible for page
+ * C Change-bit override: HW is not required to set change bit
+ *
+ * A 64 bit segmenttable entry of S390 has following format:
+ * |        P-table origin                              |      TT
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * I Segment-Invalid Bit:    Segment is not available for address-translation
+ * C Common-Segment Bit:     Segment is not private (PoP 3-30)
+ * P Page-Protection Bit: Store access not possible for page
+ * TT Type 00
+ *
+ * A 64 bit region table entry of S390 has following format:
+ * |        S-table origin                             |   TF  TTTL
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * I Segment-Invalid Bit:    Segment is not available for address-translation
+ * TT Type 01
+ * TF
+ * TL Table length
+ *
+ * The 64 bit regiontable origin of S390 has following format:
+ * |      region table origin                          |       DTTL
+ * 0000000000111111111122222222223333333333444444444455555555556666
+ * 0123456789012345678901234567890123456789012345678901234567890123
+ *
+ * X Space-Switch event:
+ * G Segment-Invalid Bit:  
+ * P Private-Space Bit:    
+ * S Storage-Alteration:
+ * R Real space
+ * TL Table-Length:
+ *
+ * A storage key has the following format:
+ * | ACC |F|R|C|0|
+ *  0   3 4 5 6 7
+ * ACC: access key
+ * F  : fetch protection bit
+ * R  : referenced bit
+ * C  : changed bit
+ */
+
+/* Hardware bits in the page table entry */
+#define _PAGE_PROTECT	0x200		/* HW read-only bit  */
+#define _PAGE_INVALID	0x400		/* HW invalid bit    */
+#define _PAGE_LARGE	0x800		/* Bit to mark a large pte */
+
+/* Software bits in the page table entry */
+#define _PAGE_PRESENT	0x001		/* SW pte present bit */
+#define _PAGE_YOUNG	0x004		/* SW pte young bit */
+#define _PAGE_DIRTY	0x008		/* SW pte dirty bit */
+#define _PAGE_READ	0x010		/* SW pte read bit */
+#define _PAGE_WRITE	0x020		/* SW pte write bit */
+#define _PAGE_SPECIAL	0x040		/* SW associated with special page */
+#define _PAGE_UNUSED	0x080		/* SW bit for pgste usage state */
+#define __HAVE_ARCH_PTE_SPECIAL
+
+/* Set of bits not changed in pte_modify */
+#define _PAGE_CHG_MASK		(PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \
+				 _PAGE_YOUNG)
+
+/*
+ * handle_pte_fault uses pte_present and pte_none to find out the pte type
+ * WITHOUT holding the page table lock. The _PAGE_PRESENT bit is used to
+ * distinguish present from not-present ptes. It is changed only with the page
+ * table lock held.
+ *
+ * The following table gives the different possible bit combinations for
+ * the pte hardware and software bits in the last 12 bits of a pte
+ * (. unassigned bit, x don't care, t swap type):
+ *
+ *				842100000000
+ *				000084210000
+ *				000000008421
+ *				.IR.uswrdy.p
+ * empty			.10.00000000
+ * swap				.11..ttttt.0
+ * prot-none, clean, old	.11.xx0000.1
+ * prot-none, clean, young	.11.xx0001.1
+ * prot-none, dirty, old	.10.xx0010.1
+ * prot-none, dirty, young	.10.xx0011.1
+ * read-only, clean, old	.11.xx0100.1
+ * read-only, clean, young	.01.xx0101.1
+ * read-only, dirty, old	.11.xx0110.1
+ * read-only, dirty, young	.01.xx0111.1
+ * read-write, clean, old	.11.xx1100.1
+ * read-write, clean, young	.01.xx1101.1
+ * read-write, dirty, old	.10.xx1110.1
+ * read-write, dirty, young	.00.xx1111.1
+ * HW-bits: R read-only, I invalid
+ * SW-bits: p present, y young, d dirty, r read, w write, s special,
+ *	    u unused, l large
+ *
+ * pte_none    is true for the bit pattern .10.00000000, pte == 0x400
+ * pte_swap    is true for the bit pattern .11..ttttt.0, (pte & 0x201) == 0x200
+ * pte_present is true for the bit pattern .xx.xxxxxx.1, (pte & 0x001) == 0x001
+ */
+
+/* Bits in the segment/region table address-space-control-element */
+#define _ASCE_ORIGIN		~0xfffUL/* segment table origin		    */
+#define _ASCE_PRIVATE_SPACE	0x100	/* private space control	    */
+#define _ASCE_ALT_EVENT		0x80	/* storage alteration event control */
+#define _ASCE_SPACE_SWITCH	0x40	/* space switch event		    */
+#define _ASCE_REAL_SPACE	0x20	/* real space control		    */
+#define _ASCE_TYPE_MASK		0x0c	/* asce table type mask		    */
+#define _ASCE_TYPE_REGION1	0x0c	/* region first table type	    */
+#define _ASCE_TYPE_REGION2	0x08	/* region second table type	    */
+#define _ASCE_TYPE_REGION3	0x04	/* region third table type	    */
+#define _ASCE_TYPE_SEGMENT	0x00	/* segment table type		    */
+#define _ASCE_TABLE_LENGTH	0x03	/* region table length		    */
+
+/* Bits in the region table entry */
+#define _REGION_ENTRY_ORIGIN	~0xfffUL/* region/segment table origin	    */
+#define _REGION_ENTRY_PROTECT	0x200	/* region protection bit	    */
+#define _REGION_ENTRY_INVALID	0x20	/* invalid region table entry	    */
+#define _REGION_ENTRY_TYPE_MASK	0x0c	/* region/segment table type mask   */
+#define _REGION_ENTRY_TYPE_R1	0x0c	/* region first table type	    */
+#define _REGION_ENTRY_TYPE_R2	0x08	/* region second table type	    */
+#define _REGION_ENTRY_TYPE_R3	0x04	/* region third table type	    */
+#define _REGION_ENTRY_LENGTH	0x03	/* region table length		    */
+
+#define _REGION1_ENTRY		(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH)
+#define _REGION1_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID)
+#define _REGION2_ENTRY		(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
+#define _REGION2_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID)
+#define _REGION3_ENTRY		(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
+#define _REGION3_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID)
+
+#define _REGION3_ENTRY_LARGE	0x400	/* RTTE-format control, large page  */
+#define _REGION3_ENTRY_RO	0x200	/* page protection bit		    */
+
+/* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_BITS	0xfffffffffffffe33UL /* ok for pmd_bad() */
+#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL /* ditto, large pmd */
+#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address	    */
+#define _SEGMENT_ENTRY_ORIGIN	~0x7ffUL/* segment table origin		    */
+#define _SEGMENT_ENTRY_PROTECT	0x200	/* page protection bit		    */
+#define _SEGMENT_ENTRY_INVALID	0x20	/* invalid segment table entry	    */
+
+#define _SEGMENT_ENTRY		(0)	/* no flag bits set */
+#define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INVALID) /* cf. pmd_none() */
+
+#define _SEGMENT_ENTRY_DIRTY	0x2000	/* SW segment dirty bit */
+#define _SEGMENT_ENTRY_YOUNG	0x1000	/* SW segment young bit */
+#define _SEGMENT_ENTRY_SPLIT	0x0800	/* THP splitting bit */
+#define _SEGMENT_ENTRY_LARGE	0x0400	/* STE-format control, large page */
+#define _SEGMENT_ENTRY_READ	0x0002	/* SW segment read bit */
+#define _SEGMENT_ENTRY_WRITE	0x0001	/* SW segment write bit */
+
+/*
+ * Segment table entry encoding (R = read-only, I = invalid, y = young bit):
+ *				dy..R...I...rw
+ * prot-none, clean, old	00..1...1...00
+ * prot-none, clean, young	01..1...1...00
+ * prot-none, dirty, old	10..1...1...00
+ * prot-none, dirty, young	11..1...1...00
+ * read-only, clean, old	00..1...1...10
+ * read-only, clean, young	01..1...0...10
+ * read-only, dirty, old	10..1...1...10
+ * read-only, dirty, young	11..1...0...10
+ * read-write, clean, old	00..1...1...11
+ * read-write, clean, young	01..1...0...11
+ * read-write, dirty, old	10..0...1...11
+ * read-write, dirty, young	11..0...0...11
+ * The segment table origin is used to distinguish empty (origin==0) from
+ * read-write, old segment table entries (origin!=0)
+ * HW-bits: R read-only, I invalid
+ * SW-bits: y young, d dirty, r read, w write
+ */
+
+#define _SEGMENT_ENTRY_SPLIT_BIT 11	/* THP splitting bit number (0x0800) */
+
+/* Page status table bits for virtualization */
+#define PGSTE_ACC_BITS	0xf000000000000000UL	/* page access key */
+#define PGSTE_FP_BIT	0x0800000000000000UL	/* fetch protection bit */
+#define PGSTE_PCL_BIT	0x0080000000000000UL	/* pgste lock bit */
+#define PGSTE_HR_BIT	0x0040000000000000UL	/* presumably host R - confirm */
+#define PGSTE_HC_BIT	0x0020000000000000UL	/* presumably host C - confirm */
+#define PGSTE_GR_BIT	0x0004000000000000UL	/* guest referenced */
+#define PGSTE_GC_BIT	0x0002000000000000UL	/* guest changed */
+#define PGSTE_UC_BIT	0x0000800000000000UL	/* user dirty (migration) */
+#define PGSTE_IN_BIT	0x0000400000000000UL	/* IPTE notify bit */
+
+/* Guest Page State used for virtualization */
+#define _PGSTE_GPS_ZERO		0x0000000080000000UL
+#define _PGSTE_GPS_USAGE_MASK	0x0000000003000000UL
+#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
+#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL
+
+/*
+ * ASCE bits for a user page table pointer: the space-switch-event,
+ * private-space-control and storage-alteration-event-control bits
+ * are all set. A kernel page table pointer doesn't need them.
+ */
+#define _ASCE_USER_BITS		(_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \
+				 _ASCE_ALT_EVENT)
+
+/*
+ * Page protection definitions. PAGE_NONE/READ/WRITE start out invalid.
+ */
+#define PAGE_NONE	__pgprot(_PAGE_PRESENT | _PAGE_INVALID)
+#define PAGE_READ	__pgprot(_PAGE_PRESENT | _PAGE_READ | \
+				 _PAGE_INVALID | _PAGE_PROTECT)
+#define PAGE_WRITE	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+				 _PAGE_INVALID | _PAGE_PROTECT)
+
+#define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+				 _PAGE_YOUNG | _PAGE_DIRTY)
+#define PAGE_KERNEL	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+				 _PAGE_YOUNG | _PAGE_DIRTY)
+#define PAGE_KERNEL_RO	__pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \
+				 _PAGE_PROTECT)
+
+/*
+ * On s390 the page table entry has an invalid bit and a read-only bit.
+ * Read permission implies execute permission and write permission
+ * implies read permission.
+ */
+         /*xwr*/
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READ
+#define __P010	PAGE_READ
+#define __P011	PAGE_READ
+#define __P100	PAGE_READ
+#define __P101	PAGE_READ
+#define __P110	PAGE_READ
+#define __P111	PAGE_READ	/* no __Pxxx entry uses PAGE_WRITE */
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READ
+#define __S010	PAGE_WRITE	/* __Sxxx with w set: PAGE_WRITE */
+#define __S011	PAGE_WRITE
+#define __S100	PAGE_READ
+#define __S101	PAGE_READ
+#define __S110	PAGE_WRITE
+#define __S111	PAGE_WRITE
+
+/*
+ * Segment entry (large page) protections, see massage_pgprot_pmd().
+ */
+#define SEGMENT_NONE	__pgprot(_SEGMENT_ENTRY_INVALID | \
+				 _SEGMENT_ENTRY_PROTECT)
+#define SEGMENT_READ	__pgprot(_SEGMENT_ENTRY_PROTECT | \
+				 _SEGMENT_ENTRY_READ)
+#define SEGMENT_WRITE	__pgprot(_SEGMENT_ENTRY_READ | \
+				 _SEGMENT_ENTRY_WRITE)
+
+static inline int mm_has_pgste(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	return unlikely(mm->context.has_pgste) ? 1 : 0;
+#else
+	return 0;	/* has_pgste is only consulted with CONFIG_PGSTE */
+#endif
+}
+
+static inline int mm_alloc_pgste(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	return unlikely(mm->context.alloc_pgste) ? 1 : 0;
+#else
+	return 0;	/* alloc_pgste is only consulted with CONFIG_PGSTE */
+#endif
+}
+
+/*
+ * Once a guest uses storage keys, faults in this mm must no longer
+ * be backed by zero pages.
+ */
+#define mm_forbids_zeropage mm_use_skey
+static inline int mm_use_skey(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	return mm->context.use_skey ? 1 : 0;
+#else
+	return 0;	/* use_skey is only consulted with CONFIG_PGSTE */
+#endif
+}
+
+/*
+ * pgd/pmd/pte query functions
+ */
+static inline int pgd_present(pgd_t pgd)
+{
+	/* Entries of a type below region-second always count as present */
+	return (pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2
+		|| (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) != 0UL;
+}
+
+static inline int pgd_none(pgd_t pgd)
+{
+	/* Only an invalid entry of region-second or -first type is empty */
+	return (pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2
+		&& (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL;
+}
+
+static inline int pgd_bad(pgd_t pgd)
+{
+	/*
+	 * Depending on the number of page table levels a pgd holds
+	 * either a region or a segment table entry, so only bits that
+	 * neither entry format may have set are treated as bad.
+	 */
+	unsigned long valid =
+		_SEGMENT_ENTRY_ORIGIN | _REGION_ENTRY_INVALID |
+		_REGION_ENTRY_TYPE_MASK | _REGION_ENTRY_LENGTH;
+	return (pgd_val(pgd) & ~valid) != 0;
+}
+
+static inline int pud_present(pud_t pud)
+{
+	/* Entries of a type below region-third always count as present */
+	return (pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3
+		|| (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL;
+}
+
+static inline int pud_none(pud_t pud)
+{
+	/* Only an invalid entry of region-third type or above is empty */
+	return (pud_val(pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3
+		&& (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL;
+}
+
+static inline int pud_large(pud_t pud)
+{
+	/* The large bit only counts in a region-third table entry */
+	return (pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3
+		&& (pud_val(pud) & _REGION3_ENTRY_LARGE) != 0;
+}
+
+static inline int pud_bad(pud_t pud)
+{
+	/*
+	 * Depending on the number of page table levels a pud holds
+	 * either a region or a segment table entry, so only bits that
+	 * neither entry format may have set are treated as bad.
+	 */
+	unsigned long valid =
+		_SEGMENT_ENTRY_ORIGIN | _REGION_ENTRY_INVALID |
+		_REGION_ENTRY_TYPE_MASK | _REGION_ENTRY_LENGTH;
+	return (pud_val(pud) & ~valid) != 0;
+}
+
+static inline int pmd_present(pmd_t pmd)
+{
+	return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY;	/* not the empty entry */
+}
+
+static inline int pmd_none(pmd_t pmd)
+{
+	return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY;	/* exact match only */
+}
+
+static inline int pmd_large(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);	/* STE-format bit */
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+	unsigned long mask = pmd_large(pmd) ?
+		_SEGMENT_ENTRY_ORIGIN_LARGE : _SEGMENT_ENTRY_ORIGIN;
+
+	/* Large entries use the large page address mask, all others */
+	/* the segment table origin mask, to isolate the address bits. */
+	return (pmd_val(pmd) & mask) >> PAGE_SHIFT;
+}
+
+static inline int pmd_bad(pmd_t pmd)
+{
+	unsigned long allowed = pmd_large(pmd) ?
+		_SEGMENT_ENTRY_BITS_LARGE : _SEGMENT_ENTRY_BITS;
+	return (pmd_val(pmd) & ~allowed) != 0;	/* any other bit is bad */
+}
+
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH	/* defined out of line */
+extern void pmdp_splitting_flush(struct vm_area_struct *vma,
+				 unsigned long addr, pmd_t *pmdp);
+
+#define  __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS	/* defined out of line */
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp,
+				 pmd_t entry, int dirty);
+
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH	/* defined out of line */
+extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
+				  unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMD_WRITE
+static inline int pmd_write(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & _SEGMENT_ENTRY_WRITE);	/* SW write bit */
+}
+
+static inline int pmd_dirty(pmd_t pmd)
+{
+	/* Only large entries carry a dirty bit; others count as dirty */
+	if (!pmd_large(pmd))
+		return 1;
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
+}
+
+static inline int pmd_young(pmd_t pmd)
+{
+	/* Only large entries carry a young bit; others count as young */
+	if (!pmd_large(pmd))
+		return 1;
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
+}
+
+static inline int pte_present(pte_t pte)
+{
+	/* Present means the present bit is set: (pte & 0x001) == 0x001 */
+	return !!(pte_val(pte) & _PAGE_PRESENT);
+}
+
+static inline int pte_none(pte_t pte)
+{
+	/* Empty only if the whole pte equals the invalid bit: pte == 0x400 */
+	return pte_val(pte) == _PAGE_INVALID;
+}
+
+static inline int pte_swap(pte_t pte)
+{
+	unsigned long bits = pte_val(pte) & (_PAGE_PROTECT | _PAGE_PRESENT);
+
+	return bits == _PAGE_PROTECT;	/* i.e. (pte & 0x201) == 0x200 */
+}
+
+static inline int pte_special(pte_t pte)
+{
+	return (pte_val(pte) & _PAGE_SPECIAL) != 0;	/* 0/1, never truncated */
+}
+
+#define __HAVE_ARCH_PTE_SAME
+static inline int pte_same(pte_t a, pte_t b)
+{
+	return pte_val(a) == pte_val(b);	/* all bits must match */
+}
+
+static inline pgste_t pgste_get_lock(pte_t *ptep)
+{
+	unsigned long new = 0;	/* stays 0 without CONFIG_PGSTE */
+#ifdef CONFIG_PGSTE
+	unsigned long old;
+	/* Retry until the PCL bit is swapped from clear to set in the pgste */
+	preempt_disable();	/* re-enabled by pgste_set_unlock() */
+	asm(
+		"	lg	%0,%2\n"
+		"0:	lgr	%1,%0\n"
+		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
+		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
+		"	csg	%0,%1,%2\n"	/* swap in new if pgste == old */
+		"	jl	0b\n"		/* mismatch: try again */
+		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
+		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
+#endif
+	return __pgste(new);
+}
+
+static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	/* Clear PCL in C: an asm must not modify an input-only operand */
+	pgste_val(pgste) &= ~PGSTE_PCL_BIT;
+	asm("	stg	%1,%0\n"	/* store pgste, dropping the lock */
+		: "=Q" (ptep[PTRS_PER_PTE])
+		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
+		: "cc", "memory");
+	preempt_enable();	/* pairs with pgste_get_lock() */
+#endif
+}
+
+static inline pgste_t pgste_get(pte_t *ptep)
+{
+#ifdef CONFIG_PGSTE
+	return __pgste(*(unsigned long *)(ptep + PTRS_PER_PTE));
+#else
+	return __pgste(0);	/* no pgste is read without CONFIG_PGSTE */
+#endif
+}
+
+static inline void pgste_set(pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	*(pgste_t *)&ptep[PTRS_PER_PTE] = pgste;	/* stored as passed in */
+#endif
+}
+
+static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
+				       struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	unsigned long key, acc_fp;
+	/* Nothing to collect unless storage keys are used and pte is valid */
+	if (mm_use_skey(mm) && !(pte_val(*ptep) & _PAGE_INVALID)) {
+		key = (unsigned long)
+			page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
+		/* Page changed & referenced bits go to the GC & GR bits */
+		pgste_val(pgste) |=
+			(key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
+		/* Replace access key and fetch protection bit in the pgste */
+		acc_fp = (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+		pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
+		pgste_val(pgste) |= acc_fp;
+	}
+#endif
+	return pgste;
+}
+
+static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
+				 struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	unsigned long acc_fp, guest_rc;
+
+	/* Only valid entries of an mm that uses storage keys get a key */
+	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
+		return;
+	/* The pte that gets replaced is expected to be invalid already */
+	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
+	/*
+	 * The new storage key is assembled from the pgste: access key
+	 * and fetch protection bit shifted down by 56, the guest R/C
+	 * bits shifted down by 48.
+	 */
+	acc_fp = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+	guest_rc = (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
+	page_set_storage_key(pte_val(entry) & PAGE_MASK, acc_fp | guest_rc, 0);
+#endif
+}
+
+static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
+{
+	unsigned long val = pte_val(entry);
+	/* Only present, valid and writable entries need extra treatment */
+	if ((val & _PAGE_PRESENT) && (val & _PAGE_WRITE) &&
+	    !(val & _PAGE_INVALID)) {
+		/*
+		 * Machines without enhanced suppression-on-protection
+		 * get the dirty bit forced on for every writable pte.
+		 */
+		if (!MACHINE_HAS_ESOP)
+			val = (val | _PAGE_DIRTY) & ~_PAGE_PROTECT;
+		/* Write access is possible, record user-dirty */
+		if (!(val & _PAGE_PROTECT))
+			pgste_val(pgste) |= PGSTE_UC_BIT;
+	}
+	pte_val(entry) = val;
+	*ptep = entry;
+	return pgste;
+}
+
+/**
+ * struct gmap - guest address space
+ * @crst_list: list of all crst tables used in the guest address space
+ * @mm: pointer to the parent mm_struct
+ * @guest_to_host: radix tree with guest to host address translation
+ * @host_to_guest: radix tree with pointer to segment table entries
+ * @guest_table_lock: spinlock to protect all entries in the guest page table
+ * @table: pointer to the page directory
+ * @asce: address space control element for gmap page table
+ * @pfault_enabled: defines if pfaults are applicable for the guest
+ */
+struct gmap {
+	struct list_head list;	/* list linkage; owning list not visible here */
+	struct list_head crst_list;
+	struct mm_struct *mm;
+	struct radix_tree_root guest_to_host;
+	struct radix_tree_root host_to_guest;
+	spinlock_t guest_table_lock;
+	unsigned long *table;
+	unsigned long asce;
+	unsigned long asce_end;	/* NOTE(review): presumably end of asce range */
+	void *private;		/* opaque pointer; user not visible here */
+	bool pfault_enabled;
+};
+
+/**
+ * struct gmap_notifier - notify function block for page invalidation
+ * @notifier_call: address of callback function
+ */
+struct gmap_notifier {
+	struct list_head list;	/* list linkage; owning list not visible here */
+	void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
+};
+
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
+void gmap_free(struct gmap *gmap);
+void gmap_enable(struct gmap *gmap);
+void gmap_disable(struct gmap *gmap);
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+		     unsigned long to, unsigned long len);
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
+unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
+unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
+int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
+void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
+void __gmap_zap(struct gmap *, unsigned long gaddr);
+bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
+
+/* IPTE notifier registration and invalidation notification */
+void gmap_register_ipte_notifier(struct gmap_notifier *);
+void gmap_unregister_ipte_notifier(struct gmap_notifier *);
+int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
+void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
+
+static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
+					unsigned long addr,
+					pte_t *ptep, pgste_t pgste)
+{
+#ifdef CONFIG_PGSTE
+	if (!(pgste_val(pgste) & PGSTE_IN_BIT))
+		return pgste;	/* no notification requested */
+	pgste_val(pgste) &= ~PGSTE_IN_BIT;
+	gmap_do_ipte_notify(mm, addr, ptep);
+#endif
+	return pgste;
+}
+
+/*
+ * Install entry at *ptep. For an mm with pgstes the store is done
+ * under the pgste lock together with the storage key and user-dirty
+ * bookkeeping; otherwise it is a plain store.
+ */
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t entry)
+{
+	pgste_t pgste;
+
+	if (!mm_has_pgste(mm)) {
+		*ptep = entry;
+		return;
+	}
+	pgste = pgste_get_lock(ptep);
+	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
+	pgste_set_key(ptep, pgste, entry, mm);
+	pgste = pgste_set_pte(ptep, pgste, entry);
+	pgste_set_unlock(ptep, pgste);
+}
+
+/*
+ * query functions pte_write/pte_dirty/pte_young only work if
+ * pte_present() is true. Undefined behaviour if not..
+ */
+static inline int pte_write(pte_t pte)
+{
+	return !!(pte_val(pte) & _PAGE_WRITE);	/* software write bit */
+}
+
+static inline int pte_dirty(pte_t pte)
+{
+	return !!(pte_val(pte) & _PAGE_DIRTY);	/* software dirty bit */
+}
+
+static inline int pte_young(pte_t pte)
+{
+	return !!(pte_val(pte) & _PAGE_YOUNG);	/* software young bit */
+}
+
+#define __HAVE_ARCH_PTE_UNUSED
+static inline int pte_unused(pte_t pte)
+{
+	return (pte_val(pte) & _PAGE_UNUSED) != 0;	/* 0/1, never truncated */
+}
+
+/*
+ * pgd/pmd/pte modification functions
+ */
+
+static inline void pgd_clear(pgd_t *pgd)
+{
+	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+		pgd_val(*pgd) = _REGION2_ENTRY_EMPTY;	/* other types: no-op */
+}
+
+static inline void pud_clear(pud_t *pud)
+{
+	if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+		pud_val(*pud) = _REGION3_ENTRY_EMPTY;	/* other types: no-op */
+}
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;	/* the pmd_none() pattern */
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	pte_val(*ptep) = _PAGE_INVALID;	/* the pte_none() pattern; no flush */
+}
+
+/*
+ * The following pte modification functions only work if
+ * pte_present() is true. Undefined behaviour if not..
+ */
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	unsigned long v = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+	/*
+	 * PAGE_NONE, PAGE_READ and PAGE_WRITE all come with the invalid
+	 * bit set; a readable, young page must not keep it.
+	 */
+	if ((v & _PAGE_YOUNG) && (v & _PAGE_READ))
+		v &= ~_PAGE_INVALID;
+	/*
+	 * PAGE_READ and PAGE_WRITE come with the page protection bit
+	 * set; a writable, dirty page must not keep it.
+	 */
+	if ((v & _PAGE_DIRTY) && (v & _PAGE_WRITE))
+		v &= ~_PAGE_PROTECT;
+	pte_val(pte) = v;
+	return pte;
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	/* Drop the software write bit and set the hardware protect bit */
+	pte_val(pte) = (pte_val(pte) & ~_PAGE_WRITE) | _PAGE_PROTECT;
+	return pte;
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	unsigned long val = pte_val(pte) | _PAGE_WRITE;
+	/* The protect bit is only dropped if the pte is dirty already */
+	pte_val(pte) = (val & _PAGE_DIRTY) ? val & ~_PAGE_PROTECT : val;
+	return pte;
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	/* A clean pte always carries the hardware protect bit */
+	pte_val(pte) = (pte_val(pte) & ~_PAGE_DIRTY) | _PAGE_PROTECT;
+	return pte;
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	unsigned long val = pte_val(pte) | _PAGE_DIRTY;
+	/* Only a writable pte loses the protect bit when it turns dirty */
+	pte_val(pte) = (val & _PAGE_WRITE) ? val & ~_PAGE_PROTECT : val;
+	return pte;
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	/* An old pte always carries the hardware invalid bit */
+	pte_val(pte) = (pte_val(pte) & ~_PAGE_YOUNG) | _PAGE_INVALID;
+	return pte;
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	unsigned long val = pte_val(pte) | _PAGE_YOUNG;
+	/* Only a readable pte loses the invalid bit when it turns young */
+	pte_val(pte) = (val & _PAGE_READ) ? val & ~_PAGE_INVALID : val;
+	return pte;
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	pte_val(pte) |= _PAGE_SPECIAL;	/* tested by pte_special() */
+	return pte;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+	pte_val(pte) |= _PAGE_LARGE;	/* only built with CONFIG_HUGETLB_PAGE */
+	return pte;
+}
+#endif
+
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
+{
+	unsigned long pto = (unsigned long) ptep;	/* pte address operand */
+
+	/* Invalidation + global TLB flush for the pte */
+	asm volatile(
+		"	ipte	%2,%3"
+		: "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
+}
+
+static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep)
+{
+	unsigned long pto = (unsigned long) ptep;	/* pte address operand */
+
+	/* Invalidation + local TLB flush for the pte (last .insn field 1) */
+	asm volatile(
+		"	.insn rrf,0xb2210000,%2,%3,0,1"
+		: "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
+}
+
+static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep)
+{
+	unsigned long pto = (unsigned long) ptep;
+
+	/* Invalidate a range of ptes + global TLB flush of the ptes */
+	do {	/* address and nr are in/out ("+a"): the insn updates both */
+		asm volatile(
+			"	.insn rrf,0xb2210000,%2,%0,%1,0"
+			: "+a" (address), "+a" (nr) : "a" (pto) : "memory");
+	} while (nr != 255);	/* NOTE(review): presumably 255 == done; confirm */
+}
+
+static inline void ptep_flush_direct(struct mm_struct *mm,
+				     unsigned long address, pte_t *ptep)
+{
+	int active, count;
+	/* Flush the pte right away; a no-op if it is already invalid */
+	if (pte_val(*ptep) & _PAGE_INVALID)
+		return;
+	active = (mm == current->active_mm) ? 1 : 0;
+	count = atomic_add_return(0x10000, &mm->context.attach_count);
+	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+		__ptep_ipte_local(address, ptep);	/* this CPU only */
+	else
+		__ptep_ipte(address, ptep);
+	atomic_sub(0x10000, &mm->context.attach_count);	/* undo the add */
+}
+
+static inline void ptep_flush_lazy(struct mm_struct *mm,
+				   unsigned long address, pte_t *ptep)
+{
+	int active, count;
+	/* Invalidate the pte; the TLB flush itself may be deferred */
+	if (pte_val(*ptep) & _PAGE_INVALID)
+		return;
+	active = (mm == current->active_mm) ? 1 : 0;
+	count = atomic_add_return(0x10000, &mm->context.attach_count);
+	if ((count & 0xffff) <= active) {
+		pte_val(*ptep) |= _PAGE_INVALID;	/* no IPTE issued here */
+		mm->context.flush_mm = 1;	/* records the pending flush */
+	} else
+		__ptep_ipte(address, ptep);
+	atomic_sub(0x10000, &mm->context.attach_count);	/* undo the add */
+}
+
+/*
+ * Get (and clear) the user dirty bit for a pte; 0 for an mm without pgste.
+ */
+static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
+						 unsigned long addr,
+						 pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t pte;
+	int dirty;
+
+	if (!mm_has_pgste(mm))
+		return 0;
+	pgste = pgste_get_lock(ptep);
+	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
+	pgste_val(pgste) &= ~PGSTE_UC_BIT;
+	pte = *ptep;
+	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
+		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
+		__ptep_ipte(addr, ptep);
+		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
+			pte_val(pte) |= _PAGE_PROTECT;	/* write-protect again */
+		else
+			pte_val(pte) |= _PAGE_INVALID;	/* no ESOP: invalidate */
+		*ptep = pte;
+	}
+	pgste_set_unlock(ptep, pgste);
+	return dirty;
+}
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+					    unsigned long addr, pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t pte, oldpte;
+	int young;
+
+	if (mm_has_pgste(vma->vm_mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
+	}
+	/* Remember the old pte, flush it, then store it back marked old */
+	oldpte = pte = *ptep;
+	ptep_flush_direct(vma->vm_mm, addr, ptep);
+	young = pte_young(pte);	/* sampled before pte_mkold() */
+	pte = pte_mkold(pte);
+
+	if (mm_has_pgste(vma->vm_mm)) {
+		pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm);
+		pgste = pgste_set_pte(ptep, pgste, pte);
+		pgste_set_unlock(ptep, pgste);
+	} else
+		*ptep = pte;
+
+	return young;
+}
+
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+					 unsigned long address, pte_t *ptep)
+{	/* ptep_test_and_clear_young() already flushes the pte */
+	return ptep_test_and_clear_young(vma, address, ptep);
+}
+
+/*
+ * This is hard to understand. ptep_get_and_clear and ptep_clear_flush
+ * both clear the TLB for the unmapped pte. The reason is that
+ * ptep_get_and_clear is used in common code (e.g. change_pte_range)
+ * to modify an active pte. The sequence is
+ *   1) ptep_get_and_clear
+ *   2) set_pte_at
+ *   3) flush_tlb_range
+ * On s390 the tlb needs to get flushed with the modification of the pte
+ * if the pte is active. The only way this can be implemented is to
+ * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range
+ * is a nop.
+ */
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long address, pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t pte;
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+	}
+	/* Save the old pte for the caller, flush it and clear the entry */
+	pte = *ptep;
+	ptep_flush_lazy(mm, address, ptep);
+	pte_val(*ptep) = _PAGE_INVALID;	/* the pte_none() pattern */
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_update_all(&pte, pgste, mm);
+		pgste_set_unlock(ptep, pgste);
+	}
+	return pte;
+}
+
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
+					   unsigned long address,
+					   pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t pte;
+
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		/* Keep the result: the notify clears PGSTE_IN_BIT in it */
+		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+	}
+	pte = *ptep;
+	ptep_flush_lazy(mm, address, ptep);
+	/* The pgste stays locked until ptep_modify_prot_commit() */
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_update_all(&pte, pgste, mm);
+		pgste_set(ptep, pgste);
+	}
+	return pte;
+}
+
+static inline void ptep_modify_prot_commit(struct mm_struct *mm,
+					   unsigned long address,
+					   pte_t *ptep, pte_t pte)
+{
+	pgste_t pgste;
+
+	if (!mm_has_pgste(mm)) {
+		*ptep = pte;
+		return;
+	}
+	pgste = pgste_get(ptep);	/* read without taking the lock */
+	pgste_set_key(ptep, pgste, pte, mm);
+	pgste_set_unlock(ptep, pgste_set_pte(ptep, pgste, pte));
+}
+
+#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
+static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
+				     unsigned long address, pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t pte;
+
+	if (mm_has_pgste(vma->vm_mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
+	}
+	/* Save the old pte for the caller, flush it and clear the entry */
+	pte = *ptep;
+	ptep_flush_direct(vma->vm_mm, address, ptep);
+	pte_val(*ptep) = _PAGE_INVALID;
+	/* Guest page state "unused" is reported via _PAGE_UNUSED */
+	if (mm_has_pgste(vma->vm_mm)) {
+		if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
+		    _PGSTE_GPS_USAGE_UNUSED)
+			pte_val(pte) |= _PAGE_UNUSED;
+		pgste = pgste_update_all(&pte, pgste, vma->vm_mm);
+		pgste_set_unlock(ptep, pgste);
+	}
+	return pte;
+}
+
+/*
+ * The batched pte unmap code uses ptep_get_and_clear_full to clear the
+ * ptes. Here an optimization is possible. tlb_gather_mmu flushes all
+ * tlbs of an mm if it can guarantee that the ptes of the mm_struct
+ * cannot be accessed while the batched unmap is running. In this case
+ * full==1 and a simple pte_clear is enough. See tlb.h.
+ */
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
+					    unsigned long address,
+					    pte_t *ptep, int full)
+{
+	pgste_t pgste;
+	pte_t pte;
+
+	if (!full && mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+	}
+	/* full != 0: plain clear, without pgste locking and without flush */
+	pte = *ptep;
+	if (!full)
+		ptep_flush_lazy(mm, address, ptep);
+	pte_val(*ptep) = _PAGE_INVALID;
+
+	if (!full && mm_has_pgste(mm)) {
+		pgste = pgste_update_all(&pte, pgste, mm);
+		pgste_set_unlock(ptep, pgste);
+	}
+	return pte;
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
+				       unsigned long address, pte_t *ptep)
+{
+	pgste_t pgste;
+	pte_t pte = *ptep;
+
+	/* A pte that is not writable is returned untouched */
+	if (!pte_write(pte))
+		return pte;
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+	}
+	/* The old translation is flushed before the new pte is stored */
+	ptep_flush_lazy(mm, address, ptep);
+	pte = pte_wrprotect(pte);
+	if (mm_has_pgste(mm)) {
+		pgste = pgste_set_pte(ptep, pgste, pte);
+		pgste_set_unlock(ptep, pgste);
+	} else
+		*ptep = pte;
+	return pte;
+}
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+					unsigned long address, pte_t *ptep,
+					pte_t entry, int dirty)
+{
+	pgste_t pgste;
+
+	if (pte_same(*ptep, entry))
+		return 0;	/* nothing to change */
+	if (mm_has_pgste(vma->vm_mm)) {
+		pgste = pgste_get_lock(ptep);
+		pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
+	}
+	/* The old translation is flushed before the new entry is stored */
+	ptep_flush_direct(vma->vm_mm, address, ptep);
+	/* Note: the dirty argument is not evaluated in this function */
+	if (mm_has_pgste(vma->vm_mm)) {
+		pgste_set_key(ptep, pgste, entry, vma->vm_mm);
+		pgste = pgste_set_pte(ptep, pgste, entry);
+		pgste_set_unlock(ptep, pgste);
+	} else
+		*ptep = entry;
+	return 1;
+}
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
+{
+	pte_t entry;
+	pte_val(entry) = physpage + pgprot_val(pgprot);	/* address + prot */
+	return pte_mkyoung(entry);	/* new ptes start out young */
+}
+
+static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
+{
+	pte_t entry = mk_pte_phys(page_to_phys(page), pgprot);
+
+	/* A writable pte for an already dirty page starts out dirty */
+	if (!pte_write(entry) || !PageDirty(page))
+		return entry;
+	return pte_mkdirty(entry);
+}
+
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
+/* Top-level table entry for an address, in a given mm or in init_mm */
+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+/* Origin field of an entry, i.e. the table it points to */
+#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
+#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
+#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
+
+static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+{
+	unsigned long type = pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK;
+	if (type != _REGION_ENTRY_TYPE_R2)	/* pgd doubles as pud */
+		return (pud_t *) pgd + pud_index(address);
+	return (pud_t *) pgd_deref(*pgd) + pud_index(address);
+}
+
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
+{
+	unsigned long type = pud_val(*pud) & _REGION_ENTRY_TYPE_MASK;
+	if (type != _REGION_ENTRY_TYPE_R3)	/* pud doubles as pmd */
+		return (pmd_t *) pud + pmd_index(address);
+	return (pmd_t *) pud_deref(*pud) + pmd_index(address);
+}
+
+#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
+#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
+#define pte_page(x) pfn_to_page(pte_pfn(x))
+
+#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
+
+/* Find an entry in the lowest level page table. */
+#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
+#define pte_offset_kernel(pmd, address) pte_offset(pmd,address)
+#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
+#define pte_unmap(pte) do { } while (0)	/* pte_offset_map() maps nothing */
+
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
+{
+	unsigned long prot = pgprot_val(pgprot);
+	/*
+	 * PAGE_NONE / PAGE_READ / PAGE_WRITE (see __Pxxx / __Sxxx) are
+	 * converted to segment table entry format; anything that is
+	 * neither PAGE_NONE nor PAGE_READ yields SEGMENT_WRITE.
+	 */
+	if (prot == pgprot_val(PAGE_NONE))
+		return pgprot_val(SEGMENT_NONE);
+	return prot == pgprot_val(PAGE_READ) ?
+		pgprot_val(SEGMENT_READ) : pgprot_val(SEGMENT_WRITE);
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+	/* Drop the software write bit and set the hardware protect bit */
+	pmd_val(pmd) = (pmd_val(pmd) & ~_SEGMENT_ENTRY_WRITE) |
+		       _SEGMENT_ENTRY_PROTECT;
+	return pmd;
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+	pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE;
+	/* A large entry that is not dirty keeps its protect bit */
+	if (!pmd_large(pmd) || (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+	return pmd;
+}
+
+static inline pmd_t pmd_mkclean(pmd_t pmd)
+{
+	if (!pmd_large(pmd))
+		return pmd;	/* only large entries are modified */
+	pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
+	pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+	return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+	if (!pmd_large(pmd))
+		return pmd;	/* only large entries are modified */
+	pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY;
+	if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+	return pmd;
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+	if (!pmd_large(pmd))
+		return pmd;	/* only large entries are modified */
+	pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+	if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
+	return pmd;
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+	if (!pmd_large(pmd))
+		return pmd;	/* only large entries are modified */
+	pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
+	pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+	return pmd;
+}
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	unsigned long prot = massage_pgprot_pmd(newprot);
+	if (!pmd_large(pmd)) {	/* non-large: origin plus new protection */
+		pmd_val(pmd) = (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) | prot;
+		return pmd;
+	}
+	pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE | _SEGMENT_ENTRY_DIRTY |
+			_SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_LARGE |
+			_SEGMENT_ENTRY_SPLIT;
+	pmd_val(pmd) |= prot;
+	if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))	/* clean: protect */
+		pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+	if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))	/* old: invalid */
+		pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+	return pmd;
+}
+
+static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
+{
+	pmd_t entry;	/* address plus segment-format protection bits */
+	pmd_val(entry) = physpage + massage_pgprot_pmd(pgprot);
+	return entry;
+}
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
+
+static inline void __pmdp_csp(pmd_t *pmdp)
+{
+	register unsigned long reg2 asm("2") = pmd_val(*pmdp);
+	register unsigned long reg3 asm("3") = pmd_val(*pmdp) |
+					       _SEGMENT_ENTRY_INVALID;
+	register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5;
+	/* r2: current entry, r3: same entry with INVALID set, r4: &entry + 5 */
+	asm volatile(
+		"	csp %1,%3"
+		: "=m" (*pmdp)
+		: "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc");
+}
+
+static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp)
+{
+	unsigned long sto;
+	/* sto: pmdp stepped back by pmd_index(address) entries */
+	sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
+	asm volatile(
+		"	.insn	rrf,0xb98e0000,%2,%3,0,0"
+		: "=m" (*pmdp)
+		: "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
+		: "cc" );
+}
+
+static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
+{
+	unsigned long sto;
+	/* same as __pmdp_idte() except the last .insn field is 1, not 0 */
+	sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
+	asm volatile(
+		"	.insn	rrf,0xb98e0000,%2,%3,0,1"
+		: "=m" (*pmdp)
+		: "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
+		: "cc" );
+}
+
+static inline void pmdp_flush_direct(struct mm_struct *mm,
+				     unsigned long address, pmd_t *pmdp)
+{
+	int active, count;
+	/* Flush *pmdp immediately; an entry already marked INVALID is skipped. */
+	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+		return;
+	if (!MACHINE_HAS_IDTE) {	/* no IDTE: use the csp helper instead */
+		__pmdp_csp(pmdp);
+		return;
+	}
+	active = (mm == current->active_mm) ? 1 : 0;
+	count = atomic_add_return(0x10000, &mm->context.attach_count);
+	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
+		__pmdp_idte_local(address, pmdp);
+	else
+		__pmdp_idte(address, pmdp);
+	atomic_sub(0x10000, &mm->context.attach_count);
+}
+
+static inline void pmdp_flush_lazy(struct mm_struct *mm,
+				   unsigned long address, pmd_t *pmdp)
+{
+	int active, count;
+	/* Either just mark *pmdp INVALID and set flush_mm, or flush it now. */
+	if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+		return;
+	active = (mm == current->active_mm) ? 1 : 0;
+	count = atomic_add_return(0x10000, &mm->context.attach_count);
+	if ((count & 0xffff) <= active) {
+		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
+		mm->context.flush_mm = 1;
+	} else if (MACHINE_HAS_IDTE)
+		__pmdp_idte(address, pmdp);
+	else
+		__pmdp_csp(pmdp);
+	atomic_sub(0x10000, &mm->context.attach_count);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				       pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) &&
+		(pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT);
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t entry)
+{
+	*pmdp = entry;	/* plain store; mm and addr are not used here */
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+	/* set LARGE, YOUNG and PROTECT in one step */
+	pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_YOUNG |
+			_SEGMENT_ENTRY_PROTECT;
+	return pmd;
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+					    unsigned long address, pmd_t *pmdp)
+{
+	pmd_t old = *pmdp;
+
+	/* flush, store the pmd_mkold() copy, report the snapshot's state */
+	pmdp_flush_direct(vma->vm_mm, address, pmdp);
+	*pmdp = pmd_mkold(old);
+	return pmd_young(old);
+}
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+				       unsigned long address, pmd_t *pmdp)
+{
+	pmd_t old = *pmdp;	/* value handed back to the caller */
+
+	pmdp_flush_direct(mm, address, pmdp);
+	pmd_clear(pmdp);
+	return old;
+}
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL
+static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm,
+					    unsigned long address,
+					    pmd_t *pmdp, int full)
+{
+	pmd_t old = *pmdp;	/* value handed back to the caller */
+
+	if (!full)		/* full != 0: the per-entry flush is skipped */
+		pmdp_flush_lazy(mm, address, pmdp);
+	pmd_clear(pmdp);
+	return old;
+}
+
+#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
+static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma,
+				     unsigned long address, pmd_t *pmdp)
+{
+	return pmdp_get_and_clear(vma->vm_mm, address, pmdp); /* vma -> mm */
+}
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+static inline void pmdp_invalidate(struct vm_area_struct *vma,
+				   unsigned long address, pmd_t *pmdp)
+{
+	pmdp_flush_direct(vma->vm_mm, address, pmdp);	/* no pmd_clear() here */
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+				      unsigned long address, pmd_t *pmdp)
+{
+	pmd_t old = *pmdp;
+
+	if (!pmd_write(old))	/* not writable: nothing to flush or store */
+		return;
+	pmdp_flush_direct(mm, address, pmdp);
+	set_pmd_at(mm, address, pmdp, pmd_wrprotect(old));
+}
+
+#define pfn_pmd(pfn, pgprot)	mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
+#define mk_pmd(page, pgprot)	pfn_pmd(page_to_pfn(page), (pgprot))
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE; /* raw masked bit, not 0/1 */
+}
+
+static inline int has_transparent_hugepage(void)
+{
+	return !!(MACHINE_HAS_HPAGE);	/* 1 if MACHINE_HAS_HPAGE, else 0 */
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/*
+ * 64 bit swap entry format:
+ * A page-table entry has some bits we have to treat in a special way.
+ * Bits 52 and 55 have to be zero, otherwise a specification
+ * exception will occur instead of a page translation exception. The
+ * specification exception has the bad habit of not storing necessary
+ * information in the lowcore.
+ * Bits 54 and 63 are used to indicate the page type.
+ * A swap pte is indicated by bit pattern (pte & 0x201) == 0x200
+ * This leaves the bits 0-51 and bits 56-62 to store type and offset.
+ * We use the 5 bits from 57-61 for the type and the 52 bits from 0-51
+ * for the offset.
+ * |			  offset			|01100|type |00|
+ * |0000000000111111111122222222223333333333444444444455|55555|55566|66|
+ * |0123456789012345678901234567890123456789012345678901|23456|78901|23|
+ */
+
+#define __SWP_OFFSET_MASK	((1UL << 52) - 1)
+#define __SWP_OFFSET_SHIFT	12
+#define __SWP_TYPE_MASK		((1UL << 5) - 1)
+#define __SWP_TYPE_SHIFT	2
+
+static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
+{
+	pte_t pte;	/* INVALID|PROTECT marker plus masked offset and type */
+
+	pte_val(pte) = _PAGE_INVALID | _PAGE_PROTECT |
+		((offset & __SWP_OFFSET_MASK) << __SWP_OFFSET_SHIFT) |
+		((type & __SWP_TYPE_MASK) << __SWP_TYPE_SHIFT);
+	return pte;
+}
+
+static inline unsigned long __swp_type(swp_entry_t entry)
+{
+	return (entry.val >> __SWP_TYPE_SHIFT) & __SWP_TYPE_MASK;
+}
+
+static inline unsigned long __swp_offset(swp_entry_t entry)
+{
+	return (entry.val >> __SWP_OFFSET_SHIFT) & __SWP_OFFSET_MASK;
+}
+
+static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
+{
+	return (swp_entry_t) { pte_val(mk_swap_pte(type, offset)) };
+}
+
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)	((pte_t) { (x).val })
+
+#endif /* !__ASSEMBLY__ */
+
+#define kern_addr_valid(addr)   (1)
+
+extern int vmem_add_mapping(unsigned long start, unsigned long size);
+extern int vmem_remove_mapping(unsigned long start, unsigned long size);
+extern int s390_enable_sie(void);
+extern int s390_enable_skey(void);
+extern void s390_reset_cmma(struct mm_struct *mm);
+
+/* s390 has a private copy of get unmapped area to deal with cache synonyms */
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
+/*
+ * No page table caches to initialise
+ */
+static inline void pgtable_cache_init(void) { }
+static inline void check_pgt_cache(void) { }
+
+#include <asm-generic/pgtable.h>
+
+#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
new file mode 100644
index 000000000..dedb62185
--- /dev/null
+++ b/arch/s390/include/asm/processor.h
@@ -0,0 +1,360 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Hartmut Penner (hp@de.ibm.com),
+ *               Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/processor.h"
+ *    Copyright (C) 1994, Linus Torvalds
+ */
+
+#ifndef __ASM_S390_PROCESSOR_H
+#define __ASM_S390_PROCESSOR_H
+
+#define CIF_MCCK_PENDING	0	/* machine check handling is pending */
+#define CIF_ASCE		1	/* user asce needs fixup / uaccess */
+#define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
+
+#define _CIF_MCCK_PENDING	(1<<CIF_MCCK_PENDING)
+#define _CIF_ASCE		(1<<CIF_ASCE)
+#define _CIF_NOHZ_DELAY		(1<<CIF_NOHZ_DELAY)
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/irqflags.h>
+#include <asm/cpu.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/setup.h>
+#include <asm/runtime_instr.h>
+
+static inline void set_cpu_flag(int flag)
+{
+	S390_lowcore.cpu_flags |= (1UL << flag); /* UL: same width as clear */
+}
+
+static inline void clear_cpu_flag(int flag)
+{
+	S390_lowcore.cpu_flags &= ~(1UL << flag); /* UL: don't clip wide flags */
+}
+
+static inline int test_cpu_flag(int flag)
+{
+	return !!(S390_lowcore.cpu_flags & (1UL << flag)); /* 0 or 1 */
+}
+
+#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; })
+
+static inline void get_cpu_id(struct cpuid *ptr)
+{
+	asm volatile("stidp %0" : "=Q" (*ptr));
+}
+
+extern void s390_adjust_jiffies(void);
+extern const struct seq_operations cpuinfo_op;
+extern int sysctl_ieee_emulation_warnings;
+extern void execve_tail(void);
+
+/*
+ * User space process size: 2GB for 31 bit, 4TB or 8PB for 64 bit.
+ */
+
+#define TASK_SIZE_OF(tsk)	((tsk)->mm->context.asce_limit)
+#define TASK_UNMAPPED_BASE	(test_thread_flag(TIF_31BIT) ? \
+					(1UL << 30) : (1UL << 41))
+#define TASK_SIZE		TASK_SIZE_OF(current)
+#define TASK_MAX_SIZE		(1UL << 53)
+
+#define STACK_TOP		(1UL << (test_thread_flag(TIF_31BIT) ? 31:42))
+#define STACK_TOP_MAX		(1UL << 42)
+
+#define HAVE_ARCH_PICK_MMAP_LAYOUT
+
+typedef struct {
+        __u32 ar4;
+} mm_segment_t;
+
+/*
+ * Thread structure
+ */
+struct thread_struct {
+	s390_fp_regs fp_regs;
+	unsigned int  acrs[NUM_ACRS];
+        unsigned long ksp;              /* kernel stack pointer             */
+	mm_segment_t mm_segment;
+	unsigned long gmap_addr;	/* address of last gmap fault. */
+	unsigned int gmap_pfault;	/* signal of a pending guest pfault */
+	struct per_regs per_user;	/* User specified PER registers */
+	struct per_event per_event;	/* Cause of the last PER trap */
+	unsigned long per_flags;	/* Flags to control debug behavior */
+        /* pfault_wait is used to block the process on a pfault event */
+	unsigned long pfault_wait;
+	struct list_head list;
+	/* cpu runtime instrumentation */
+	struct runtime_instr_cb *ri_cb;
+	int ri_signum;
+	unsigned char trap_tdb[256];	/* Transaction abort diagnose block */
+	__vector128 *vxrs;		/* Vector register save area */
+};
+
+/* Flag to disable transactions. */
+#define PER_FLAG_NO_TE			1UL
+/* Flag to enable random transaction aborts. */
+#define PER_FLAG_TE_ABORT_RAND		2UL
+/* Flag to specify random transaction abort mode:
+ * - abort each transaction at a random instruction before TEND if set.
+ * - abort random transactions at a random instruction if cleared.
+ */
+#define PER_FLAG_TE_ABORT_RAND_TEND	4UL
+
+typedef struct thread_struct thread_struct;
+
+/*
+ * Stack layout of a C stack frame.
+ */
+#ifndef __PACK_STACK
+struct stack_frame {
+	unsigned long back_chain;
+	unsigned long empty1[5];
+	unsigned long gprs[10];
+	unsigned int  empty2[8];
+};
+#else
+struct stack_frame {
+	unsigned long empty1[5];
+	unsigned int  empty2[8];
+	unsigned long gprs[10];
+	unsigned long back_chain;
+};
+#endif
+
+#define ARCH_MIN_TASKALIGN	8
+
+#define INIT_THREAD {							\
+	.ksp = sizeof(init_stack) + (unsigned long) &init_stack,	\
+}
+
+/*
+ * Set up regs (PSW mask/address, gpr 15) for a new 64 bit or 31 bit thread.
+ */
+#define start_thread(regs, new_psw, new_stackp) do {			\
+	(regs)->psw.mask = PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA;	\
+	(regs)->psw.addr = (new_psw) | PSW_ADDR_AMODE;			\
+	(regs)->gprs[15] = (new_stackp);				\
+	execve_tail();							\
+} while (0)
+
+#define start_thread31(regs, new_psw, new_stackp) do {			\
+	(regs)->psw.mask = PSW_USER_BITS | PSW_MASK_BA;			\
+	(regs)->psw.addr = (new_psw) | PSW_ADDR_AMODE;			\
+	(regs)->gprs[15] = (new_stackp);				\
+	crst_table_downgrade(current->mm, 1UL << 31);			\
+	execve_tail();							\
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+struct mm_struct;
+struct seq_file;
+
+void show_cacheinfo(struct seq_file *m);
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+extern unsigned long thread_saved_pc(struct task_struct *t);
+
+unsigned long get_wchan(struct task_struct *p);
+#define task_pt_regs(tsk) ((struct pt_regs *) \
+        (task_stack_page(tsk) + THREAD_SIZE) - 1)
+#define KSTK_EIP(tsk)	(task_pt_regs(tsk)->psw.addr)
+#define KSTK_ESP(tsk)	(task_pt_regs(tsk)->gprs[15])
+
+/* Has task runtime instrumentation enabled ? */
+#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
+
+static inline unsigned short stap(void)
+{
+	unsigned short cpu_address;
+
+	asm volatile("stap %0" : "=m" (cpu_address));
+	return cpu_address;
+}
+
+/*
+ * Give up the time slice of the virtual PU.
+ */
+void cpu_relax(void);
+
+#define cpu_relax_lowlatency()  barrier()
+
+static inline void psw_set_key(unsigned int key)
+{
+	asm volatile("spka 0(%0)" : : "a" (key)); /* "a": base reg, not r0 */
+}
+
+/*
+ * Set PSW to specified value.
+ */
+static inline void __load_psw(psw_t psw)
+{
+	asm volatile("lpswe %0" : : "Q" (psw) : "cc");
+}
+
+/*
+ * Set PSW mask to specified value, while leaving the
+ * PSW addr pointing to the next instruction.
+ */
+static inline void __load_psw_mask (unsigned long mask)
+{
+	unsigned long addr;
+	psw_t psw;
+
+	psw.mask = mask;
+
+	asm volatile(
+		"	larl	%0,1f\n"
+		"	stg	%0,%O1+8(%R1)\n"
+		"	lpswe	%1\n"
+		"1:"
+		: "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc");
+}
+
+/*
+ * Rewind PSW instruction address by specified number of bytes.
+ */
+static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
+{
+	unsigned long mask = -1UL;	/* EA set: result is not truncated */
+
+	/* without EA the result wraps at 31 bits (BA set) or at 24 bits */
+	if (!(psw.mask & PSW_MASK_EA))
+		mask = (1UL << ((psw.mask & PSW_MASK_BA) ? 31 : 24)) - 1;
+	return (psw.addr - ilc) & mask;
+}
+
+/*
+ * Function to stop a processor until the next interrupt occurs
+ */
+void enabled_wait(void);
+
+/*
+ * Function to drop a processor into disabled wait state
+ */
+static inline void __noreturn disabled_wait(unsigned long code)
+{
+        unsigned long ctl_buf;
+        psw_t dw_psw;
+
+	dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA;
+        dw_psw.addr = code;
+        /* 
+         * Store status and then load disabled wait psw,
+         * the processor is dead afterwards
+         */
+	asm volatile(
+		"	stctg	0,0,0(%2)\n"
+		"	ni	4(%2),0xef\n"	/* switch off protection */
+		"	lctlg	0,0,0(%2)\n"
+		"	lghi	1,0x1000\n"
+		"	stpt	0x328(1)\n"	/* store timer */
+		"	stckc	0x330(1)\n"	/* store clock comparator */
+		"	stpx	0x318(1)\n"	/* store prefix register */
+		"	stam	0,15,0x340(1)\n"/* store access registers */
+		"	stfpc	0x31c(1)\n"	/* store fpu control */
+		"	std	0,0x200(1)\n"	/* store f0 */
+		"	std	1,0x208(1)\n"	/* store f1 */
+		"	std	2,0x210(1)\n"	/* store f2 */
+		"	std	3,0x218(1)\n"	/* store f3 */
+		"	std	4,0x220(1)\n"	/* store f4 */
+		"	std	5,0x228(1)\n"	/* store f5 */
+		"	std	6,0x230(1)\n"	/* store f6 */
+		"	std	7,0x238(1)\n"	/* store f7 */
+		"	std	8,0x240(1)\n"	/* store f8 */
+		"	std	9,0x248(1)\n"	/* store f9 */
+		"	std	10,0x250(1)\n"	/* store f10 */
+		"	std	11,0x258(1)\n"	/* store f11 */
+		"	std	12,0x260(1)\n"	/* store f12 */
+		"	std	13,0x268(1)\n"	/* store f13 */
+		"	std	14,0x270(1)\n"	/* store f14 */
+		"	std	15,0x278(1)\n"	/* store f15 */
+		"	stmg	0,15,0x280(1)\n"/* store general registers */
+		"	stctg	0,15,0x380(1)\n"/* store control registers */
+		"	oi	0x384(1),0x10\n"/* fake protection bit */
+		"	lpswe	0(%1)"
+		: "=m" (ctl_buf)
+		: "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc", "0", "1");
+	while (1);
+}
+
+/*
+ * Use to set psw mask except for the first byte which
+ * won't be changed by this function.
+ */
+static inline void
+__set_psw_mask(unsigned long mask)
+{
+	__load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8)));
+}
+
+#define local_mcck_enable() \
+	__set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT | PSW_MASK_MCHECK)
+#define local_mcck_disable() \
+	__set_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT)
+
+/*
+ * Basic Machine Check/Program Check Handler.
+ */
+
+extern void s390_base_mcck_handler(void);
+extern void s390_base_pgm_handler(void);
+extern void s390_base_ext_handler(void);
+
+extern void (*s390_base_mcck_handler_fn)(void);
+extern void (*s390_base_pgm_handler_fn)(void);
+extern void (*s390_base_ext_handler_fn)(void);
+
+#define ARCH_LOW_ADDRESS_LIMIT	0x7fffffffUL
+
+extern int memcpy_real(void *, void *, size_t);
+extern void memcpy_absolute(void *, void *, size_t);
+
+#define mem_assign_absolute(dest, val) do {			\
+	__typeof__(dest) __tmp = (val);				\
+	/* copy is sizeof(__tmp) bytes, so sizes must match */	\
+	BUILD_BUG_ON(sizeof(__tmp) != sizeof(val));		\
+	memcpy_absolute(&(dest), &__tmp, sizeof(__tmp));	\
+} while (0)
+
+/*
+ * Helper macro for exception table entries
+ */
+#define EX_TABLE(_fault, _target)	\
+	".section __ex_table,\"a\"\n"	\
+	".align	4\n"			\
+	".long	(" #_fault ") - .\n"	\
+	".long	(" #_target ") - .\n"	\
+	".previous\n"
+
+#else /* __ASSEMBLY__ */
+
+#define EX_TABLE(_fault, _target)	\
+	.section __ex_table,"a"	;	\
+	.align	4 ;			\
+	.long	(_fault) - . ;		\
+	.long	(_target) - . ;		\
+	.previous
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_S390_PROCESSOR_H */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
new file mode 100644
index 000000000..6feda2599
--- /dev/null
+++ b/arch/s390/include/asm/ptrace.h
@@ -0,0 +1,177 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ */
+#ifndef _S390_PTRACE_H
+#define _S390_PTRACE_H
+
+#include <uapi/asm/ptrace.h>
+
+#define PIF_SYSCALL		0	/* inside a system call */
+#define PIF_PER_TRAP		1	/* deliver sigtrap on return to user */
+
+#define _PIF_SYSCALL		(1<<PIF_SYSCALL)
+#define _PIF_PER_TRAP		(1<<PIF_PER_TRAP)
+
+#ifndef __ASSEMBLY__
+
+#define PSW_KERNEL_BITS	(PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_HOME | \
+			 PSW_MASK_EA | PSW_MASK_BA)
+#define PSW_USER_BITS	(PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \
+			 PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
+			 PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
+
+struct psw_bits {
+	unsigned long long	: 1;
+	unsigned long long r	: 1; /* PER-Mask */
+	unsigned long long	: 3;
+	unsigned long long t	: 1; /* DAT Mode */
+	unsigned long long i	: 1; /* Input/Output Mask */
+	unsigned long long e	: 1; /* External Mask */
+	unsigned long long key	: 4; /* PSW Key */
+	unsigned long long	: 1;
+	unsigned long long m	: 1; /* Machine-Check Mask */
+	unsigned long long w	: 1; /* Wait State */
+	unsigned long long p	: 1; /* Problem State */
+	unsigned long long as	: 2; /* Address Space Control */
+	unsigned long long cc	: 2; /* Condition Code */
+	unsigned long long pm	: 4; /* Program Mask */
+	unsigned long long ri	: 1; /* Runtime Instrumentation */
+	unsigned long long	: 6;
+	unsigned long long eaba : 2; /* Addressing Mode */
+	unsigned long long	: 31;
+	unsigned long long ia	: 64;/* Instruction Address */
+};
+
+enum {
+	PSW_AMODE_24BIT = 0,
+	PSW_AMODE_31BIT = 1,
+	PSW_AMODE_64BIT = 3
+};
+
+enum {
+	PSW_AS_PRIMARY	 = 0,
+	PSW_AS_ACCREG	 = 1,
+	PSW_AS_SECONDARY = 2,
+	PSW_AS_HOME	 = 3
+};
+
+#define psw_bits(__psw) (*({			\
+	typecheck(psw_t, __psw);		\
+	&(*(struct psw_bits *)(&(__psw)));	\
+}))
+
+/*
+ * The pt_regs struct defines the way the registers are stored on
+ * the stack during a system call.
+ */
+struct pt_regs 
+{
+	unsigned long args[1];
+	psw_t psw;
+	unsigned long gprs[NUM_GPRS];
+	unsigned long orig_gpr2;
+	unsigned int int_code;
+	unsigned int int_parm;
+	unsigned long int_parm_long;
+	unsigned long flags;
+};
+
+/*
+ * Program event recording (PER) register set.
+ */
+struct per_regs {
+	unsigned long control;		/* PER control bits */
+	unsigned long start;		/* PER starting address */
+	unsigned long end;		/* PER ending address */
+};
+
+/*
+ * PER event contains information about the cause of the last PER exception.
+ */
+struct per_event {
+	unsigned short cause;		/* PER code, ATMID and AI */
+	unsigned long address;		/* PER address */
+	unsigned char paid;		/* PER access identification */
+};
+
+/*
+ * Simplified per_info structure used to decode the ptrace user space ABI.
+ */
+struct per_struct_kernel {
+	unsigned long cr9;		/* PER control bits */
+	unsigned long cr10;		/* PER starting address */
+	unsigned long cr11;		/* PER ending address */
+	unsigned long bits;		/* Obsolete software bits */
+	unsigned long starting_addr;	/* User specified start address */
+	unsigned long ending_addr;	/* User specified end address */
+	unsigned short perc_atmid;	/* PER trap ATMID */
+	unsigned long address;		/* PER trap instruction address */
+	unsigned char access_id;	/* PER trap access identification */
+};
+
+#define PER_EVENT_MASK			0xEB000000UL
+
+#define PER_EVENT_BRANCH		0x80000000UL
+#define PER_EVENT_IFETCH		0x40000000UL
+#define PER_EVENT_STORE			0x20000000UL
+#define PER_EVENT_STORE_REAL		0x08000000UL
+#define PER_EVENT_TRANSACTION_END	0x02000000UL
+#define PER_EVENT_NULLIFICATION		0x01000000UL
+
+#define PER_CONTROL_MASK		0x00e00000UL
+
+#define PER_CONTROL_BRANCH_ADDRESS	0x00800000UL
+#define PER_CONTROL_SUSPENSION		0x00400000UL
+#define PER_CONTROL_ALTERATION		0x00200000UL
+
+static inline void set_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+	regs->flags |= (1UL << flag);	/* UL: flags is an unsigned long */
+}
+
+static inline void clear_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+	regs->flags &= ~(1UL << flag);	/* UL mask leaves bits 32-63 intact */
+}
+
+static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+	return !!(regs->flags & (1UL << flag));	/* normalised to 0 or 1 */
+}
+
+/*
+ * These are defined as per linux/ptrace.h, which see.
+ */
+#define arch_has_single_step()	(1)
+#define arch_has_block_step()	(1)
+
+#define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
+#define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
+#define user_stack_pointer(regs)((regs)->gprs[15])
+#define profile_pc(regs) instruction_pointer(regs)
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->gprs[2];
+}
+
+static inline void instruction_pointer_set(struct pt_regs *regs,
+					   unsigned long val)
+{
+	regs->psw.addr = val | PSW_ADDR_AMODE;
+}
+
+int regs_query_register_offset(const char *name);
+const char *regs_query_register_name(unsigned int offset);
+unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset);
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
+
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+	return regs->gprs[15] & PSW_ADDR_INSN;
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* _S390_PTRACE_H */
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
new file mode 100644
index 000000000..998b61cd0
--- /dev/null
+++ b/arch/s390/include/asm/qdio.h
@@ -0,0 +1,430 @@
+/*
+ * Copyright IBM Corp. 2000, 2008
+ * Author(s): Utz Bacher <utz.bacher@de.ibm.com>
+ *	      Jan Glauber <jang@linux.vnet.ibm.com>
+ *
+ */
+#ifndef __QDIO_H__
+#define __QDIO_H__
+
+#include <linux/interrupt.h>
+#include <asm/cio.h>
+#include <asm/ccwdev.h>
+
+/* only use 4 queues to save some cachelines */
+#define QDIO_MAX_QUEUES_PER_IRQ		4
+#define QDIO_MAX_BUFFERS_PER_Q		128
+#define QDIO_MAX_BUFFERS_MASK		(QDIO_MAX_BUFFERS_PER_Q - 1)
+#define QDIO_MAX_ELEMENTS_PER_BUFFER	16
+#define QDIO_SBAL_SIZE			256
+
+#define QDIO_QETH_QFMT			0
+#define QDIO_ZFCP_QFMT			1
+#define QDIO_IQDIO_QFMT			2
+
+/**
+ * struct qdesfmt0 - queue descriptor, format 0
+ * @sliba: storage list information block address
+ * @sla: storage list address
+ * @slsba: storage list state block address
+ * @akey: access key for DLIB
+ * @bkey: access key for SL
+ * @ckey: access key for SBALs
+ * @dkey: access key for SLSB
+ */
+struct qdesfmt0 {
+	u64 sliba;
+	u64 sla;
+	u64 slsba;
+	u32	 : 32;
+	u32 akey : 4;
+	u32 bkey : 4;
+	u32 ckey : 4;
+	u32 dkey : 4;
+	u32	 : 16;
+} __attribute__ ((packed));
+
+#define QDR_AC_MULTI_BUFFER_ENABLE 0x01
+
+/**
+ * struct qdr - queue description record (QDR)
+ * @qfmt: queue format
+ * @pfmt: implementation dependent parameter format
+ * @ac: adapter characteristics
+ * @iqdcnt: input queue descriptor count
+ * @oqdcnt: output queue descriptor count
+ * @iqdsz: input queue descriptor size
+ * @oqdsz: output queue descriptor size
+ * @qiba: queue information block address
+ * @qkey: queue information block key
+ * @qdf0: queue descriptions
+ */
+struct qdr {
+	u32 qfmt   : 8;
+	u32 pfmt   : 8;
+	u32	   : 8;
+	u32 ac	   : 8;
+	u32	   : 8;
+	u32 iqdcnt : 8;
+	u32	   : 8;
+	u32 oqdcnt : 8;
+	u32	   : 8;
+	u32 iqdsz  : 8;
+	u32	   : 8;
+	u32 oqdsz  : 8;
+	/* private: */
+	u32 res[9];
+	/* public: */
+	u64 qiba;
+	u32	   : 32;
+	u32 qkey   : 4;
+	u32	   : 28;
+	struct qdesfmt0 qdf0[126];
+} __attribute__ ((packed, aligned(4096)));
+
+#define QIB_AC_OUTBOUND_PCI_SUPPORTED	0x40
+#define QIB_RFLAGS_ENABLE_QEBSM		0x80
+#define QIB_RFLAGS_ENABLE_DATA_DIV	0x02
+
+/**
+ * struct qib - queue information block (QIB)
+ * @qfmt: queue format
+ * @pfmt: implementation dependent parameter format
+ * @rflags: QEBSM
+ * @ac: adapter characteristics
+ * @isliba: absolute address of first input SLIB
+ * @osliba: absolute address of first output SLIB
+ * @ebcnam: adapter identifier in EBCDIC
+ * @parm: implementation dependent parameters
+ */
+struct qib {
+	u32 qfmt   : 8;
+	u32 pfmt   : 8;
+	u32 rflags : 8;
+	u32 ac	   : 8;
+	u32	   : 32;
+	u64 isliba;
+	u64 osliba;
+	u32	   : 32;
+	u32	   : 32;
+	u8 ebcnam[8];
+	/* private: */
+	u8 res[88];
+	/* public: */
+	u8 parm[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct slibe - storage list information block element (SLIBE)
+ * @parms: implementation dependent parameters
+ */
+struct slibe {
+	u64 parms;
+};
+
+/**
+ * struct qaob - queue asynchronous operation block
+ * @res0: reserved parameters
+ * @res1: reserved parameter
+ * @res2: reserved parameter
+ * @res3: reserved parameter
+ * @aorc: asynchronous operation return code
+ * @flags: internal flags
+ * @cbtbs: control block type
+ * @sb_count: number of storage blocks
+ * @sba: storage block element addresses
+ * @dcount: size of storage block elements
+ * @user0: user definable value
+ * @res4: reserved parameter
+ * @user1: user definable value
+ * @user2: user definable value
+ */
+struct qaob {
+	u64 res0[6];
+	u8 res1;
+	u8 res2;
+	u8 res3;
+	u8 aorc;
+	u8 flags;
+	u16 cbtbs;
+	u8 sb_count;
+	u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER];
+	u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER];
+	u64 user0;
+	u64 res4[2];
+	u64 user1;
+	u64 user2;
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct slib - storage list information block (SLIB)
+ * @nsliba: next SLIB address (if any)
+ * @sla: SL address
+ * @slsba: SLSB address
+ * @slibe: SLIB elements
+ */
+struct slib {
+	u64 nsliba;
+	u64 sla;
+	u64 slsba;
+	/* private: */
+	u8 res[1000];
+	/* public: */
+	struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(2048)));
+
+#define SBAL_EFLAGS_LAST_ENTRY		0x40
+#define SBAL_EFLAGS_CONTIGUOUS		0x20
+#define SBAL_EFLAGS_FIRST_FRAG		0x04
+#define SBAL_EFLAGS_MIDDLE_FRAG		0x08
+#define SBAL_EFLAGS_LAST_FRAG		0x0c
+#define SBAL_EFLAGS_MASK		0x6f
+
+#define SBAL_SFLAGS0_PCI_REQ		0x40
+#define SBAL_SFLAGS0_DATA_CONTINUATION	0x20
+
+/* Awesome OpenFCP extensions */
+#define SBAL_SFLAGS0_TYPE_STATUS	0x00
+#define SBAL_SFLAGS0_TYPE_WRITE		0x08
+#define SBAL_SFLAGS0_TYPE_READ		0x10
+#define SBAL_SFLAGS0_TYPE_WRITE_READ	0x18
+#define SBAL_SFLAGS0_MORE_SBALS		0x04
+#define SBAL_SFLAGS0_COMMAND		0x02
+#define SBAL_SFLAGS0_LAST_SBAL		0x00
+#define SBAL_SFLAGS0_ONLY_SBAL		SBAL_SFLAGS0_COMMAND
+#define SBAL_SFLAGS0_MIDDLE_SBAL	SBAL_SFLAGS0_MORE_SBALS
+#define SBAL_SFLAGS0_FIRST_SBAL (SBAL_SFLAGS0_MORE_SBALS | SBAL_SFLAGS0_COMMAND)
+
+/**
+ * struct qdio_buffer_element - SBAL entry
+ * @eflags: SBAL entry flags
+ * @scount: SBAL count
+ * @sflags: whole SBAL flags
+ * @length: length
+ * @addr: address
+*/
+struct qdio_buffer_element {
+	u8 eflags;
+	/* private: */
+	u8 res1;
+	/* public: */
+	u8 scount;
+	u8 sflags;
+	u32 length;
+	void *addr;
+} __attribute__ ((packed, aligned(16)));
+
+/**
+ * struct qdio_buffer - storage block address list (SBAL)
+ * @element: SBAL entries
+ */
+struct qdio_buffer {
+	struct qdio_buffer_element element[QDIO_MAX_ELEMENTS_PER_BUFFER];
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct sl_element - storage list entry
+ * @sbal: absolute SBAL address
+ */
+struct sl_element {
+	unsigned long sbal;
+} __attribute__ ((packed));
+
+/**
+ * struct sl - storage list (SL)
+ * @element: SL entries
+ */
+struct sl {
+	struct sl_element element[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(1024)));
+
+/**
+ * struct slsb - storage list state block (SLSB)
+ * @val: state per buffer
+ */
+struct slsb {
+	u8 val[QDIO_MAX_BUFFERS_PER_Q];
+} __attribute__ ((packed, aligned(256)));
+
+/**
+ * struct qdio_outbuf_state - SBAL related asynchronous operation information
+ *   (for communication with upper layer programs)
+ *   (only required for use with completion queues)
+ * @flags: flags indicating state of buffer
+ * @aob: pointer to QAOB used for the particular SBAL
+ * @user: pointer to upper layer program's state information related to SBAL
+ *        (stored in user1 data of QAOB)
+ */
+struct qdio_outbuf_state {
+	u8 flags;
+	struct qaob *aob;
+	void *user;
+};
+
+#define QDIO_OUTBUF_STATE_FLAG_NONE	0x00
+#define QDIO_OUTBUF_STATE_FLAG_PENDING	0x01
+
+#define CHSC_AC1_INITIATE_INPUTQ	0x80
+
+
+/* qdio adapter-characteristics-1 flag */
+#define AC1_SIGA_INPUT_NEEDED		0x40	/* process input queues */
+#define AC1_SIGA_OUTPUT_NEEDED		0x20	/* process output queues */
+#define AC1_SIGA_SYNC_NEEDED		0x10	/* ask hypervisor to sync */
+#define AC1_AUTOMATIC_SYNC_ON_THININT	0x08	/* set by hypervisor */
+#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI	0x04	/* set by hypervisor */
+#define AC1_SC_QEBSM_AVAILABLE		0x02	/* available for subchannel */
+#define AC1_SC_QEBSM_ENABLED		0x01	/* enabled for subchannel */
+
+#define CHSC_AC2_MULTI_BUFFER_AVAILABLE	0x0080
+#define CHSC_AC2_MULTI_BUFFER_ENABLED	0x0040
+#define CHSC_AC2_DATA_DIV_AVAILABLE	0x0010
+#define CHSC_AC2_DATA_DIV_ENABLED	0x0002
+
+#define CHSC_AC3_FORMAT2_CQ_AVAILABLE	0x8000
+
+struct qdio_ssqd_desc {
+	u8 flags;
+	u8:8;
+	u16 sch;
+	u8 qfmt;
+	u8 parm;
+	u8 qdioac1;
+	u8 sch_class;
+	u8 pcnt;
+	u8 icnt;
+	u8:8;
+	u8 ocnt;
+	u8:8;
+	u8 mbccnt;
+	u16 qdioac2;
+	u64 sch_token;
+	u8 mro;
+	u8 mri;
+	u16 qdioac3;
+	u16:16;
+	u8:8;
+	u8 mmwc;
+} __attribute__ ((packed));
+
+/* params are: ccw_device, qdio_error, queue_number,
+   first element processed, number of elements processed, int_parm */
+typedef void qdio_handler_t(struct ccw_device *, unsigned int, int,
+			    int, int, unsigned long);
+
+/* qdio errors reported to the upper-layer program */
+#define QDIO_ERROR_ACTIVATE			0x0001
+#define QDIO_ERROR_GET_BUF_STATE		0x0002
+#define QDIO_ERROR_SET_BUF_STATE		0x0004
+#define QDIO_ERROR_SLSB_STATE			0x0100
+
+#define QDIO_ERROR_FATAL			0x00ff
+#define QDIO_ERROR_TEMPORARY			0xff00
+
+/* for qdio_cleanup */
+#define QDIO_FLAG_CLEANUP_USING_CLEAR		0x01
+#define QDIO_FLAG_CLEANUP_USING_HALT		0x02
+
+/**
+ * struct qdio_initialize - qdio initialization data
+ * @cdev: associated ccw device
+ * @q_format: queue format
+ * @adapter_name: name for the adapter
+ * @qib_param_field_format: format for qib_parm_field
+ * @qib_param_field: pointer to 128 bytes or NULL, if no param field
+ * @qib_rflags: rflags to set
+ * @input_slib_elements: pointer to no_input_qs * 128 words of data or NULL
+ * @output_slib_elements: pointer to no_output_qs * 128 words of data or NULL
+ * @no_input_qs: number of input queues
+ * @no_output_qs: number of output queues
+ * @input_handler: handler to be called for input queues
+ * @output_handler: handler to be called for output queues
+ * @queue_start_poll_array: polling handlers (one per input queue or NULL)
+ * @int_parm: interruption parameter
+ * @input_sbal_addr_array:  address of no_input_qs * 128 pointers
+ * @output_sbal_addr_array: address of no_output_qs * 128 pointers
+ * @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL)
+ */
+struct qdio_initialize {
+	struct ccw_device *cdev;
+	unsigned char q_format;
+	unsigned char qdr_ac;
+	unsigned char adapter_name[8];
+	unsigned int qib_param_field_format;
+	unsigned char *qib_param_field;
+	unsigned char qib_rflags;
+	unsigned long *input_slib_elements;
+	unsigned long *output_slib_elements;
+	unsigned int no_input_qs;
+	unsigned int no_output_qs;
+	qdio_handler_t *input_handler;
+	qdio_handler_t *output_handler;
+	void (**queue_start_poll_array) (struct ccw_device *, int,
+					  unsigned long);
+	int scan_threshold;
+	unsigned long int_parm;
+	void **input_sbal_addr_array;
+	void **output_sbal_addr_array;
+	struct qdio_outbuf_state *output_sbal_state_array;
+};
+
+/**
+ * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc()
+ * @l3_ipv6_addr: entry contains IPv6 address
+ * @l3_ipv4_addr: entry contains IPv4 address
+ * @l2_addr_lnid: entry contains MAC address and VLAN ID
+ */
+enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid};
+
+/**
+ * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc()
+ * @nit:  Network interface token
+ * @addr: Address of one of the three types
+ *
+ * The struct is passed to the callback function by qdio_brinfo_desc()
+ */
+struct qdio_brinfo_entry_l3_ipv6 {
+	u64 nit;
+	struct { unsigned char _s6_addr[16]; } addr;
+} __packed;
+struct qdio_brinfo_entry_l3_ipv4 {
+	u64 nit;
+	struct { uint32_t _s_addr; } addr;
+} __packed;
+struct qdio_brinfo_entry_l2 {
+	u64 nit;
+	struct { u8 mac[6]; u16 lnid; } addr_lnid;
+} __packed;
+
+#define QDIO_STATE_INACTIVE		0x00000002 /* after qdio_cleanup */
+#define QDIO_STATE_ESTABLISHED		0x00000004 /* after qdio_establish */
+#define QDIO_STATE_ACTIVE		0x00000008 /* after qdio_activate */
+#define QDIO_STATE_STOPPED		0x00000010 /* after queues went down */
+
+#define QDIO_FLAG_SYNC_INPUT		0x01
+#define QDIO_FLAG_SYNC_OUTPUT		0x02
+#define QDIO_FLAG_PCI_OUT		0x10
+
+int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count);
+void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count);
+void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count);
+
+extern int qdio_allocate(struct qdio_initialize *);
+extern int qdio_establish(struct qdio_initialize *);
+extern int qdio_activate(struct ccw_device *);
+extern void qdio_release_aob(struct qaob *);
+extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int,
+		   unsigned int);
+extern int qdio_start_irq(struct ccw_device *, int);
+extern int qdio_stop_irq(struct ccw_device *, int);
+extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *);
+extern int qdio_shutdown(struct ccw_device *, int);
+extern int qdio_free(struct ccw_device *);
+extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
+extern int qdio_pnso_brinfo(struct subchannel_id schid,
+		int cnc, u16 *response,
+		void (*cb)(void *priv, enum qdio_brinfo_entry_type type,
+				void *entry),
+		void *priv);
+
+#endif /* __QDIO_H__ */
diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h
new file mode 100644
index 000000000..72786067b
--- /dev/null
+++ b/arch/s390/include/asm/reset.h
@@ -0,0 +1,20 @@
+/*
+ *    Copyright IBM Corp. 2006
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_RESET_H
+#define _ASM_S390_RESET_H
+
+#include <linux/list.h>
+
+struct reset_call {
+	struct list_head list;
+	void (*fn)(void);
+};
+
+extern void register_reset_call(struct reset_call *reset);
+extern void unregister_reset_call(struct reset_call *reset);
+extern void s390_reset_system(void (*fn_pre)(void),
+			      void (*fn_post)(void *), void *data);
+#endif /* _ASM_S390_RESET_H */
diff --git a/arch/s390/include/asm/runtime_instr.h b/arch/s390/include/asm/runtime_instr.h
new file mode 100644
index 000000000..402ad6df4
--- /dev/null
+++ b/arch/s390/include/asm/runtime_instr.h
@@ -0,0 +1,90 @@
+#ifndef _RUNTIME_INSTR_H
+#define _RUNTIME_INSTR_H
+
+#define S390_RUNTIME_INSTR_START	0x1
+#define S390_RUNTIME_INSTR_STOP		0x2
+
+struct runtime_instr_cb {
+	__u64 buf_current;
+	__u64 buf_origin;
+	__u64 buf_limit;
+
+	__u32 valid		: 1;
+	__u32 pstate		: 1;
+	__u32 pstate_set_buf	: 1;
+	__u32 home_space	: 1;
+	__u32 altered		: 1;
+	__u32			: 3;
+	__u32 pstate_sample	: 1;
+	__u32 sstate_sample	: 1;
+	__u32 pstate_collect	: 1;
+	__u32 sstate_collect	: 1;
+	__u32			: 1;
+	__u32 halted_int	: 1;
+	__u32 int_requested	: 1;
+	__u32 buffer_full_int	: 1;
+	__u32 key		: 4;
+	__u32			: 9;
+	__u32 rgs		: 3;
+
+	__u32 mode		: 4;
+	__u32 next		: 1;
+	__u32 mae		: 1;
+	__u32			: 2;
+	__u32 call_type_br	: 1;
+	__u32 return_type_br	: 1;
+	__u32 other_type_br	: 1;
+	__u32 bc_other_type	: 1;
+	__u32 emit		: 1;
+	__u32 tx_abort		: 1;
+	__u32			: 2;
+	__u32 bp_xn		: 1;
+	__u32 bp_xt		: 1;
+	__u32 bp_ti		: 1;
+	__u32 bp_ni		: 1;
+	__u32 suppr_y		: 1;
+	__u32 suppr_z		: 1;
+
+	__u32 dc_miss_extra	: 1;
+	__u32 lat_lev_ignore	: 1;
+	__u32 ic_lat_lev	: 4;
+	__u32 dc_lat_lev	: 4;
+
+	__u64 reserved1;
+	__u64 scaling_factor;
+	__u64 rsic;
+	__u64 reserved2;
+} __packed __aligned(8);
+
+extern struct runtime_instr_cb runtime_instr_empty_cb;
+
+static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+	asm volatile(".insn	rsy,0xeb0000000060,0,0,%0"	/* LRIC */
+		: : "Q" (*cb));
+}
+
+static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+	asm volatile(".insn	rsy,0xeb0000000061,0,0,%0"	/* STRIC */
+		: "=Q" (*cb) : : "cc");
+}
+
+static inline void save_ri_cb(struct runtime_instr_cb *cb_prev)
+{
+	if (cb_prev)
+		store_runtime_instr_cb(cb_prev);
+}
+
+static inline void restore_ri_cb(struct runtime_instr_cb *cb_next,
+				 struct runtime_instr_cb *cb_prev)
+{
+	if (!cb_next && cb_prev)
+		cb_next = &runtime_instr_empty_cb;
+	if (cb_next)
+		load_runtime_instr_cb(cb_next);
+}
+
+void exit_thread_runtime_instr(void);
+
+#endif /* _RUNTIME_INSTR_H */
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
new file mode 100644
index 000000000..4b43ee7e6
--- /dev/null
+++ b/arch/s390/include/asm/rwsem.h
@@ -0,0 +1,237 @@
+#ifndef _S390_RWSEM_H
+#define _S390_RWSEM_H
+
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 2002
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Based on asm-alpha/semaphore.h and asm-i386/rwsem.h
+ */
+
+/*
+ *
+ * The MSW of the count is the negated number of active writers and waiting
+ * lockers, and the LSW is the total number of active locks
+ *
+ * The lock count is initialized to 0 (no active and no waiting lockers).
+ *
+ * When a writer adds ACTIVE_WRITE_BIAS, it'll get 0xffffffff00000001 for an
+ * uncontended lock. This can be determined because csg yields the old value.
+ * Readers increment by 1 and see a positive value when uncontended, negative
+ * if there are writers (and maybe) readers waiting (in which case it goes to
+ * sleep).
+ *
+ * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
+ * be extended to 65534 by manually checking the whole MSW rather than relying
+ * on the S flag.
+ *
+ * The value of ACTIVE_BIAS supports up to 65535 active processes.
+ *
+ * This should be totally fair - if anything is waiting, a process that wants a
+ * lock will go to the back of the queue. When the currently active lock is
+ * released, if there's a writer at the front of the queue, then that and only
+ * that will be woken up; if there's a bunch of consecutive readers at the
+ * front, then they'll all be woken up, but no other readers will be.
+ */
+
+#ifndef _LINUX_RWSEM_H
+#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
+#endif
+
+#define RWSEM_UNLOCKED_VALUE	0x0000000000000000L
+#define RWSEM_ACTIVE_BIAS	0x0000000000000001L
+#define RWSEM_ACTIVE_MASK	0x00000000ffffffffL
+#define RWSEM_WAITING_BIAS	(-0x0000000100000000L)
+#define RWSEM_ACTIVE_READ_BIAS	RWSEM_ACTIVE_BIAS
+#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+
+/*
+ * lock for reading: atomically add ACTIVE_READ_BIAS, slow path on contention
+ */
+static inline void __down_read(struct rw_semaphore *sem)
+{
+	signed long old, new;
+
+	asm volatile(
+		"	lg	%0,%2\n"		/* old = sem->count */
+		"0:	lgr	%1,%0\n"		/* new = old */
+		"	aghi	%1,%4\n"		/* new += bias */
+		"	csg	%0,%1,%2\n"	/* store new if count == old */
+		"	jl	0b"		/* count changed: retry */
+		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
+		: "Q" (sem->count), "i" (RWSEM_ACTIVE_READ_BIAS)
+		: "cc", "memory");
+	if (old < 0)	/* count was negative before the increment */
+		rwsem_down_read_failed(sem);
+}
+
+/*
+ * trylock for reading -- returns 1 if successful, 0 if contention
+ */
+static inline int __down_read_trylock(struct rw_semaphore *sem)
+{
+	signed long old, new;
+
+	asm volatile(
+		"	lg	%0,%2\n"		/* old = sem->count */
+		"0:	ltgr	%1,%0\n"		/* new = old, test its sign */
+		"	jm	1f\n"		/* negative: bail out, count untouched */
+		"	aghi	%1,%4\n"		/* new += bias */
+		"	csg	%0,%1,%2\n"	/* store new if count == old */
+		"	jl	0b\n"		/* count changed: re-test and retry */
+		"1:"
+		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
+		: "Q" (sem->count), "i" (RWSEM_ACTIVE_READ_BIAS)
+		: "cc", "memory");
+	return old >= 0 ? 1 : 0;	/* old < 0 only on the bail-out path */
+}
+
+/*
+ * lock for writing; the subclass argument is not used in this implementation
+ */
+static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
+{
+	signed long old, new, tmp;
+
+	tmp = RWSEM_ACTIVE_WRITE_BIAS;	/* "ag" takes its addend from memory */
+	asm volatile(
+		"	lg	%0,%2\n"
+		"0:	lgr	%1,%0\n"
+		"	ag	%1,%4\n"
+		"	csg	%0,%1,%2\n"
+		"	jl	0b"
+		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
+		: "Q" (sem->count), "m" (tmp)
+		: "cc", "memory");
+	if (old != 0)	/* count was not RWSEM_UNLOCKED_VALUE: slow path */
+		rwsem_down_write_failed(sem);
+}
+
+static inline void __down_write(struct rw_semaphore *sem)
+{
+	__down_write_nested(sem, 0);
+}
+
+/*
+ * trylock for writing -- returns 1 if successful, 0 if contention
+ */
+static inline int __down_write_trylock(struct rw_semaphore *sem)
+{
+	signed long old;
+
+	asm volatile(
+		"	lg	%0,%1\n"		/* old = sem->count */
+		"0:	ltgr	%0,%0\n"		/* test old against zero */
+		"	jnz	1f\n"		/* non-zero: bail out, count untouched */
+		"	csg	%0,%3,%1\n"	/* count: 0 -> ACTIVE_WRITE_BIAS */
+		"	jl	0b\n"		/* count changed: re-test and retry */
+		"1:"
+		: "=&d" (old), "=Q" (sem->count)
+		: "Q" (sem->count), "d" (RWSEM_ACTIVE_WRITE_BIAS)
+		: "cc", "memory");
+	return (old == RWSEM_UNLOCKED_VALUE) ? 1 : 0;
+}
+
+/*
+ * unlock after reading: drop one active count, wake waiters if we were last
+ */
+static inline void __up_read(struct rw_semaphore *sem)
+{
+	signed long old, new;
+
+	asm volatile(
+		"	lg	%0,%2\n"
+		"0:	lgr	%1,%0\n"
+		"	aghi	%1,%4\n"
+		"	csg	%0,%1,%2\n"
+		"	jl	0b"
+		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
+		: "Q" (sem->count), "i" (-RWSEM_ACTIVE_READ_BIAS)
+		: "cc", "memory");
+	/* negative count with no active lockers left: someone is waiting */
+	if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0)
+		rwsem_wake(sem);
+}
+
+/*
+ * unlock after writing: remove ACTIVE_WRITE_BIAS, wake waiters if any remain
+ */
+static inline void __up_write(struct rw_semaphore *sem)
+{
+	signed long old, new, tmp;
+
+	tmp = -RWSEM_ACTIVE_WRITE_BIAS;
+	asm volatile(
+		"	lg	%0,%2\n"
+		"0:	lgr	%1,%0\n"
+		"	ag	%1,%4\n"
+		"	csg	%0,%1,%2\n"
+		"	jl	0b"
+		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
+		: "Q" (sem->count), "m" (tmp)
+		: "cc", "memory");
+	/* negative count with no active lockers left: someone is waiting */
+	if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0)
+		rwsem_wake(sem);
+}
+
+/*
+ * downgrade write lock to read lock
+ */
+static inline void __downgrade_write(struct rw_semaphore *sem)
+{
+	signed long old, new, tmp;
+
+	tmp = -RWSEM_WAITING_BIAS;	/* WRITE_BIAS - WAITING_BIAS == READ_BIAS */
+	asm volatile(
+		"	lg	%0,%2\n"
+		"0:	lgr	%1,%0\n"
+		"	ag	%1,%4\n"
+		"	csg	%0,%1,%2\n"
+		"	jl	0b"
+		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
+		: "Q" (sem->count), "m" (tmp)
+		: "cc", "memory");
+	if (new > 1)	/* NOTE(review): asm-generic tests "< 0" here - confirm */
+		rwsem_downgrade_wake(sem);
+}
+
+/*
+ * atomically add delta to sem->count (csg retry loop); result is discarded
+ */
+static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
+{
+	signed long old, new;
+
+	asm volatile(
+		"	lg	%0,%2\n"
+		"0:	lgr	%1,%0\n"
+		"	agr	%1,%4\n"
+		"	csg	%0,%1,%2\n"
+		"	jl	0b"
+		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
+		: "Q" (sem->count), "d" (delta)
+		: "cc", "memory");
+}
+
+/*
+ * atomically add delta to sem->count and return the resulting (new) count
+ */
+static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
+{
+	signed long old, new;
+
+	asm volatile(
+		"	lg	%0,%2\n"
+		"0:	lgr	%1,%0\n"
+		"	agr	%1,%4\n"
+		"	csg	%0,%1,%2\n"
+		"	jl	0b"
+		: "=&d" (old), "=&d" (new), "=Q" (sem->count)
+		: "Q" (sem->count), "d" (delta)
+		: "cc", "memory");
+	return new;	/* value that the successful csg stored */
+}
+
+#endif /* _S390_RWSEM_H */
diff --git a/arch/s390/include/asm/schid.h b/arch/s390/include/asm/schid.h
new file mode 100644
index 000000000..40b47dfa9
--- /dev/null
+++ b/arch/s390/include/asm/schid.h
@@ -0,0 +1,21 @@
+#ifndef ASM_SCHID_H
+#define ASM_SCHID_H
+
+#include <linux/string.h>
+#include <uapi/asm/schid.h>
+
+/* Reset a caller-provided subchannel_id: all zero, then 'one' set to 1. */
+static inline void
+init_subchannel_id(struct subchannel_id *schid)
+{
+	memset(schid, 0, sizeof(*schid));
+	schid->one = 1;
+}
+
+static inline int
+schid_equal(struct subchannel_id *schid1, struct subchannel_id *schid2)
+{
+	return memcmp(schid1, schid2, sizeof(*schid1)) == 0;
+}
+
+#endif /* ASM_SCHID_H */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
new file mode 100644
index 000000000..f1096bab5
--- /dev/null
+++ b/arch/s390/include/asm/sclp.h
@@ -0,0 +1,78 @@
+/*
+ *    Copyright IBM Corp. 2007
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_SCLP_H
+#define _ASM_S390_SCLP_H
+
+#include <linux/types.h>
+#include <asm/chpid.h>
+#include <asm/cpu.h>
+
+#define SCLP_CHP_INFO_MASK_SIZE		32
+
+struct sclp_chp_info {
+	u8 recognized[SCLP_CHP_INFO_MASK_SIZE];
+	u8 standby[SCLP_CHP_INFO_MASK_SIZE];
+	u8 configured[SCLP_CHP_INFO_MASK_SIZE];
+};
+
+#define LOADPARM_LEN 8
+
+struct sclp_ipl_info {
+	int is_valid;
+	int has_dump;
+	char loadparm[LOADPARM_LEN];
+};
+
+struct sclp_cpu_entry {
+	u8 core_id;
+	u8 reserved0[2];
+	u8 : 3;
+	u8 siif : 1;
+	u8 sigpif : 1;
+	u8 : 3;
+	u8 reserved2[10];
+	u8 type;
+	u8 reserved1;
+} __attribute__((packed));
+
+struct sclp_cpu_info {
+	unsigned int configured;
+	unsigned int standby;
+	unsigned int combined;
+	int has_cpu_type;
+	struct sclp_cpu_entry cpu[MAX_CPU_ADDRESS + 1];
+};
+
+int sclp_get_cpu_info(struct sclp_cpu_info *info);
+int sclp_cpu_configure(u8 cpu);
+int sclp_cpu_deconfigure(u8 cpu);
+unsigned long long sclp_get_rnmax(void);
+unsigned long long sclp_get_rzm(void);
+unsigned int sclp_get_max_cpu(void);
+unsigned int sclp_get_mtid(u8 cpu_type);
+unsigned int sclp_get_mtid_max(void);
+unsigned int sclp_get_mtid_prev(void);
+int sclp_sdias_blk_count(void);
+int sclp_sdias_copy(void *dest, int blk_num, int nr_blks);
+int sclp_chp_configure(struct chp_id chpid);
+int sclp_chp_deconfigure(struct chp_id chpid);
+int sclp_chp_read_info(struct sclp_chp_info *info);
+void sclp_get_ipl_info(struct sclp_ipl_info *info);
+bool __init sclp_has_linemode(void);
+bool __init sclp_has_vt220(void);
+bool sclp_has_sprp(void);
+int sclp_pci_configure(u32 fid);
+int sclp_pci_deconfigure(u32 fid);
+int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
+unsigned long sclp_get_hsa_size(void);
+void sclp_early_detect(void);
+int sclp_has_siif(void);
+int sclp_has_sigpif(void);
+unsigned int sclp_get_ibc(void);
+
+long _sclp_print_early(const char *);
+
+#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
new file mode 100644
index 000000000..4af99cdad
--- /dev/null
+++ b/arch/s390/include/asm/scsw.h
@@ -0,0 +1,988 @@
+/*
+ *  Helper functions for scsw access.
+ *
+ *    Copyright IBM Corp. 2008, 2012
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_SCSW_H_
+#define _ASM_S390_SCSW_H_
+
+#include <linux/types.h>
+#include <asm/css_chars.h>
+#include <asm/cio.h>
+
+/**
+ * struct cmd_scsw - command-mode subchannel status word
+ * @key: subchannel key
+ * @sctl: suspend control
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @fmt: format
+ * @pfch: prefetch
+ * @isic: initial-status interruption control
+ * @alcc: address-limit checking control
+ * @ssi: suppress-suspended interruption
+ * @zcc: zero condition code
+ * @ectl: extended control
+ * @pno: path not operational
+ * @res: reserved
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @cpa: channel program address
+ * @dstat: device status
+ * @cstat: subchannel status
+ * @count: residual count
+ */
+struct cmd_scsw {
+	__u32 key  : 4;
+	__u32 sctl : 1;
+	__u32 eswf : 1;
+	__u32 cc   : 2;
+	__u32 fmt  : 1;
+	__u32 pfch : 1;
+	__u32 isic : 1;
+	__u32 alcc : 1;
+	__u32 ssi  : 1;
+	__u32 zcc  : 1;
+	__u32 ectl : 1;
+	__u32 pno  : 1;
+	__u32 res  : 1;
+	__u32 fctl : 3;
+	__u32 actl : 7;
+	__u32 stctl : 5;
+	__u32 cpa;
+	__u32 dstat : 8;
+	__u32 cstat : 8;
+	__u32 count : 16;
+} __attribute__ ((packed));
+
+/**
+ * struct tm_scsw - transport-mode subchannel status word
+ * @key: subchannel key
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @fmt: format
+ * @x: IRB-format control
+ * @q: interrogate-complete
+ * @ectl: extended control
+ * @pno: path not operational
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @tcw: TCW address
+ * @dstat: device status
+ * @cstat: subchannel status
+ * @fcxs: FCX status
+ * @schxs: subchannel-extended status
+ */
+struct tm_scsw {
+	u32 key:4;
+	u32 :1;
+	u32 eswf:1;
+	u32 cc:2;
+	u32 fmt:3;
+	u32 x:1;
+	u32 q:1;
+	u32 :1;
+	u32 ectl:1;
+	u32 pno:1;
+	u32 :1;
+	u32 fctl:3;
+	u32 actl:7;
+	u32 stctl:5;
+	u32 tcw;
+	u32 dstat:8;
+	u32 cstat:8;
+	u32 fcxs:8;
+	u32 schxs:8;
+} __attribute__ ((packed));
+
+/**
+ * struct eadm_scsw - subchannel status word for eadm subchannels
+ * @key: subchannel key
+ * @eswf: esw format
+ * @cc: deferred condition code
+ * @ectl: extended control
+ * @fctl: function control
+ * @actl: activity control
+ * @stctl: status control
+ * @aob: AOB address
+ * @dstat: device status
+ * @cstat: subchannel status
+ */
+struct eadm_scsw {
+	u32 key:4;
+	u32:1;
+	u32 eswf:1;
+	u32 cc:2;
+	u32:6;
+	u32 ectl:1;
+	u32:2;
+	u32 fctl:3;
+	u32 actl:7;
+	u32 stctl:5;
+	u32 aob;
+	u32 dstat:8;
+	u32 cstat:8;
+	u32:16;
+} __packed;
+
+/**
+ * union scsw - subchannel status word
+ * @cmd: command-mode SCSW
+ * @tm: transport-mode SCSW
+ * @eadm: eadm SCSW
+ */
+union scsw {
+	struct cmd_scsw cmd;
+	struct tm_scsw tm;
+	struct eadm_scsw eadm;
+} __packed;
+
+#define SCSW_FCTL_CLEAR_FUNC	 0x1
+#define SCSW_FCTL_HALT_FUNC	 0x2
+#define SCSW_FCTL_START_FUNC	 0x4
+
+#define SCSW_ACTL_SUSPENDED	 0x1
+#define SCSW_ACTL_DEVACT	 0x2
+#define SCSW_ACTL_SCHACT	 0x4
+#define SCSW_ACTL_CLEAR_PEND	 0x8
+#define SCSW_ACTL_HALT_PEND	 0x10
+#define SCSW_ACTL_START_PEND	 0x20
+#define SCSW_ACTL_RESUME_PEND	 0x40
+
+#define SCSW_STCTL_STATUS_PEND	 0x1
+#define SCSW_STCTL_SEC_STATUS	 0x2
+#define SCSW_STCTL_PRIM_STATUS	 0x4
+#define SCSW_STCTL_INTER_STATUS	 0x8
+#define SCSW_STCTL_ALERT_STATUS	 0x10
+
+#define DEV_STAT_ATTENTION	 0x80
+#define DEV_STAT_STAT_MOD	 0x40
+#define DEV_STAT_CU_END		 0x20
+#define DEV_STAT_BUSY		 0x10
+#define DEV_STAT_CHN_END	 0x08
+#define DEV_STAT_DEV_END	 0x04
+#define DEV_STAT_UNIT_CHECK	 0x02
+#define DEV_STAT_UNIT_EXCEP	 0x01
+
+#define SCHN_STAT_PCI		 0x80
+#define SCHN_STAT_INCORR_LEN	 0x40
+#define SCHN_STAT_PROG_CHECK	 0x20
+#define SCHN_STAT_PROT_CHECK	 0x10
+#define SCHN_STAT_CHN_DATA_CHK	 0x08
+#define SCHN_STAT_CHN_CTRL_CHK	 0x04
+#define SCHN_STAT_INTF_CTRL_CHK	 0x02
+#define SCHN_STAT_CHAIN_CHECK	 0x01
+
+/*
+ * architectured values for first sense byte
+ */
+#define SNS0_CMD_REJECT		0x80
+#define SNS_CMD_REJECT		SNS0_CMD_REJECT
+#define SNS0_INTERVENTION_REQ	0x40
+#define SNS0_BUS_OUT_CHECK	0x20
+#define SNS0_EQUIPMENT_CHECK	0x10
+#define SNS0_DATA_CHECK		0x08
+#define SNS0_OVERRUN		0x04
+#define SNS0_INCOMPL_DOMAIN	0x01
+
+/*
+ * architectured values for second sense byte
+ */
+#define SNS1_PERM_ERR		0x80
+#define SNS1_INV_TRACK_FORMAT	0x40
+#define SNS1_EOC		0x20
+#define SNS1_MESSAGE_TO_OPER	0x10
+#define SNS1_NO_REC_FOUND	0x08
+#define SNS1_FILE_PROTECTED	0x04
+#define SNS1_WRITE_INHIBITED	0x02
+#define SNS1_INPRECISE_END	0x01
+
+/*
+ * architectured values for third sense byte
+ */
+#define SNS2_REQ_INH_WRITE	0x80
+#define SNS2_CORRECTABLE	0x40
+#define SNS2_FIRST_LOG_ERR	0x20
+#define SNS2_ENV_DATA_PRESENT	0x10
+#define SNS2_INPRECISE_END	0x04
+
+/**
+ * scsw_is_tm - check for transport mode scsw
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the specified scsw is a transport mode scsw, zero
+ * otherwise.
+ */
+static inline int scsw_is_tm(union scsw *scsw)
+{
+	return css_general_characteristics.fcx && (scsw->tm.x == 1);
+}
+
+/**
+ * scsw_key - return scsw key field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the key field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_key(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.key;
+
+	return scsw->cmd.key;
+}
+
+/**
+ * scsw_eswf - return scsw eswf field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the eswf field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_eswf(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.eswf;
+
+	return scsw->cmd.eswf;
+}
+
+/**
+ * scsw_cc - return scsw cc field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the cc field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_cc(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.cc;
+
+	return scsw->cmd.cc;
+}
+
+/**
+ * scsw_ectl - return scsw ectl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the ectl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_ectl(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.ectl;
+
+	return scsw->cmd.ectl;
+}
+
+/**
+ * scsw_pno - return scsw pno field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the pno field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_pno(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.pno;
+
+	return scsw->cmd.pno;
+}
+
+/**
+ * scsw_fctl - return scsw fctl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the fctl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_fctl(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.fctl;
+
+	return scsw->cmd.fctl;
+}
+
+/**
+ * scsw_actl - return scsw actl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the actl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_actl(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.actl;
+
+	return scsw->cmd.actl;
+}
+
+/**
+ * scsw_stctl - return scsw stctl field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the stctl field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_stctl(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.stctl;
+
+	return scsw->cmd.stctl;
+}
+
+/**
+ * scsw_dstat - return scsw dstat field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the dstat field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_dstat(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.dstat;
+
+	return scsw->cmd.dstat;
+}
+
+/**
+ * scsw_cstat - return scsw cstat field
+ * @scsw: pointer to scsw
+ *
+ * Return the value of the cstat field of the specified scsw, regardless of
+ * whether it is a transport mode or command mode scsw.
+ */
+static inline u32 scsw_cstat(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw->tm.cstat;
+
+	return scsw->cmd.cstat;
+}
+
+/**
+ * scsw_cmd_is_valid_key - check key field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the key field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_key(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_sctl - check sctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the sctl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_sctl(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_eswf - check eswf field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the eswf field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_eswf(union scsw *scsw)
+{
+	return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_cmd_is_valid_cc - check cc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cc field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_cc(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) &&
+	       (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_cmd_is_valid_fmt - check fmt field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fmt field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_fmt(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_pfch - check pfch field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pfch field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_pfch(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_isic - check isic field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the isic field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_isic(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_alcc - check alcc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the alcc field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_alcc(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_ssi - check ssi field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ssi field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_ssi(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_cmd_is_valid_zcc - check zcc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the zcc field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_zcc(union scsw *scsw)
+{
+	return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) &&
+	       (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS);
+}
+
+/**
+ * scsw_cmd_is_valid_ectl - check ectl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ectl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_ectl(union scsw *scsw)
+{
+	return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
+	       !(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) &&
+	       (scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS);
+}
+
+/**
+ * scsw_cmd_is_valid_pno - check pno field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pno field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_pno(union scsw *scsw)
+{
+	/* With intermediate status set, additionally require "suspended". */
+	return (scsw->cmd.fctl != 0) &&
+	       (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
+	       (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) ||
+		(scsw->cmd.actl & SCSW_ACTL_SUSPENDED));
+}
+
+/**
+ * scsw_cmd_is_valid_fctl - check fctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fctl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_fctl(union scsw *scsw)
+{
+	/* Only valid if pmcw.dnv == 1*/
+	return 1;
+}
+
+/**
+ * scsw_cmd_is_valid_actl - check actl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the actl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_actl(union scsw *scsw)
+{
+	/* Only valid if pmcw.dnv == 1*/
+	return 1;
+}
+
+/**
+ * scsw_cmd_is_valid_stctl - check stctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the stctl field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_stctl(union scsw *scsw)
+{
+	/* Only valid if pmcw.dnv == 1*/
+	return 1;
+}
+
+/**
+ * scsw_cmd_is_valid_dstat - check dstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the dstat field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_dstat(union scsw *scsw)
+{
+	return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
+	       (scsw->cmd.cc != 3);
+}
+
+/**
+ * scsw_cmd_is_valid_cstat - check cstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cstat field of the specified command mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_cmd_is_valid_cstat(union scsw *scsw)
+{
+	return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) &&
+	       (scsw->cmd.cc != 3);
+}
+
+/**
+ * scsw_tm_is_valid_key - check key field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the key field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_key(union scsw *scsw)
+{
+	return (scsw->tm.fctl & SCSW_FCTL_START_FUNC);
+}
+
+/**
+ * scsw_tm_is_valid_eswf - check eswf field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the eswf field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_eswf(union scsw *scsw)
+{
+	return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_tm_is_valid_cc - check cc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cc field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_cc(union scsw *scsw)
+{
+	return (scsw->tm.fctl & SCSW_FCTL_START_FUNC) &&
+	       (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND);
+}
+
+/**
+ * scsw_tm_is_valid_fmt - check fmt field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fmt field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_fmt(union scsw *scsw)
+{
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_x - check x field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the x field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_x(union scsw *scsw)
+{
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_q - check q field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the q field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_q(union scsw *scsw)
+{
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_ectl - check ectl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ectl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_ectl(union scsw *scsw)
+{
+	return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
+	       !(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) &&
+	       (scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS);
+}
+
+/**
+ * scsw_tm_is_valid_pno - check pno field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pno field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_pno(union scsw *scsw)
+{
+	/* With intermediate status set, additionally require "suspended". */
+	return (scsw->tm.fctl != 0) &&
+	       (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
+	       (!(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) ||
+		(scsw->tm.actl & SCSW_ACTL_SUSPENDED));
+}
+
+/**
+ * scsw_tm_is_valid_fctl - check fctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fctl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_fctl(union scsw *scsw)
+{
+	/* Only valid if pmcw.dnv == 1*/
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_actl - check actl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the actl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_actl(union scsw *scsw)
+{
+	/* Only valid if pmcw.dnv == 1*/
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_stctl - check stctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the stctl field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_stctl(union scsw *scsw)
+{
+	/* Only valid if pmcw.dnv == 1*/
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_dstat - check dstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the dstat field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_dstat(union scsw *scsw)
+{
+	return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
+	       (scsw->tm.cc != 3);
+}
+
+/**
+ * scsw_tm_is_valid_cstat - check cstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cstat field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_cstat(union scsw *scsw)
+{
+	return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) &&
+	       (scsw->tm.cc != 3);
+}
+
+/**
+ * scsw_tm_is_valid_fcxs - check fcxs field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fcxs field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_fcxs(union scsw *scsw)
+{
+	return 1;
+}
+
+/**
+ * scsw_tm_is_valid_schxs - check schxs field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the schxs field of the specified transport mode scsw is
+ * valid, zero otherwise.
+ */
+static inline int scsw_tm_is_valid_schxs(union scsw *scsw)
+{
+	return (scsw->tm.cstat & (SCHN_STAT_PROG_CHECK |
+				  SCHN_STAT_INTF_CTRL_CHK |
+				  SCHN_STAT_PROT_CHECK |
+				  SCHN_STAT_CHN_DATA_CHK));
+}
+
+/**
+ * scsw_is_valid_actl - check actl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the actl field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_actl(union scsw *scsw)
+{
+	if (scsw_is_tm(scsw))
+		return scsw_tm_is_valid_actl(scsw);
+
+	return scsw_cmd_is_valid_actl(scsw);
+}
+
+/**
+ * scsw_is_valid_cc - check cc field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cc field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_cc(union scsw *scsw)
+{
+	/* Route to the checker matching the scsw's mode. */
+	return scsw_is_tm(scsw) ?
+		scsw_tm_is_valid_cc(scsw) :
+		scsw_cmd_is_valid_cc(scsw);
+}
+
+/**
+ * scsw_is_valid_cstat - check cstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the cstat field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_cstat(union scsw *scsw)
+{
+	/* Route to the checker matching the scsw's mode. */
+	return scsw_is_tm(scsw) ?
+		scsw_tm_is_valid_cstat(scsw) :
+		scsw_cmd_is_valid_cstat(scsw);
+}
+
+/**
+ * scsw_is_valid_dstat - check dstat field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the dstat field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_dstat(union scsw *scsw)
+{
+	/* Route to the checker matching the scsw's mode. */
+	return scsw_is_tm(scsw) ?
+		scsw_tm_is_valid_dstat(scsw) :
+		scsw_cmd_is_valid_dstat(scsw);
+}
+
+/**
+ * scsw_is_valid_ectl - check ectl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the ectl field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_ectl(union scsw *scsw)
+{
+	/* Route to the checker matching the scsw's mode. */
+	return scsw_is_tm(scsw) ?
+		scsw_tm_is_valid_ectl(scsw) :
+		scsw_cmd_is_valid_ectl(scsw);
+}
+
+/**
+ * scsw_is_valid_eswf - check eswf field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the eswf field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_eswf(union scsw *scsw)
+{
+	/* Route to the checker matching the scsw's mode. */
+	return scsw_is_tm(scsw) ?
+		scsw_tm_is_valid_eswf(scsw) :
+		scsw_cmd_is_valid_eswf(scsw);
+}
+
+/**
+ * scsw_is_valid_fctl - check fctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the fctl field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_fctl(union scsw *scsw)
+{
+	/* Route to the checker matching the scsw's mode. */
+	return scsw_is_tm(scsw) ?
+		scsw_tm_is_valid_fctl(scsw) :
+		scsw_cmd_is_valid_fctl(scsw);
+}
+
+/**
+ * scsw_is_valid_key - check key field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the key field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_key(union scsw *scsw)
+{
+	/* Route to the checker matching the scsw's mode. */
+	return scsw_is_tm(scsw) ?
+		scsw_tm_is_valid_key(scsw) :
+		scsw_cmd_is_valid_key(scsw);
+}
+
+/**
+ * scsw_is_valid_pno - check pno field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the pno field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_pno(union scsw *scsw)
+{
+	/* Route to the checker matching the scsw's mode. */
+	return scsw_is_tm(scsw) ?
+		scsw_tm_is_valid_pno(scsw) :
+		scsw_cmd_is_valid_pno(scsw);
+}
+
+/**
+ * scsw_is_valid_stctl - check stctl field validity
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the stctl field of the specified scsw is valid,
+ * regardless of whether it is a transport mode or command mode scsw.
+ * Return zero if the field does not contain a valid value.
+ */
+static inline int scsw_is_valid_stctl(union scsw *scsw)
+{
+	/* Route to the checker matching the scsw's mode. */
+	return scsw_is_tm(scsw) ?
+		scsw_tm_is_valid_stctl(scsw) :
+		scsw_cmd_is_valid_stctl(scsw);
+}
+
+/**
+ * scsw_cmd_is_solicited - check for solicited scsw
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the command mode scsw indicates that the associated
+ * status condition is solicited, zero if it is unsolicited.
+ */
+static inline int scsw_cmd_is_solicited(union scsw *scsw)
+{
+	return !(scsw->cmd.cc == 0 && scsw->cmd.stctl ==
+		 (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS));
+}
+
+/**
+ * scsw_tm_is_solicited - check for solicited scsw
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the transport mode scsw indicates that the associated
+ * status condition is solicited, zero if it is unsolicited.
+ */
+static inline int scsw_tm_is_solicited(union scsw *scsw)
+{
+	return !(scsw->tm.cc == 0 && scsw->tm.stctl ==
+		 (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS));
+}
+
+/**
+ * scsw_is_solicited - check for solicited scsw
+ * @scsw: pointer to scsw
+ *
+ * Return non-zero if the transport or command mode scsw indicates that the
+ * associated status condition is solicited, zero if it is unsolicited.
+ */
+static inline int scsw_is_solicited(union scsw *scsw)
+{
+	/* Route to the checker matching the scsw's mode. */
+	return scsw_is_tm(scsw) ?
+		scsw_tm_is_solicited(scsw) :
+		scsw_cmd_is_solicited(scsw);
+}
+
+#endif /* _ASM_S390_SCSW_H_ */
diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h
new file mode 100644
index 000000000..781a9cf9b
--- /dev/null
+++ b/arch/s390/include/asm/seccomp.h
@@ -0,0 +1,16 @@
+#ifndef _ASM_S390_SECCOMP_H
+#define _ASM_S390_SECCOMP_H
+
+#include <linux/unistd.h>
+
+#define __NR_seccomp_read	__NR_read
+#define __NR_seccomp_write	__NR_write
+#define __NR_seccomp_exit	__NR_exit
+#define __NR_seccomp_sigreturn	__NR_sigreturn
+
+#define __NR_seccomp_read_32	__NR_read
+#define __NR_seccomp_write_32	__NR_write
+#define __NR_seccomp_exit_32	__NR_exit
+#define __NR_seccomp_sigreturn_32 __NR_sigreturn
+
+#endif	/* _ASM_S390_SECCOMP_H */
diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h
new file mode 100644
index 000000000..fbd9116eb
--- /dev/null
+++ b/arch/s390/include/asm/sections.h
@@ -0,0 +1,8 @@
+#ifndef _S390_SECTIONS_H
+#define _S390_SECTIONS_H
+
+#include <asm-generic/sections.h>
+
+extern char _eshared[], _ehead[];
+
+#endif
diff --git a/arch/s390/include/asm/segment.h b/arch/s390/include/asm/segment.h
new file mode 100644
index 000000000..8bfce3475
--- /dev/null
+++ b/arch/s390/include/asm/segment.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_SEGMENT_H
+#define _ASM_SEGMENT_H
+
+#endif
diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h
new file mode 100644
index 000000000..5b3e48ef5
--- /dev/null
+++ b/arch/s390/include/asm/serial.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_S390_SERIAL_H
+#define _ASM_S390_SERIAL_H
+
+#define BASE_BAUD 0
+
+#endif /* _ASM_S390_SERIAL_H */
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
new file mode 100644
index 000000000..b8ffc1bd0
--- /dev/null
+++ b/arch/s390/include/asm/setup.h
@@ -0,0 +1,119 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2010
+ */
+#ifndef _ASM_S390_SETUP_H
+#define _ASM_S390_SETUP_H
+
+#include <uapi/asm/setup.h>
+
+
+#define PARMAREA		0x10400
+
+#ifndef __ASSEMBLY__
+
+#include <asm/lowcore.h>
+#include <asm/types.h>
+
+#define IPL_DEVICE        (*(unsigned long *)  (0x10400))
+#define INITRD_START      (*(unsigned long *)  (0x10408))
+#define INITRD_SIZE       (*(unsigned long *)  (0x10410))
+#define OLDMEM_BASE	  (*(unsigned long *)  (0x10418))
+#define OLDMEM_SIZE	  (*(unsigned long *)  (0x10420))
+#define COMMAND_LINE      ((char *)            (0x10480))
+
+extern int memory_end_set;
+extern unsigned long memory_end;
+extern unsigned long max_physmem_end;
+
+extern void detect_memory_memblock(void);
+
+/*
+ * Machine features detected in head.S
+ */
+
+#define MACHINE_FLAG_VM		(1UL << 0)
+#define MACHINE_FLAG_IEEE	(1UL << 1)
+#define MACHINE_FLAG_CSP	(1UL << 2)
+#define MACHINE_FLAG_MVPG	(1UL << 3)
+#define MACHINE_FLAG_DIAG44	(1UL << 4)
+#define MACHINE_FLAG_IDTE	(1UL << 5)
+#define MACHINE_FLAG_DIAG9C	(1UL << 6)
+#define MACHINE_FLAG_KVM	(1UL << 8)
+#define MACHINE_FLAG_ESOP	(1UL << 9)
+#define MACHINE_FLAG_EDAT1	(1UL << 10)
+#define MACHINE_FLAG_EDAT2	(1UL << 11)
+#define MACHINE_FLAG_LPAR	(1UL << 12)
+#define MACHINE_FLAG_LPP	(1UL << 13)
+#define MACHINE_FLAG_TOPOLOGY	(1UL << 14)
+#define MACHINE_FLAG_TE		(1UL << 15)
+#define MACHINE_FLAG_TLB_LC	(1UL << 17)
+#define MACHINE_FLAG_VX		(1UL << 18)
+#define MACHINE_FLAG_CAD	(1UL << 19)
+
+#define MACHINE_IS_VM		(S390_lowcore.machine_flags & MACHINE_FLAG_VM)
+#define MACHINE_IS_KVM		(S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
+#define MACHINE_IS_LPAR		(S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
+
+#define MACHINE_HAS_DIAG9C	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
+#define MACHINE_HAS_ESOP	(S390_lowcore.machine_flags & MACHINE_FLAG_ESOP)
+#define MACHINE_HAS_PFMF	MACHINE_HAS_EDAT1
+#define MACHINE_HAS_HPAGE	MACHINE_HAS_EDAT1
+
+#define MACHINE_HAS_IDTE	(S390_lowcore.machine_flags & MACHINE_FLAG_IDTE)
+#define MACHINE_HAS_DIAG44	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44)
+#define MACHINE_HAS_EDAT1	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
+#define MACHINE_HAS_EDAT2	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
+#define MACHINE_HAS_LPP		(S390_lowcore.machine_flags & MACHINE_FLAG_LPP)
+#define MACHINE_HAS_TOPOLOGY	(S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
+#define MACHINE_HAS_TE		(S390_lowcore.machine_flags & MACHINE_FLAG_TE)
+#define MACHINE_HAS_TLB_LC	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
+#define MACHINE_HAS_VX		(S390_lowcore.machine_flags & MACHINE_FLAG_VX)
+#define MACHINE_HAS_CAD		(S390_lowcore.machine_flags & MACHINE_FLAG_CAD)
+
+/*
+ * Console mode. Override with conmode=
+ */
+extern unsigned int console_mode;
+extern unsigned int console_devno;
+extern unsigned int console_irq;
+
+extern char vmhalt_cmd[];
+extern char vmpoff_cmd[];
+
+#define CONSOLE_IS_UNDEFINED	(console_mode == 0)
+#define CONSOLE_IS_SCLP		(console_mode == 1)
+#define CONSOLE_IS_3215		(console_mode == 2)
+#define CONSOLE_IS_3270		(console_mode == 3)
+#define SET_CONSOLE_SCLP	do { console_mode = 1; } while (0)
+#define SET_CONSOLE_3215	do { console_mode = 2; } while (0)
+#define SET_CONSOLE_3270	do { console_mode = 3; } while (0)
+
+#define NSS_NAME_SIZE	8
+extern char kernel_nss_name[];
+
+#ifdef CONFIG_PFAULT
+extern int pfault_init(void);
+extern void pfault_fini(void);
+#else /* CONFIG_PFAULT */
+#define pfault_init()		({-1;})
+#define pfault_fini()		do { } while (0)
+#endif /* CONFIG_PFAULT */
+
+extern void cmma_init(void);
+
+extern void (*_machine_restart)(char *command);
+extern void (*_machine_halt)(void);
+extern void (*_machine_power_off)(void);
+
+#else /* __ASSEMBLY__ */
+
+#define IPL_DEVICE        0x10400
+#define INITRD_START      0x10408
+#define INITRD_SIZE       0x10410
+#define OLDMEM_BASE	  0x10418
+#define OLDMEM_SIZE	  0x10420
+#define COMMAND_LINE      0x10480
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_S390_SETUP_H */
diff --git a/arch/s390/include/asm/sfp-machine.h b/arch/s390/include/asm/sfp-machine.h
new file mode 100644
index 000000000..4e16aede4
--- /dev/null
+++ b/arch/s390/include/asm/sfp-machine.h
@@ -0,0 +1,142 @@
+/* Machine-dependent software floating-point definitions.
+   S/390 kernel version.
+   Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, write to the Free Software Foundation, Inc.,
+   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#ifndef _SFP_MACHINE_H
+#define _SFP_MACHINE_H
+   
+
+#define _FP_W_TYPE_SIZE		32
+#define _FP_W_TYPE		unsigned int
+#define _FP_WS_TYPE		signed int
+#define _FP_I_TYPE		int
+
+#define _FP_MUL_MEAT_S(R,X,Y)					\
+  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y)					\
+  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y)					\
+  _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_udiv(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S		((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D		((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q		((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S		0
+#define _FP_NANSIGN_D		0
+#define _FP_NANSIGN_Q		0
+
+#define _FP_KEEPNANFRACP 1
+
+/*
+ * If exactly one of the two NaNs is signaling (quiet bit clear), we choose
+ * that one; otherwise we choose X.
+ */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)  /* OP is not referenced */ \
+  do {                                                          \
+    if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs)          \
+        && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs))     \
+      {                         /* X has the quiet bit, Y does not */ \
+        R##_s = Y##_s;          /* result takes Y's _s ... */   \
+        _FP_FRAC_COPY_##wc(R,Y);/* ... and Y's fraction */      \
+      }                                                         \
+    else                                                        \
+      {                         /* every other combination: use X */ \
+        R##_s = X##_s;                                          \
+        _FP_FRAC_COPY_##wc(R,X);                                \
+      }                                                         \
+    R##_c = FP_CLS_NAN;         /* result class is NaN either way */ \
+  } while (0)
+
+/* Some assembly to speed things up. */
+#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({		\
+	unsigned int __r2 = (x2) + (y2);  /* carries are added below */ \
+	unsigned int __r1 = (x1);				\
+	unsigned int __r0 = (x0);				\
+	asm volatile(		/* __r0 += y0, rippling the carry up */	\
+		"	alr	%2,%3\n"	/* __r0 += y0 */	\
+		"	brc	12,0f\n"	/* no carry: done */	\
+		"	lhi	0,1\n"		/* GPR 0 = 1 */		\
+		"	alr	%1,0\n"		/* __r1 += 1 */		\
+		"	brc	12,0f\n"	/* no carry: done */	\
+		"	alr	%0,0\n"		/* __r2 += 1 */		\
+		"0:"						\
+		: "+&d" (__r2), "+&d" (__r1), "+&d" (__r0)	\
+		: "d" (y0), "i" (1) : "cc", "0" ); /* NOTE(review): %4 unused */ \
+	asm volatile(		/* __r1 += y1, carry into __r2 */	\
+		"	alr	%1,%2\n"	/* __r1 += y1 */	\
+		"	brc	12,0f\n"	/* no carry: done */	\
+		"	ahi	%0,1\n"		/* __r2 += 1 */		\
+		"0:"						\
+		: "+&d" (__r2), "+&d" (__r1)			\
+		: "d" (y1) : "cc");				\
+	(r2) = __r2;						\
+	(r1) = __r1;						\
+	(r0) = __r0;						\
+})
+
+#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) ({		\
+	unsigned int __r2 = (x2) - (y2);  /* borrows are applied below */ \
+	unsigned int __r1 = (x1);				\
+	unsigned int __r0 = (x0);				\
+	asm volatile(		/* __r0 -= y0, rippling the borrow up */ \
+		"	slr   %2,%3\n"		/* __r0 -= y0 */	\
+		"	brc	3,0f\n"		/* no borrow: done */	\
+		"	lhi	0,1\n"		/* GPR 0 = 1 */		\
+		"	slr	%1,0\n"		/* __r1 -= 1 */		\
+		"	brc	3,0f\n"		/* no borrow: done */	\
+		"	slr	%0,0\n"		/* __r2 -= 1 */		\
+		"0:"						\
+		: "+&d" (__r2), "+&d" (__r1), "+&d" (__r0)	\
+		: "d" (y0) : "cc", "0");			\
+	asm volatile(		/* __r1 -= y1, borrow from __r2 */	\
+		"	slr	%1,%2\n"	/* __r1 -= y1 */	\
+		"	brc	3,0f\n"		/* no borrow: done */	\
+		"	ahi	%0,-1\n"	/* __r2 -= 1 */		\
+		"0:"						\
+		: "+&d" (__r2), "+&d" (__r1)			\
+		: "d" (y1) : "cc");				\
+	(r2) = __r2;						\
+	(r1) = __r1;						\
+	(r0) = __r0;						\
+})
+
+#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) __FP_FRAC_SUB_3(x2,x1,x0,x2,x1,x0,y2,y1,y0)
+
+/* Obtain the current rounding mode. */
+#define FP_ROUNDMODE	mode
+
+/* Exception flags. */
+#define FP_EX_INVALID		0x800000
+#define FP_EX_DIVZERO		0x400000
+#define FP_EX_OVERFLOW		0x200000
+#define FP_EX_UNDERFLOW		0x100000
+#define FP_EX_INEXACT		0x080000
+
+/* We write the results always */
+#define FP_INHIBIT_RESULTS 0
+
+#endif
diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h
new file mode 100644
index 000000000..c8b7cf9d6
--- /dev/null
+++ b/arch/s390/include/asm/sfp-util.h
@@ -0,0 +1,67 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) ({		\
+	unsigned int __sh = (ah);			\
+	unsigned int __sl = (al);			\
+	asm volatile(	/* (sh:sl) = (ah:al) + (bh:bl) */	\
+		"	alr	%1,%3\n"	/* low += bl */	\
+		"	brc	12,0f\n"	/* no carry */	\
+		"	ahi	%0,1\n"		/* carry in */	\
+		"0:	alr  %0,%2"		/* high += bh */ \
+		: "+&d" (__sh), "+&d" (__sl) /* both written before %2 is read */ \
+		: "d" (bh), "d" (bl) : "cc");		\
+	(sh) = __sh;					\
+	(sl) = __sl;					\
+})
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) ({		\
+	unsigned int __sh = (ah);			\
+	unsigned int __sl = (al);			\
+	asm volatile(	/* (sh:sl) = (ah:al) - (bh:bl) */	\
+		"	slr	%1,%3\n"	/* low -= bl */	\
+		"	brc	3,0f\n"		/* no borrow */	\
+		"	ahi	%0,-1\n"	/* borrow */	\
+		"0:	slr	%0,%2"		/* high -= bh */ \
+		: "+&d" (__sh), "+&d" (__sl) /* both written before %2 is read */ \
+		: "d" (bh), "d" (bl) : "cc");		\
+	(sh) = __sh;					\
+	(sl) = __sl;					\
+})
+
+/* a umul b = a mul b + (a >= 1<<31 ? b<<32 : 0) + (b >= 1<<31 ? a<<32 : 0) */
+#define umul_ppmm(wh, wl, u, v) ({	/* (wh:wl) = u * v, unsigned */	\
+	unsigned int __wh = u;				\
+	unsigned int __wl = v;				\
+	asm volatile(					\
+		"	ltr	1,%0\n"		/* GPR 1 = u, cc = sign of u */ \
+		"	mr	0,%1\n"		/* GPR 0:1 = GPR 1 * v, signed */ \
+		"	jnm	0f\n"		/* u not negative: no fixup */ \
+		"	alr	0,%1\n"		/* high word += v */	\
+		"0:	ltr	%1,%1\n"	/* cc = sign of v */	\
+		"	jnm	1f\n"		/* v not negative: no fixup */ \
+		"	alr	0,%0\n"		/* high word += u */	\
+		"1:	lr	%0,0\n"		/* __wh = high word */	\
+		"	lr	%1,1\n"		/* __wl = low word */	\
+		: "+d" (__wh), "+d" (__wl)		\
+		: : "0", "1", "cc");			\
+	wh = __wh;					\
+	wl = __wl;					\
+})
+
+#define udiv_qrnnd(q, r, n1, n0, d)	/* (q, r) = n1:n0 divmod d */	\
+  do { unsigned long __n;	/* needs a 64-bit unsigned long */	\
+       unsigned int __d;				\
+    __n = ((unsigned long)(n1) << 32) + (n0);		\
+    __d = (d);						\
+    (q) = __n / __d;					\
+    (r) = __n % __d;					\
+  } while (0)
+
+#define UDIV_NEEDS_NORMALIZATION 0
+
+#define abort() BUG()
+
+#define __BYTE_ORDER __BIG_ENDIAN
diff --git a/arch/s390/include/asm/shmparam.h b/arch/s390/include/asm/shmparam.h
new file mode 100644
index 000000000..e98518273
--- /dev/null
+++ b/arch/s390/include/asm/shmparam.h
@@ -0,0 +1,11 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/shmparam.h"
+ */
+#ifndef _ASM_S390_SHMPARAM_H
+#define _ASM_S390_SHMPARAM_H
+
+#define SHMLBA PAGE_SIZE                 /* attach addr a multiple of this */
+
+#endif /* _ASM_S390_SHMPARAM_H */
diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h
new file mode 100644
index 000000000..abf9e5735
--- /dev/null
+++ b/arch/s390/include/asm/signal.h
@@ -0,0 +1,25 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/signal.h"
+ */
+#ifndef _ASMS390_SIGNAL_H
+#define _ASMS390_SIGNAL_H
+
+#include <uapi/asm/signal.h>
+
+/* Most things should be clean enough to redefine this at will, if care
+   is taken to make libc match.  */
+#include <asm/sigcontext.h>
+#define _NSIG           _SIGCONTEXT_NSIG
+#define _NSIG_BPW       _SIGCONTEXT_NSIG_BPW
+#define _NSIG_WORDS     _SIGCONTEXT_NSIG_WORDS
+
+typedef unsigned long old_sigset_t;             /* at least 32 bits */
+
+typedef struct {
+        unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
+#define __ARCH_HAS_SA_RESTORER
+#endif
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
new file mode 100644
index 000000000..ec60cf7fa
--- /dev/null
+++ b/arch/s390/include/asm/sigp.h
@@ -0,0 +1,57 @@
+#ifndef __S390_ASM_SIGP_H
+#define __S390_ASM_SIGP_H
+
+/* SIGP order codes */
+#define SIGP_SENSE		      1
+#define SIGP_EXTERNAL_CALL	      2
+#define SIGP_EMERGENCY_SIGNAL	      3
+#define SIGP_START		      4
+#define SIGP_STOP		      5
+#define SIGP_RESTART		      6
+#define SIGP_STOP_AND_STORE_STATUS    9
+#define SIGP_INITIAL_CPU_RESET	     11
+#define SIGP_CPU_RESET		     12
+#define SIGP_SET_PREFIX		     13
+#define SIGP_STORE_STATUS_AT_ADDRESS 14
+#define SIGP_SET_ARCHITECTURE	     18
+#define SIGP_COND_EMERGENCY_SIGNAL   19
+#define SIGP_SENSE_RUNNING	     21
+#define SIGP_SET_MULTI_THREADING     22
+#define SIGP_STORE_ADDITIONAL_STATUS 23
+
+/* SIGP condition codes */
+#define SIGP_CC_ORDER_CODE_ACCEPTED 0
+#define SIGP_CC_STATUS_STORED	    1
+#define SIGP_CC_BUSY		    2
+#define SIGP_CC_NOT_OPERATIONAL	    3
+
+/* SIGP cpu status bits */
+
+#define SIGP_STATUS_CHECK_STOP		0x00000010UL
+#define SIGP_STATUS_STOPPED		0x00000040UL
+#define SIGP_STATUS_EXT_CALL_PENDING	0x00000080UL
+#define SIGP_STATUS_INVALID_PARAMETER	0x00000100UL
+#define SIGP_STATUS_INCORRECT_STATE	0x00000200UL
+#define SIGP_STATUS_NOT_RUNNING		0x00000400UL
+
+#ifndef __ASSEMBLY__
+
+static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
+			      u32 *status)
+{
+	register unsigned long reg1 asm ("1") = parm;	/* pinned to GPR 1 */
+	int cc;
+
+	asm volatile(
+		"	sigp	%1,%2,0(%3)\n"	/* send @order to CPU @addr */
+		"	ipm	%0\n"		/* fetch the condition code ... */
+		"	srl	%0,28\n"	/* ... and move it to the low bits */
+		: "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
+	if (status && cc == 1)		/* 1 == SIGP_CC_STATUS_STORED */
+		*status = reg1;		/* otherwise *status is untouched */
+	return cc;			/* one of the SIGP_CC_* values */
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __S390_ASM_SIGP_H */
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
new file mode 100644
index 000000000..b3bd0282d
--- /dev/null
+++ b/arch/s390/include/asm/smp.h
@@ -0,0 +1,80 @@
+/*
+ *    Copyright IBM Corp. 1999, 2012
+ *    Author(s): Denis Joseph Barrow,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+#include <asm/sigp.h>
+
+#ifdef CONFIG_SMP
+
+#include <asm/lowcore.h>
+
+#define raw_smp_processor_id()	(S390_lowcore.cpu_nr)
+
+extern struct mutex smp_cpu_state_mutex;
+extern unsigned int smp_cpu_mt_shift;
+extern unsigned int smp_cpu_mtid;
+
+extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);
+
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+extern void smp_call_online_cpu(void (*func)(void *), void *);
+extern void smp_call_ipl_cpu(void (*func)(void *), void *);
+
+extern int smp_find_processor_id(u16 address);
+extern int smp_store_status(int cpu);
+extern int smp_vcpu_scheduled(int cpu);
+extern void smp_yield_cpu(int cpu);
+extern void smp_cpu_set_polarization(int cpu, int val);
+extern int smp_cpu_get_polarization(int cpu);
+extern void smp_fill_possible_mask(void);
+
+#else /* CONFIG_SMP */
+
+#define smp_cpu_mtid	0
+
+static inline void smp_call_ipl_cpu(void (*func)(void *), void *data)
+{
+	func(data);	/* !CONFIG_SMP stub: plain direct call */
+}
+
+static inline void smp_call_online_cpu(void (*func)(void *), void *data)
+{
+	func(data);	/* !CONFIG_SMP stub: plain direct call */
+}
+
+static inline int smp_find_processor_id(u16 address) { return 0; }
+static inline int smp_store_status(int cpu) { return 0; }
+static inline int smp_vcpu_scheduled(int cpu) { return 1; }
+static inline void smp_yield_cpu(int cpu) { }
+static inline void smp_fill_possible_mask(void) { }
+
+#endif /* CONFIG_SMP */
+
+static inline void smp_stop_cpu(void)
+{
+	const u16 self = stap();
+
+	do {
+		__pcpu_sigp(self, SIGP_STOP, 0, NULL);
+		cpu_relax();
+	} while (1);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern int smp_rescan_cpus(void);
+extern void __noreturn cpu_die(void);
+extern void __cpu_die(unsigned int cpu);
+extern int __cpu_disable(void);
+#else
+static inline int smp_rescan_cpus(void) { return 0; }
+static inline void cpu_die(void) { }
+#endif
+
+#endif /* __ASM_SMP_H */
diff --git a/arch/s390/include/asm/sparsemem.h b/arch/s390/include/asm/sparsemem.h
new file mode 100644
index 000000000..487428b6d
--- /dev/null
+++ b/arch/s390/include/asm/sparsemem.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_S390_SPARSEMEM_H
+#define _ASM_S390_SPARSEMEM_H
+
+#define SECTION_SIZE_BITS	28
+#define MAX_PHYSMEM_BITS	46
+
+#endif /* _ASM_S390_SPARSEMEM_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
new file mode 100644
index 000000000..0e37cd041
--- /dev/null
+++ b/arch/s390/include/asm/spinlock.h
@@ -0,0 +1,280 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/spinlock.h"
+ */
+
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <linux/smp.h>
+
+#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
+
+extern int spin_retry;
+
+static inline int
+_raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new)
+{
+	return __sync_bool_compare_and_swap(lock, old, new);
+}
+
+/*
+ * Simple spin lock operations.  There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ *
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+
+void arch_lock_relax(unsigned int cpu);
+
+void arch_spin_lock_wait(arch_spinlock_t *);
+int arch_spin_trylock_retry(arch_spinlock_t *);
+void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
+
+static inline void arch_spin_relax(arch_spinlock_t *lock)
+{
+	arch_lock_relax(lock->lock);
+}
+
+static inline u32 arch_spin_lockval(int cpu)
+{
+	return ~cpu;
+}
+
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.lock == 0;
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lp)
+{
+	return ACCESS_ONCE(lp->lock) != 0;
+}
+
+static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
+{
+	barrier();
+	return likely(arch_spin_value_unlocked(*lp) &&	/* plain read first */
+		      _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL));
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lp)
+{
+	if (!arch_spin_trylock_once(lp))
+		arch_spin_lock_wait(lp);
+}
+
+static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
+					unsigned long flags)
+{
+	if (!arch_spin_trylock_once(lp))
+		arch_spin_lock_wait_flags(lp, flags);
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lp)
+{
+	if (arch_spin_trylock_once(lp))
+		return 1;
+	return arch_spin_trylock_retry(lp);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lp)
+{
+	typecheck(unsigned int, lp->lock);
+	asm volatile(
+		__ASM_BARRIER	/* defined elsewhere; presumably a barrier - confirm */
+		"st	%1,%0\n"	/* lp->lock = 0, i.e. unlocked */
+		: "+Q" (lp->lock)
+		: "d" (0)
+		: "cc", "memory");
+}
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	while (arch_spin_is_locked(lock))
+		arch_spin_relax(lock);
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define arch_read_can_lock(x) ((int)(x)->lock >= 0)
+
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define arch_write_can_lock(x) ((x)->lock == 0)
+
+extern int _raw_read_trylock_retry(arch_rwlock_t *lp);
+extern int _raw_write_trylock_retry(arch_rwlock_t *lp);
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+static inline int arch_read_trylock_once(arch_rwlock_t *rw)
+{
+	unsigned int old = ACCESS_ONCE(rw->lock);	/* single snapshot */
+	return likely((int) old >= 0 &&	/* sign bit clear: no writer bit */
+		      _raw_compare_and_swap(&rw->lock, old, old + 1));
+}
+
+static inline int arch_write_trylock_once(arch_rwlock_t *rw)
+{
+	unsigned int old = ACCESS_ONCE(rw->lock);	/* single snapshot */
+	return likely(old == 0 &&	/* no reader count, no writer bit */
+		      _raw_compare_and_swap(&rw->lock, 0, 0x80000000));
+}
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define __RAW_OP_OR	"lao"
+#define __RAW_OP_AND	"lan"
+#define __RAW_OP_ADD	"laa"
+
+#define __RAW_LOCK(ptr, op_val, op_string)		\
+({							\
+	unsigned int old_val;				\
+							\
+	typecheck(unsigned int *, ptr);			\
+	asm volatile(					\
+		op_string "	%0,%2,%1\n"		\
+		"bcr	14,0\n"				\
+		: "=d" (old_val), "+Q" (*ptr)		\
+		: "d" (op_val)				\
+		: "cc", "memory");			\
+	old_val;					\
+})
+
+#define __RAW_UNLOCK(ptr, op_val, op_string)		\
+({							\
+	unsigned int old_val;				\
+							\
+	typecheck(unsigned int *, ptr);			\
+	asm volatile(					\
+		"bcr	14,0\n"				\
+		op_string "	%0,%2,%1\n"		\
+		: "=d" (old_val), "+Q" (*ptr)		\
+		: "d" (op_val)				\
+		: "cc", "memory");			\
+	old_val;					\
+})
+
+extern void _raw_read_lock_wait(arch_rwlock_t *lp);
+extern void _raw_write_lock_wait(arch_rwlock_t *lp, unsigned int prev);
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	unsigned int old;
+
+	old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD);
+	if ((int) old < 0)
+		_raw_read_lock_wait(rw);
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	__RAW_UNLOCK(&rw->lock, -1, __RAW_OP_ADD);
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	unsigned int old;
+
+	old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
+	if (old != 0)
+		_raw_write_lock_wait(rw, old);
+	rw->owner = SPINLOCK_LOCKVAL;
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	rw->owner = 0;
+	__RAW_UNLOCK(&rw->lock, 0x7fffffff, __RAW_OP_AND);
+}
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+extern void _raw_read_lock_wait(arch_rwlock_t *lp);
+extern void _raw_write_lock_wait(arch_rwlock_t *lp);
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	if (!arch_read_trylock_once(rw))
+		_raw_read_lock_wait(rw);
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	unsigned int old;
+
+	do {
+		old = ACCESS_ONCE(rw->lock);
+	} while (!_raw_compare_and_swap(&rw->lock, old, old - 1));
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	if (!arch_write_trylock_once(rw))
+		_raw_write_lock_wait(rw);
+	rw->owner = SPINLOCK_LOCKVAL;
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	typecheck(unsigned int, rw->lock);
+
+	rw->owner = 0;
+	asm volatile(
+		__ASM_BARRIER
+		"st	%1,%0\n"
+		: "+Q" (rw->lock)
+		: "d" (0)
+		: "cc", "memory");
+}
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	if (arch_read_trylock_once(rw))
+		return 1;
+	return _raw_read_trylock_retry(rw);
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	if (!arch_write_trylock_once(rw) && !_raw_write_trylock_retry(rw))
+		return 0;	/* neither the single try nor the retry won */
+	rw->owner = SPINLOCK_LOCKVAL;	/* read back by arch_*_relax() */
+	return 1;
+}
+
+static inline void arch_read_relax(arch_rwlock_t *rw)
+{
+	arch_lock_relax(rw->owner);
+}
+
+static inline void arch_write_relax(arch_rwlock_t *rw)
+{
+	arch_lock_relax(rw->owner);
+}
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
new file mode 100644
index 000000000..d84b69392
--- /dev/null
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -0,0 +1,21 @@
+#ifndef __ASM_SPINLOCK_TYPES_H
+#define __ASM_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+typedef struct {
+	unsigned int lock;
+} __attribute__ ((aligned (4))) arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, }
+
+typedef struct {
+	unsigned int lock;
+	unsigned int owner;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+
+#endif
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
new file mode 100644
index 000000000..8662f5c8e
--- /dev/null
+++ b/arch/s390/include/asm/string.h
@@ -0,0 +1,142 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ */
+
+#ifndef _S390_STRING_H_
+#define _S390_STRING_H_
+
+#ifndef _LINUX_TYPES_H
+#include <linux/types.h>
+#endif
+
+#define __HAVE_ARCH_MEMCHR	/* inline & arch function */
+#define __HAVE_ARCH_MEMCMP	/* arch function */
+#define __HAVE_ARCH_MEMCPY	/* gcc builtin & arch function */
+#define __HAVE_ARCH_MEMSCAN	/* inline & arch function */
+#define __HAVE_ARCH_MEMSET	/* gcc builtin & arch function */
+#define __HAVE_ARCH_STRCAT	/* inline & arch function */
+#define __HAVE_ARCH_STRCMP	/* arch function */
+#define __HAVE_ARCH_STRCPY	/* inline & arch function */
+#define __HAVE_ARCH_STRLCAT	/* arch function */
+#define __HAVE_ARCH_STRLCPY	/* arch function */
+#define __HAVE_ARCH_STRLEN	/* inline & arch function */
+#define __HAVE_ARCH_STRNCAT	/* arch function */
+#define __HAVE_ARCH_STRNCPY	/* arch function */
+#define __HAVE_ARCH_STRNLEN	/* inline & arch function */
+#define __HAVE_ARCH_STRRCHR	/* arch function */
+#define __HAVE_ARCH_STRSTR	/* arch function */
+
+/* Prototypes for non-inlined arch strings functions. */
+extern int memcmp(const void *, const void *, size_t);
+extern void *memcpy(void *, const void *, size_t);
+extern void *memset(void *, int, size_t);
+extern int strcmp(const char *,const char *);
+extern size_t strlcat(char *, const char *, size_t);
+extern size_t strlcpy(char *, const char *, size_t);
+extern char *strncat(char *, const char *, size_t);
+extern char *strncpy(char *, const char *, size_t);
+extern char *strrchr(const char *, int);
+extern char *strstr(const char *, const char *);
+
+#undef __HAVE_ARCH_MEMMOVE
+#undef __HAVE_ARCH_STRCHR
+#undef __HAVE_ARCH_STRNCHR
+#undef __HAVE_ARCH_STRNCMP
+#undef __HAVE_ARCH_STRPBRK
+#undef __HAVE_ARCH_STRSEP
+#undef __HAVE_ARCH_STRSPN
+
+#if !defined(IN_ARCH_STRING_C)
+
+/*
+ * Inline memchr built on the SRST instruction.
+ * Register 0 carries the byte to look for, %0 starts out as s + n (the
+ * end of the area) and %1 as s. "jo" restarts the instruction, "jl"
+ * skips the "la %0,0" that would otherwise turn the result into NULL.
+ * Returns the value left in %0: the match address or NULL.
+ */
+static inline void *memchr(const void * s, int c, size_t n)
+{
+	register int r0 asm("0") = (char) c;
+	const void *ret = s + n;
+
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b\n"
+		"	jl	1f\n"
+		"	la	%0,0\n"
+		"1:"
+		: "+a" (ret), "+&a" (s) : "d" (r0) : "cc");
+	return (void *) ret;
+}
+
+/*
+ * Inline memscan built on the SRST instruction.
+ * Same setup as memchr, but the result is never reset to NULL: %0 is
+ * preloaded with s + n and returned as left by the instruction.
+ */
+static inline void *memscan(void *s, int c, size_t n)
+{
+	register int r0 asm("0") = (char) c;
+	const void *ret = s + n;
+
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b\n"
+		: "+a" (ret), "+&a" (s) : "d" (r0) : "cc");
+	return (void *) ret;
+}
+
+/*
+ * Inline strcat: an SRST loop over dst with a zero search byte in
+ * register 0, followed by an MVST loop that copies src to the address
+ * left in %0. %0 (dummy) is preloaded with 0 through the "0" input
+ * constraint. Returns the original dst pointer.
+ */
+static inline char *strcat(char *dst, const char *src)
+{
+	register int r0 asm("0") = 0;
+	unsigned long dummy;
+	char *ret = dst;
+
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b\n"
+		"1:	mvst	%0,%2\n"
+		"	jo	1b"
+		: "=&a" (dummy), "+a" (dst), "+a" (src)
+		: "d" (r0), "0" (0) : "cc", "memory" );
+	return ret;
+}
+
+/*
+ * Inline strcpy: an MVST loop with a zero end byte in register 0.
+ * "jo" restarts the instruction until it reports completion.
+ * Returns the original dst pointer.
+ */
+static inline char *strcpy(char *dst, const char *src)
+{
+	register int r0 asm("0") = 0;
+	char *ret = dst;
+
+	asm volatile(
+		"0:	mvst	%0,%1\n"
+		"	jo	0b"
+		: "+&a" (dst), "+&a" (src) : "d" (r0)
+		: "cc", "memory");
+	return ret;
+}
+
+/*
+ * Inline strlen built on the SRST instruction.
+ * Register 0 is initialised to 0 and is also asm operand %0, so it is
+ * both input and output of the search. After the loop its value is
+ * treated as an address and s is subtracted to give the length.
+ */
+static inline size_t strlen(const char *s)
+{
+	register unsigned long r0 asm("0") = 0;
+	const char *tmp = s;
+
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b"
+		: "+d" (r0), "+a" (tmp) :  : "cc");
+	return r0 - (unsigned long) s;
+}
+
+/*
+ * Inline strnlen built on the SRST instruction.
+ * %0 (end) is preloaded with s + n and the search byte in register 0
+ * is 0. The result is the distance from s to whatever the instruction
+ * leaves in end.
+ */
+static inline size_t strnlen(const char * s, size_t n)
+{
+	register int r0 asm("0") = 0;
+	const char *tmp = s;
+	const char *end = s + n;
+
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b"
+		: "+a" (end), "+a" (tmp) : "d" (r0)  : "cc");
+	return end - s;
+}
+#else /* IN_ARCH_STRING_C */
+void *memchr(const void * s, int c, size_t n);
+void *memscan(void *s, int c, size_t n);
+char *strcat(char *dst, const char *src);
+char *strcpy(char *dst, const char *src);
+size_t strlen(const char *s);
+size_t strnlen(const char * s, size_t n);
+#endif /* !IN_ARCH_STRING_C */
+
+#endif /* _S390_STRING_H_ */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
new file mode 100644
index 000000000..d62e7a696
--- /dev/null
+++ b/arch/s390/include/asm/switch_to.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __ASM_SWITCH_TO_H
+#define __ASM_SWITCH_TO_H
+
+#include <linux/thread_info.h>
+#include <asm/ptrace.h>
+
+extern struct task_struct *__switch_to(void *, void *);
+extern void update_cr_regs(struct task_struct *task);
+
+/*
+ * Check whether @fpc can be set as floating point control value.
+ * The sequence saves the current value (efpc), tries the new one (sfpc)
+ * and puts the saved value back. rc is preloaded with -EINVAL and only
+ * cleared by "la %0,0"; the exception table entry sends a fault
+ * reported at label 0 to label 1, which skips that instruction.
+ * NOTE(review): label 0 sits after "sfpc %2"; presumably the fault
+ * address for a rejected value is that of the following instruction,
+ * confirm against the program check handler.
+ * Returns 0 on success, -EINVAL otherwise.
+ */
+static inline int test_fp_ctl(u32 fpc)
+{
+	u32 orig_fpc;
+	int rc;
+
+	asm volatile(
+		"	efpc    %1\n"
+		"	sfpc	%2\n"
+		"0:	sfpc	%1\n"
+		"	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc), "=d" (orig_fpc)
+		: "d" (fpc), "0" (-EINVAL));
+	return rc;
+}
+
+/* Store the floating point control register into *fpc (stfpc). */
+static inline void save_fp_ctl(u32 *fpc)
+{
+	asm volatile(
+		"       stfpc   %0\n"
+		: "+Q" (*fpc));
+}
+
+/*
+ * Load the floating point control register from *fpc (lfpc).
+ * rc is preloaded with -EINVAL and cleared by "la %0,0"; the exception
+ * table entry redirects a fault reported at label 0 to label 1, so rc
+ * keeps -EINVAL in that case.
+ * Returns 0 on success, -EINVAL otherwise.
+ */
+static inline int restore_fp_ctl(u32 *fpc)
+{
+	int rc;
+
+	asm volatile(
+		"	lfpc    %1\n"
+		"0:	la	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc) : "Q" (*fpc), "0" (-EINVAL));
+	return rc;
+}
+
+/*
+ * Store floating point registers 0-15 into fprs[0..15], one "std" per
+ * register; register k always goes to fprs[k].
+ */
+static inline void save_fp_regs(freg_t *fprs)
+{
+	asm volatile("std 0,%0" : "=Q" (fprs[0]));
+	asm volatile("std 2,%0" : "=Q" (fprs[2]));
+	asm volatile("std 4,%0" : "=Q" (fprs[4]));
+	asm volatile("std 6,%0" : "=Q" (fprs[6]));
+	asm volatile("std 1,%0" : "=Q" (fprs[1]));
+	asm volatile("std 3,%0" : "=Q" (fprs[3]));
+	asm volatile("std 5,%0" : "=Q" (fprs[5]));
+	asm volatile("std 7,%0" : "=Q" (fprs[7]));
+	asm volatile("std 8,%0" : "=Q" (fprs[8]));
+	asm volatile("std 9,%0" : "=Q" (fprs[9]));
+	asm volatile("std 10,%0" : "=Q" (fprs[10]));
+	asm volatile("std 11,%0" : "=Q" (fprs[11]));
+	asm volatile("std 12,%0" : "=Q" (fprs[12]));
+	asm volatile("std 13,%0" : "=Q" (fprs[13]));
+	asm volatile("std 14,%0" : "=Q" (fprs[14]));
+	asm volatile("std 15,%0" : "=Q" (fprs[15]));
+}
+
+/*
+ * Load floating point registers 0-15 from fprs[0..15], one "ld" per
+ * register; register k is always loaded from fprs[k].
+ */
+static inline void restore_fp_regs(freg_t *fprs)
+{
+	asm volatile("ld 0,%0" : : "Q" (fprs[0]));
+	asm volatile("ld 2,%0" : : "Q" (fprs[2]));
+	asm volatile("ld 4,%0" : : "Q" (fprs[4]));
+	asm volatile("ld 6,%0" : : "Q" (fprs[6]));
+	asm volatile("ld 1,%0" : : "Q" (fprs[1]));
+	asm volatile("ld 3,%0" : : "Q" (fprs[3]));
+	asm volatile("ld 5,%0" : : "Q" (fprs[5]));
+	asm volatile("ld 7,%0" : : "Q" (fprs[7]));
+	asm volatile("ld 8,%0" : : "Q" (fprs[8]));
+	asm volatile("ld 9,%0" : : "Q" (fprs[9]));
+	asm volatile("ld 10,%0" : : "Q" (fprs[10]));
+	asm volatile("ld 11,%0" : : "Q" (fprs[11]));
+	asm volatile("ld 12,%0" : : "Q" (fprs[12]));
+	asm volatile("ld 13,%0" : : "Q" (fprs[13]));
+	asm volatile("ld 14,%0" : : "Q" (fprs[14]));
+	asm volatile("ld 15,%0" : : "Q" (fprs[15]));
+}
+
+/*
+ * Store all vector registers into the __NUM_VXRS element array at vxrs.
+ * The address is loaded into general register 1 (listed as clobbered)
+ * and the two store-multiple instructions are emitted as raw .word
+ * opcodes; see the per-line comments for the intended mnemonics.
+ */
+static inline void save_vx_regs(__vector128 *vxrs)
+{
+	/* Lets the "=Q" operand describe the whole array as one output. */
+	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
+
+	asm volatile(
+		"	la	1,%0\n"
+		"	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
+		"	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
+		: "=Q" (*(addrtype *) vxrs) : : "1");
+}
+
+/*
+ * Variant of save_vx_regs() that temporarily modifies control
+ * register 0. With interrupts disabled it saves CR0, sets bits 17 and
+ * 18 in it, stores the vector registers, then restores the saved CR0
+ * and the previous interrupt state.
+ * NOTE(review): bits 17/18 are presumably the enablement controls the
+ * vector instructions need; confirm against the architecture manual.
+ */
+static inline void save_vx_regs_safe(__vector128 *vxrs)
+{
+	unsigned long cr0, flags;
+
+	flags = arch_local_irq_save();
+	__ctl_store(cr0, 0, 0);
+	__ctl_set_bit(0, 17);
+	__ctl_set_bit(0, 18);
+	save_vx_regs(vxrs);
+	__ctl_load(cr0, 0, 0);
+	arch_local_irq_restore(flags);
+}
+
+/*
+ * Load all vector registers from the __NUM_VXRS element array at vxrs.
+ * Mirror image of save_vx_regs(): the address goes into general
+ * register 1 (clobbered) and the load-multiple instructions are emitted
+ * as raw .word opcodes.
+ */
+static inline void restore_vx_regs(__vector128 *vxrs)
+{
+	/* Lets the "Q" operand describe the whole array as one input. */
+	typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
+
+	asm volatile(
+		"	la	1,%0\n"
+		"	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
+		"	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
+		: : "Q" (*(addrtype *) vxrs) : "1");
+}
+
+/*
+ * Save the task's floating point state: into the vector register save
+ * area if the task has one, otherwise into the plain FP register array.
+ */
+static inline void save_fp_vx_regs(struct task_struct *task)
+{
+	if (!task->thread.vxrs) {
+		save_fp_regs(task->thread.fp_regs.fprs);
+		return;
+	}
+	save_vx_regs(task->thread.vxrs);
+}
+
+/*
+ * Restore the task's floating point state: from the vector register
+ * save area if the task has one, otherwise from the plain FP register
+ * array.
+ */
+static inline void restore_fp_vx_regs(struct task_struct *task)
+{
+	if (!task->thread.vxrs) {
+		restore_fp_regs(task->thread.fp_regs.fprs);
+		return;
+	}
+	restore_vx_regs(task->thread.vxrs);
+}
+
+/* Store access registers 0-15 into the NUM_ACRS element array at acrs (stam). */
+static inline void save_access_regs(unsigned int *acrs)
+{
+	/* Lets the "=Q" operand describe the whole array as one output. */
+	typedef struct { int _[NUM_ACRS]; } acrstype;
+
+	asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs));
+}
+
+/* Load access registers 0-15 from the NUM_ACRS element array at acrs (lam). */
+static inline void restore_access_regs(unsigned int *acrs)
+{
+	/* Lets the "Q" operand describe the whole array as one input. */
+	typedef struct { int _[NUM_ACRS]; } acrstype;
+
+	asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs));
+}
+
+/*
+ * switch_to(prev, next, last) - switch from task prev to task next.
+ *
+ * If prev has an mm, its FP control word, FP/vector registers, access
+ * registers and ri_cb are saved first. If next has an mm, its control
+ * registers are updated and the same register sets are restored.
+ * Finally __switch_to() is called and its return value is assigned to
+ * prev. The "last" argument is not referenced by the expansion.
+ * prev and next are expanded several times without parentheses, so
+ * callers should pass plain identifiers.
+ */
+#define switch_to(prev,next,last) do {					\
+	if (prev->mm) {							\
+		save_fp_ctl(&prev->thread.fp_regs.fpc);			\
+		save_fp_vx_regs(prev);					\
+		save_access_regs(&prev->thread.acrs[0]);		\
+		save_ri_cb(prev->thread.ri_cb);				\
+	}								\
+	if (next->mm) {							\
+		update_cr_regs(next);					\
+		restore_fp_ctl(&next->thread.fp_regs.fpc);		\
+		restore_fp_vx_regs(next);				\
+		restore_access_regs(&next->thread.acrs[0]);		\
+		restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb);	\
+	}								\
+	prev = __switch_to(prev,next);					\
+} while (0)
+
+#endif /* __ASM_SWITCH_TO_H */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
new file mode 100644
index 000000000..6ba0bf928
--- /dev/null
+++ b/arch/s390/include/asm/syscall.h
@@ -0,0 +1,100 @@
+/*
+ * Access to user system call parameters and results
+ *
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H	1
+
+#include <uapi/linux/audit.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <asm/ptrace.h>
+
+/*
+ * The syscall table always contains 32 bit pointers since we know that the
+ * address of the function to be called is (way) below 4GB.  So the "int"
+ * type here is what we want [need] for both 32 bit and 64 bit systems.
+ */
+extern const unsigned int sys_call_table[];
+extern const unsigned int sys_call_table_emu[];
+
+/*
+ * Return the system call number recorded in @regs, or -1 if @regs does
+ * not describe a system call (PIF_SYSCALL not set).
+ *
+ * The two results are returned from separate statements on purpose.
+ * In a single conditional expression "-1" would be converted to the
+ * type of (regs->int_code & 0xffff); if int_code is an unsigned int
+ * that turns -1 into UINT_MAX, which then reaches the caller as a large
+ * positive long instead of a negative value.
+ */
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	if (!test_pt_regs_flag(regs, PIF_SYSCALL))
+		return -1;
+	/* The system call number is kept in the low 16 bits of int_code. */
+	return regs->int_code & 0xffff;
+}
+
+/* Undo changes to the first argument/return register: gprs[2] is reset to orig_gpr2. */
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	regs->gprs[2] = regs->orig_gpr2;
+}
+
+/*
+ * Return the error code held in gprs[2] if IS_ERR_VALUE() classifies it
+ * as an error value, otherwise 0.
+ */
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	if (IS_ERR_VALUE(regs->gprs[2]))
+		return regs->gprs[2];
+	return 0;
+}
+
+/* Return the raw system call result, which is kept in gprs[2]. */
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	return regs->gprs[2];
+}
+
+/*
+ * Set the system call result in gprs[2]: @error when it is non-zero,
+ * otherwise @val.
+ */
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	if (error)
+		regs->gprs[2] = error;
+	else
+		regs->gprs[2] = val;
+}
+
+/*
+ * Copy @n system call arguments, starting with argument @i, into @args.
+ *
+ * Argument 0 is taken from orig_gpr2, arguments 1-5 from gprs[3..7].
+ * For tasks with TIF_31BIT set (CONFIG_COMPAT only) every value is
+ * masked to 32 bits. @i + @n must not exceed 6.
+ *
+ * Nothing is written when @n is 0, so a caller may pass a zero-length
+ * buffer for system calls without arguments.
+ */
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	unsigned long mask = -1UL;
+	unsigned int k;
+
+	BUG_ON(i + n > 6);
+#ifdef CONFIG_COMPAT
+	if (test_tsk_thread_flag(task, TIF_31BIT))
+		mask = 0xffffffff;
+#endif
+	for (k = 0; k < n; k++) {
+		/* gprs[2] may already hold the result; use the saved copy. */
+		if (i + k == 0)
+			args[k] = regs->orig_gpr2 & mask;
+		else
+			args[k] = regs->gprs[2 + i + k] & mask;
+	}
+}
+
+/*
+ * Overwrite @n system call arguments, starting with argument @i, with
+ * the values in @args.
+ *
+ * Argument 0 is stored in orig_gpr2, arguments 1-5 in gprs[3..7].
+ * @i + @n must not exceed 6.
+ *
+ * Nothing is read from @args or written to @regs when @n is 0.
+ */
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	unsigned int k;
+
+	BUG_ON(i + n > 6);
+	for (k = 0; k < n; k++) {
+		if (i + k == 0)
+			regs->orig_gpr2 = args[k];
+		else
+			regs->gprs[2 + i + k] = args[k];
+	}
+}
+
+/*
+ * Return the audit architecture of the current task: AUDIT_ARCH_S390
+ * when CONFIG_COMPAT is enabled and the task has TIF_31BIT set,
+ * AUDIT_ARCH_S390X otherwise.
+ */
+static inline int syscall_get_arch(void)
+{
+#ifdef CONFIG_COMPAT
+	if (test_tsk_thread_flag(current, TIF_31BIT))
+		return AUDIT_ARCH_S390;
+#endif
+	return AUDIT_ARCH_S390X;
+}
+#endif	/* _ASM_SYSCALL_H */
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
new file mode 100644
index 000000000..f7054a892
--- /dev/null
+++ b/arch/s390/include/asm/sysinfo.h
@@ -0,0 +1,179 @@
+/*
+ * definition for store system information stsi
+ *
+ * Copyright IBM Corp. 2001, 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Ulrich Weigand <weigand@de.ibm.com>
+ *		 Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef __ASM_S390_SYSINFO_H
+#define __ASM_S390_SYSINFO_H
+
+#include <asm/bitsperlong.h>
+#include <linux/uuid.h>
+
+/*
+ * Layout of the sysinfo 1.1.1 data block.
+ * NOTE(review): presumably the block stsi() fills in for fc=1, sel1=1,
+ * sel2=1; field order and sizes must not be changed.
+ */
+struct sysinfo_1_1_1 {
+	unsigned char p:1;
+	unsigned char :6;
+	unsigned char t:1;
+	unsigned char :8;
+	unsigned char ccr;
+	unsigned char cai;
+	char reserved_0[28];
+	char manufacturer[16];
+	char type[4];
+	char reserved_1[12];
+	char model_capacity[16];
+	char sequence[16];
+	char plant[4];
+	char model[16];
+	char model_perm_cap[16];
+	char model_temp_cap[16];
+	unsigned int model_cap_rating;
+	unsigned int model_perm_cap_rating;
+	unsigned int model_temp_cap_rating;
+	unsigned char typepct[5];
+	unsigned char reserved_2[3];
+	unsigned int ncr;
+	unsigned int npr;
+	unsigned int ntr;
+};
+
+/*
+ * Layout of the sysinfo 1.2.1 data block.
+ * NOTE(review): presumably the stsi() result for fc=1, sel1=2, sel2=1.
+ */
+struct sysinfo_1_2_1 {
+	char reserved_0[80];
+	char sequence[16];
+	char plant[4];
+	char reserved_1[2];
+	unsigned short cpu_address;
+};
+
+/*
+ * Layout of the sysinfo 1.2.2 data block. The fixed part is followed by
+ * a variable number of adjustment entries (zero-length trailing array).
+ * NOTE(review): acc_offset presumably locates the
+ * struct sysinfo_1_2_2_extension part; confirm against users.
+ */
+struct sysinfo_1_2_2 {
+	char format;
+	char reserved_0[1];
+	unsigned short acc_offset;
+	char reserved_1[20];
+	unsigned int nominal_cap;
+	unsigned int secondary_cap;
+	unsigned int capability;
+	unsigned short cpus_total;
+	unsigned short cpus_configured;
+	unsigned short cpus_standby;
+	unsigned short cpus_reserved;
+	unsigned short adjustment[0];
+};
+
+/*
+ * Extension part belonging to struct sysinfo_1_2_2: an alternate
+ * capability value followed by a variable number of alternate
+ * adjustment entries.
+ */
+struct sysinfo_1_2_2_extension {
+	unsigned int alt_capability;
+	unsigned short alt_adjustment[0];
+};
+
+/*
+ * Layout of the sysinfo 2.2.1 data block.
+ * NOTE(review): presumably the stsi() result for fc=2, sel1=2, sel2=1.
+ */
+struct sysinfo_2_2_1 {
+	char reserved_0[80];
+	char sequence[16];
+	char plant[4];
+	unsigned short cpu_id;
+	unsigned short cpu_address;
+};
+
+/*
+ * Layout of the sysinfo 2.2.2 data block.
+ * NOTE(review): presumably the stsi() result for fc=2, sel1=2, sel2=2;
+ * the LPAR_CHAR_* masks defined after this struct look like the bit
+ * values of the characteristics byte; confirm against users.
+ */
+struct sysinfo_2_2_2 {
+	char reserved_0[32];
+	unsigned short lpar_number;
+	char reserved_1;
+	unsigned char characteristics;
+	unsigned short cpus_total;
+	unsigned short cpus_configured;
+	unsigned short cpus_standby;
+	unsigned short cpus_reserved;
+	char name[8];
+	unsigned int caf;
+	char reserved_2[8];
+	unsigned char mt_installed;
+	unsigned char mt_general;
+	unsigned char mt_psmtid;
+	char reserved_3[5];
+	unsigned short cpus_dedicated;
+	unsigned short cpus_shared;
+};
+
+#define LPAR_CHAR_DEDICATED	(1 << 7)
+#define LPAR_CHAR_SHARED	(1 << 6)
+#define LPAR_CHAR_LIMITED	(1 << 5)
+
+/*
+ * Layout of the sysinfo 3.2.2 data block: a 4-bit count followed by up
+ * to eight vm[] description entries and eight 256-byte extended names.
+ * NOTE(review): count presumably gives the number of valid vm[]
+ * entries; confirm against users.
+ */
+struct sysinfo_3_2_2 {
+	char reserved_0[31];
+	unsigned char :4;
+	unsigned char count:4;
+	struct {
+		char reserved_0[4];
+		unsigned short cpus_total;
+		unsigned short cpus_configured;
+		unsigned short cpus_standby;
+		unsigned short cpus_reserved;
+		char name[8];
+		unsigned int caf;
+		char cpi[16];
+		char reserved_1[3];
+		char ext_name_encoding;
+		unsigned int reserved_2;
+		uuid_be uuid;
+	} vm[8];
+	char reserved_3[1504];
+	char ext_names[8][256];
+};
+
+extern int topology_max_mnest;
+
+#define TOPOLOGY_CORE_BITS	64
+#define TOPOLOGY_NR_MAG		6
+
+/*
+ * Topology list entry describing CPUs: nesting level byte, a 2-bit pp
+ * field, an origin value and a TOPOLOGY_CORE_BITS wide bit mask.
+ * NOTE(review): the mask is presumably relative to origin; confirm
+ * against the topology code.
+ */
+struct topology_core {
+	unsigned char nl;
+	unsigned char reserved0[3];
+	unsigned char :6;
+	unsigned char pp:2;
+	unsigned char reserved1;
+	unsigned short origin;
+	unsigned long mask[TOPOLOGY_CORE_BITS / BITS_PER_LONG];
+};
+
+/* Topology list entry describing a container: nesting level byte and an id. */
+struct topology_container {
+	unsigned char nl;
+	unsigned char reserved[6];
+	unsigned char id;
+};
+
+/*
+ * One topology list entry. Both variants start with the nl byte, which
+ * can therefore be read through the union before picking a variant.
+ */
+union topology_entry {
+	unsigned char nl;
+	struct topology_core cpu;
+	struct topology_container container;
+};
+
+/*
+ * Layout of the sysinfo 15.1.x data block: a header with length,
+ * TOPOLOGY_NR_MAG magnitude bytes and mnest, followed by a variable
+ * number of topology list entries (zero-length trailing array).
+ */
+struct sysinfo_15_1_x {
+	unsigned char reserved0[2];
+	unsigned short length;
+	unsigned char mag[TOPOLOGY_NR_MAG];
+	unsigned char reserved1;
+	unsigned char mnest;
+	unsigned char reserved2[4];
+	union topology_entry tle[0];
+};
+
+int stsi(void *sysinfo, int fc, int sel1, int sel2);
+
+/*
+ * Service level reporting interface.
+ *
+ * A struct service_level carries a list linkage and a seq_print
+ * callback that takes the seq_file and the service_level itself.
+ * Instances are handed to register_service_level() and
+ * unregister_service_level().
+ */
+struct service_level {
+	struct list_head list;
+	void (*seq_print)(struct seq_file *, struct service_level *);
+};
+
+int register_service_level(struct service_level *);
+int unregister_service_level(struct service_level *);
+
+#endif /* __ASM_S390_SYSINFO_H */
diff --git a/arch/s390/include/asm/termios.h b/arch/s390/include/asm/termios.h
new file mode 100644
index 000000000..db028d17f
--- /dev/null
+++ b/arch/s390/include/asm/termios.h
@@ -0,0 +1,25 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/termios.h"
+ */
+#ifndef _S390_TERMIOS_H
+#define _S390_TERMIOS_H
+
+#include <uapi/asm/termios.h>
+
+
+/*	intr=^C		quit=^\		erase=del	kill=^U
+	eof=^D		vtime=\0	vmin=\1		sxtc=\0
+	start=^Q	stop=^S		susp=^Z		eol=\0
+	reprint=^R	discard=^O	werase=^W	lnext=^V
+	eol2=\0
+*/
+#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
+
+/*
+ * Copy a complete struct termios2 between user space (u) and kernel
+ * space (k). Each macro evaluates to the result of the underlying
+ * copy_from_user()/copy_to_user() call.
+ */
+#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2))
+#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2))
+
+#include <asm-generic/termios-base.h>
+
+#endif	/* _S390_TERMIOS_H */
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
new file mode 100644
index 000000000..4c27ec764
--- /dev/null
+++ b/arch/s390/include/asm/thread_info.h
@@ -0,0 +1,99 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 2002, 2006
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#ifndef _ASM_THREAD_INFO_H
+#define _ASM_THREAD_INFO_H
+
+/*
+ * Size of kernel stack for each process
+ */
+#define THREAD_ORDER 2
+#define ASYNC_ORDER  2
+
+#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
+#define ASYNC_SIZE  (PAGE_SIZE << ASYNC_ORDER)
+
+#ifndef __ASSEMBLY__
+#include <asm/lowcore.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+
+/*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ * - if the contents of this structure are changed, the assembly constants must also be changed
+ */
+/* Only task, flags, cpu and preempt_count are set by INIT_THREAD_INFO(). */
+struct thread_info {
+	struct task_struct	*task;		/* main task structure */
+	unsigned long		flags;		/* low level flags */
+	unsigned long		sys_call_table;	/* System call table address */
+	unsigned int		cpu;		/* current CPU */
+	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
+	unsigned int		system_call;
+	__u64			user_timer;
+	__u64			system_timer;
+	unsigned long		last_break;	/* last breaking-event-address. */
+};
+
+/*
+ * macros/functions for gaining access to the thread information structure
+ */
+/*
+ * Static initializer for a struct thread_info belonging to task @tsk.
+ * @tsk must be an object (its address is taken), not a pointer.
+ * Members not listed here are zero-initialized.
+ */
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.task		= &tsk,			\
+	.flags		= 0,			\
+	.cpu		= 0,			\
+	.preempt_count	= INIT_PREEMPT_COUNT,	\
+}
+
+#define init_thread_info	(init_thread_union.thread_info)
+#define init_stack		(init_thread_union.stack)
+
+/*
+ * how to get the thread information struct from C:
+ * the pointer is read from the thread_info field of the lowcore.
+ */
+static inline struct thread_info *current_thread_info(void)
+{
+	return (struct thread_info *) S390_lowcore.thread_info;
+}
+
+void arch_release_task_struct(struct task_struct *tsk);
+
+#define THREAD_SIZE_ORDER THREAD_ORDER
+
+#endif
+
+/*
+ * thread information flags bit numbers
+ */
+#define TIF_NOTIFY_RESUME	0	/* callback before returning to user */
+#define TIF_SIGPENDING		1	/* signal pending */
+#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
+#define TIF_SYSCALL_TRACE	3	/* syscall trace active */
+#define TIF_SYSCALL_AUDIT	4	/* syscall auditing active */
+#define TIF_SECCOMP		5	/* secure computing */
+#define TIF_SYSCALL_TRACEPOINT	6	/* syscall tracepoint instrumentation */
+#define TIF_UPROBE		7	/* breakpointed or single-stepping */
+#define TIF_31BIT		16	/* 32bit process */
+#define TIF_MEMDIE		17	/* is terminating due to OOM killer */
+#define TIF_RESTORE_SIGMASK	18	/* restore signal mask in do_signal() */
+#define TIF_SINGLE_STEP		19	/* This task is single stepped */
+#define TIF_BLOCK_STEP		20	/* This task is block stepped */
+#define TIF_UPROBE_SINGLESTEP	21	/* This task is uprobe single stepped */
+
+#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
+#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+#define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
+#define _TIF_UPROBE		(1<<TIF_UPROBE)
+#define _TIF_31BIT		(1<<TIF_31BIT)
+#define _TIF_SINGLE_STEP	(1<<TIF_SINGLE_STEP)
+
+#define is_32bit_task()		(test_thread_flag(TIF_31BIT))
+
+#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
new file mode 100644
index 000000000..98eb2a579
--- /dev/null
+++ b/arch/s390/include/asm/timex.h
@@ -0,0 +1,163 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *
+ *  Derived from "include/asm-i386/timex.h"
+ *    Copyright (C) 1992, Linus Torvalds
+ */
+
+#ifndef _ASM_S390_TIMEX_H
+#define _ASM_S390_TIMEX_H
+
+#include <asm/lowcore.h>
+
+/* The value of the TOD clock for 1.1.1970. */
+#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
+
+/* Inline functions for clock register access. */
+/*
+ * Set the TOD clock to @time with the "sck" instruction.
+ * Returns the condition code of the instruction, extracted with
+ * ipm followed by a right shift of 28 bits.
+ */
+static inline int set_tod_clock(__u64 time)
+{
+	int cc;
+
+	asm volatile(
+		"   sck   %1\n"
+		"   ipm   %0\n"
+		"   srl   %0,28\n"
+		: "=d" (cc) : "Q" (time) : "cc");
+	return cc;
+}
+
+/*
+ * Store the TOD clock into *time with the "stck" instruction.
+ * Returns the condition code of the instruction, extracted with
+ * ipm followed by a right shift of 28 bits.
+ */
+static inline int store_tod_clock(__u64 *time)
+{
+	int cc;
+
+	asm volatile(
+		"   stck  %1\n"
+		"   ipm   %0\n"
+		"   srl   %0,28\n"
+		: "=d" (cc), "=Q" (*time) : : "cc");
+	return cc;
+}
+
+/* Load the clock comparator with @time ("sckc"). */
+static inline void set_clock_comparator(__u64 time)
+{
+	asm volatile("sckc %0" : : "Q" (time));
+}
+
+/* Store the current clock comparator value into *time ("stckc"). */
+static inline void store_clock_comparator(__u64 *time)
+{
+	asm volatile("stckc %0" : "=Q" (*time));
+}
+
+void clock_comparator_work(void);
+
+/*
+ * Disable the local tick by setting the clock comparator to -1ULL, both
+ * in the lowcore copy and in the hardware register.
+ * Returns the previous lowcore value for use with local_tick_enable().
+ */
+static inline unsigned long long local_tick_disable(void)
+{
+	unsigned long long saved = S390_lowcore.clock_comparator;
+
+	S390_lowcore.clock_comparator = -1ULL;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+	return saved;
+}
+
+/*
+ * Re-arm the local tick: @comp is written to the lowcore copy of the
+ * clock comparator and then loaded into the hardware register.
+ */
+static inline void local_tick_enable(unsigned long long comp)
+{
+	S390_lowcore.clock_comparator = comp;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+}
+
+#define CLOCK_TICK_RATE		1193180 /* Underlying HZ */
+#define STORE_CLOCK_EXT_SIZE	16	/* stcke writes 16 bytes */
+
+typedef unsigned long long cycles_t;
+
+/*
+ * Store the extended TOD clock ("stcke") into the buffer at @clk, which
+ * must provide STORE_CLOCK_EXT_SIZE bytes.
+ */
+static inline void get_tod_clock_ext(char *clk)
+{
+	/* Lets the "=Q" operand describe the whole buffer as one output. */
+	typedef struct { char _[STORE_CLOCK_EXT_SIZE]; } addrtype;
+
+	asm volatile("stcke %0" : "=Q" (*(addrtype *) clk) : : "cc");
+}
+
+/*
+ * Read the TOD clock. The extended clock value is stored into a local
+ * buffer and the eight bytes starting at offset 1 are returned.
+ */
+static inline unsigned long long get_tod_clock(void)
+{
+	unsigned char clk[STORE_CLOCK_EXT_SIZE];
+	unsigned long long *tod = (unsigned long long *)&clk[1];
+
+	get_tod_clock_ext(clk);
+	return *tod;
+}
+
+/*
+ * Read the TOD clock using "stckf" when the kernel is built with
+ * CONFIG_HAVE_MARCH_Z9_109_FEATURES; otherwise fall back to
+ * get_tod_clock().
+ */
+static inline unsigned long long get_tod_clock_fast(void)
+{
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+	unsigned long long clk;
+
+	asm volatile("stckf %0" : "=Q" (clk) : : "cc");
+	return clk;
+#else
+	return get_tod_clock();
+#endif
+}
+
+/*
+ * Cycle counter derived from the TOD clock: the clock value shifted
+ * right by two bits (the cast applies before the shift).
+ */
+static inline cycles_t get_cycles(void)
+{
+	return (cycles_t) get_tod_clock() >> 2;
+}
+
+int get_sync_clock(unsigned long long *clock);
+void init_cpu_timer(void);
+unsigned long long monotonic_clock(void);
+
+void tod_to_timeval(__u64, struct timespec *);
+
+/*
+ * Convert a TOD clock value to a timespec: TOD_UNIX_EPOCH is subtracted
+ * and the remainder is handed to tod_to_timeval().
+ */
+static inline
+void stck_to_timespec(unsigned long long stck, struct timespec *ts)
+{
+	tod_to_timeval(stck - TOD_UNIX_EPOCH, ts);
+}
+
+extern u64 sched_clock_base_cc;
+
+/**
+ * get_tod_clock_monotonic - returns current time in clock rate units
+ *
+ * The result is the current TOD clock value minus sched_clock_base_cc.
+ *
+ * The caller must ensure that preemption is disabled.
+ * The clock and sched_clock_base get changed via stop_machine.
+ * Therefore preemption must be disabled when calling this
+ * function, otherwise the returned value is not guaranteed to
+ * be monotonic.
+ */
+static inline unsigned long long get_tod_clock_monotonic(void)
+{
+	unsigned long long now = get_tod_clock();
+
+	return now - sched_clock_base_cc;
+}
+
+/**
+ * tod_to_ns - convert a TOD format value to nanoseconds
+ * @todval: to be converted TOD format value
+ * Returns: number of nanoseconds that correspond to the TOD format value
+ *
+ * Converting a 64 Bit TOD format value to nanoseconds means that the value
+ * must be divided by 4.096. In order to achieve that we multiply with 125
+ * and divide by 512:
+ *
+ *    ns = (todval * 125) >> 9;
+ *
+ * In order to avoid an overflow with the multiplication we can rewrite this.
+ * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits)
+ * we end up with
+ *
+ *    ns = ((2^32 * th + tl) * 125 ) >> 9;
+ * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9);
+ *
+ */
+static inline unsigned long long tod_to_ns(unsigned long long todval)
+{
+	/* Split the TOD value into its upper and lower 32 bit halves. */
+	unsigned long long th = todval >> 32;
+	unsigned long long tl = todval & 0xffffffff;
+
+	/* ns = (2^23 * th * 125) + ((tl * 125) >> 9) */
+	return ((th << 23) * 125) + ((tl * 125) >> 9);
+}
+
+#endif
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
new file mode 100644
index 000000000..7a92e69c5
--- /dev/null
+++ b/arch/s390/include/asm/tlb.h
@@ -0,0 +1,148 @@
+#ifndef _S390_TLB_H
+#define _S390_TLB_H
+
+/*
+ * TLB flushing on s390 is complicated. The following requirement
+ * from the principles of operation is the most arduous:
+ *
+ * "A valid table entry must not be changed while it is attached
+ * to any CPU and may be used for translation by that CPU except to
+ * (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY,
+ * or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page
+ * table entry, or (3) make a change by means of a COMPARE AND SWAP
+ * AND PURGE instruction that purges the TLB."
+ *
+ * The modification of a pte of an active mm struct therefore is
+ * a two step process: i) invalidate the pte, ii) store the new pte.
+ * This is true for the page protection bit as well.
+ * The only possible optimization is to flush at the beginning of
+ * a tlb_gather_mmu cycle if the mm_struct is currently not in use.
+ *
+ * Pages used for the page tables are a different story. FIXME: more
+ */
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <asm/processor.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+
+/*
+ * Per-operation TLB gather state, set up by tlb_gather_mmu():
+ * the mm being operated on, the current batch of page tables queued
+ * for freeing (NULL initially), the address range, and fullmm, which
+ * is non-zero only when start is 0 and end is the maximum address.
+ */
+struct mmu_gather {
+	struct mm_struct *mm;
+	struct mmu_table_batch *batch;
+	unsigned int fullmm;
+	unsigned long start, end;
+};
+
+/*
+ * Batch of page table pointers: an rcu head, the number of entries in
+ * use and the entries themselves (zero-length trailing array).
+ * MAX_TABLE_BATCH is sized so that one batch fills one page.
+ */
+struct mmu_table_batch {
+	struct rcu_head		rcu;
+	unsigned int		nr;
+	void			*tables[0];
+};
+
+#define MAX_TABLE_BATCH		\
+	((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
+
+extern void tlb_table_flush(struct mmu_gather *tlb);
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+
+/*
+ * Initialise a gather structure for the range [start, end] of @mm.
+ * No table batch is attached yet. fullmm is set to 1 only when start
+ * is 0 and end + 1 wraps around to 0, else to 0.
+ */
+static inline void tlb_gather_mmu(struct mmu_gather *tlb,
+				  struct mm_struct *mm,
+				  unsigned long start,
+				  unsigned long end)
+{
+	tlb->mm = mm;
+	tlb->batch = NULL;
+	tlb->start = start;
+	tlb->end = end;
+	tlb->fullmm = (start == 0 && end + 1 == 0);
+}
+
+/* Flush the TLB for the gathered mm, but only if a flush is pending (lazy flush). */
+static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
+{
+	__tlb_flush_mm_lazy(tlb->mm);
+}
+
+/* Hand the gathered page tables to tlb_table_flush(). */
+static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+	tlb_table_flush(tlb);
+}
+
+
+/* Flush the TLB first, then release the gathered page tables, in that order. */
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	tlb_flush_mmu_tlbonly(tlb);
+	tlb_flush_mmu_free(tlb);
+}
+
+/*
+ * Finish a gather operation by doing a final tlb_flush_mmu().
+ * The start and end arguments are not used here.
+ */
+static inline void tlb_finish_mmu(struct mmu_gather *tlb,
+				  unsigned long start, unsigned long end)
+{
+	tlb_flush_mmu(tlb);
+}
+
+/*
+ * Release the page cache reference for a pte removed by
+ * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
+ * has already been freed, so just do free_page_and_swap_cache.
+ *
+ * Always returns 1 so that the caller does not see a reason to call
+ * tlb_flush_mmu(); the tlb argument is unused.
+ */
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	free_page_and_swap_cache(page);
+	return 1; /* avoid calling tlb_flush_mmu */
+}
+
+/* Free the page right away; nothing is batched and the tlb argument is unused. */
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+	free_page_and_swap_cache(page);
+}
+
+/*
+ * pte_free_tlb frees a pte table and clears the CRSTE for the
+ * page table from the tlb.
+ * The work is delegated to page_table_free_rcu().
+ */
+static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
+				unsigned long address)
+{
+	page_table_free_rcu(tlb, (unsigned long *) pte, address);
+}
+
+/*
+ * pmd_free_tlb frees a pmd table and clears the CRSTE for the
+ * segment table entry from the tlb.
+ * With a two level page table the single pmd is freed as the pgd, so
+ * the pmd is only released here when the asce_limit is above 2GB;
+ * that avoids freeing it twice.
+ */
+static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+				unsigned long address)
+{
+	if (tlb->mm->context.asce_limit > (1UL << 31)) {
+		pgtable_pmd_page_dtor(virt_to_page(pmd));
+		tlb_remove_table(tlb, pmd);
+	}
+}
+
+/*
+ * pud_free_tlb frees a pud table and clears the CRSTE for the
+ * region third table entry from the tlb.
+ * With a three level page table the single pud is freed as the pgd, so
+ * the pud is only released here when the asce_limit is above 4TB;
+ * that avoids freeing it twice.
+ */
+static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+				unsigned long address)
+{
+	if (tlb->mm->context.asce_limit > (1UL << 42))
+		tlb_remove_table(tlb, pud);
+}
+
+#define tlb_start_vma(tlb, vma)			do { } while (0)
+#define tlb_end_vma(tlb, vma)			do { } while (0)
+#define tlb_remove_tlb_entry(tlb, ptep, addr)	do { } while (0)
+#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr)	do { } while (0)
+#define tlb_migrate_finish(mm)			do { } while (0)
+
+#endif /* _S390_TLB_H */
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
new file mode 100644
index 000000000..ca148f7c3
--- /dev/null
+++ b/arch/s390/include/asm/tlbflush.h
@@ -0,0 +1,204 @@
+#ifndef _S390_TLBFLUSH_H
+#define _S390_TLBFLUSH_H
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/pgalloc.h>
+
+/*
+ * Flush all TLB entries on the local CPU ("ptlb"). The "memory" clobber
+ * keeps the compiler from moving memory accesses across the flush.
+ */
+static inline void __tlb_flush_local(void)
+{
+	asm volatile("ptlb" : : : "memory");
+}
+
+/*
+ * Flush TLB entries for a specific ASCE on all CPUs.
+ * The instruction is emitted via .insn (opcode 0xb98e) with 2048 and
+ * the ASCE as register operands and 0 as the last field.
+ */
+static inline void __tlb_flush_idte(unsigned long asce)
+{
+	/* Global TLB flush for the mm */
+	asm volatile(
+		"	.insn	rrf,0xb98e0000,0,%0,%1,0"
+		: : "a" (2048), "a" (asce) : "cc");
+}
+
+/*
+ * Flush TLB entries for a specific ASCE on the local CPU.
+ * Same encoding as __tlb_flush_idte() except that the last field of the
+ * instruction is 1 instead of 0.
+ */
+static inline void __tlb_flush_idte_local(unsigned long asce)
+{
+	/* Local TLB flush for the mm */
+	asm volatile(
+		"	.insn	rrf,0xb98e0000,0,%0,%1,1"
+		: : "a" (2048), "a" (asce) : "cc");
+}
+
+#ifdef CONFIG_SMP
+void smp_ptlb_all(void);
+
+/*
+ * Flush all TLB entries on all CPUs.
+ * Uses "csp" on a zero-initialised local dummy word: registers 2 and 3
+ * hold 0 and register 4 holds the address of the dummy with the low
+ * bit set.
+ * NOTE(review): the low address bit presumably requests the purge;
+ * confirm against the architecture manual.
+ */
+static inline void __tlb_flush_global(void)
+{
+	register unsigned long reg2 asm("2");
+	register unsigned long reg3 asm("3");
+	register unsigned long reg4 asm("4");
+	long dummy;
+
+	dummy = 0;
+	reg2 = reg3 = 0;
+	reg4 = ((unsigned long) &dummy) + 1;
+	asm volatile(
+		"	csp	%0,%2"
+		: : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" );
+}
+
+/*
+ * Flush TLB entries for a specific mm on all CPUs (in case gmap is used
+ * this implicates multiple ASCEs!).
+ *
+ * Runs with preemption disabled and with 0x10000 added to the mm's
+ * attach_count for the duration of the flush. If the mm's cpumask
+ * contains only the current CPU a local flush is enough; otherwise a
+ * global flush is done and, on machines with MACHINE_HAS_TLB_LC, the
+ * cpumask is reset to cpu_attach_mask.
+ */
+static inline void __tlb_flush_full(struct mm_struct *mm)
+{
+	preempt_disable();
+	atomic_add(0x10000, &mm->context.attach_count);
+	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+		/* Local TLB flush */
+		__tlb_flush_local();
+	} else {
+		/* Global TLB flush */
+		__tlb_flush_global();
+		/* Reset TLB flush mask */
+		if (MACHINE_HAS_TLB_LC)
+			cpumask_copy(mm_cpumask(mm),
+				     &mm->context.cpu_attach_mask);
+	}
+	atomic_sub(0x10000, &mm->context.attach_count);
+	preempt_enable();
+}
+
+/*
+ * Flush TLB entries for a specific ASCE on all CPUs.
+ *
+ * Runs with preemption disabled and with 0x10000 added to the mm's
+ * attach_count for the duration of the flush. A local IDTE flush is
+ * used only if the machine has MACHINE_HAS_TLB_LC, the low 16 bits of
+ * the attach count do not exceed "active" (1 if @mm is the current
+ * active_mm, else 0) and the mm's cpumask contains only this CPU.
+ * Otherwise all CPUs are flushed, by IDTE if available or by a global
+ * flush, and with MACHINE_HAS_TLB_LC the cpumask is reset to
+ * cpu_attach_mask.
+ */
+static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
+{
+	int active, count;
+
+	preempt_disable();
+	active = (mm == current->active_mm) ? 1 : 0;
+	count = atomic_add_return(0x10000, &mm->context.attach_count);
+	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+		__tlb_flush_idte_local(asce);
+	} else {
+		if (MACHINE_HAS_IDTE)
+			__tlb_flush_idte(asce);
+		else
+			__tlb_flush_global();
+		/* Reset TLB flush mask */
+		if (MACHINE_HAS_TLB_LC)
+			cpumask_copy(mm_cpumask(mm),
+				     &mm->context.cpu_attach_mask);
+	}
+	atomic_sub(0x10000, &mm->context.attach_count);
+	preempt_enable();
+}
+
+/*
+ * Flush the TLB entries of the kernel address space (init_mm) on all
+ * CPUs: by IDTE on the kernel ASCE if the machine supports it,
+ * otherwise by a global flush.
+ */
+static inline void __tlb_flush_kernel(void)
+{
+	if (!MACHINE_HAS_IDTE) {
+		__tlb_flush_global();
+		return;
+	}
+	__tlb_flush_idte((unsigned long) init_mm.pgd |
+			 init_mm.context.asce_bits);
+}
+#else
+#define __tlb_flush_global()	__tlb_flush_local()
+#define __tlb_flush_full(mm)	__tlb_flush_local()
+
+/*
+ * Flush TLB entries for a specific ASCE (non-SMP build): a local IDTE
+ * flush if the machine has MACHINE_HAS_TLB_LC, otherwise a flush of
+ * the whole local TLB. The mm argument is unused.
+ */
+static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
+{
+	if (!MACHINE_HAS_TLB_LC) {
+		__tlb_flush_local();
+		return;
+	}
+	__tlb_flush_idte_local(asce);
+}
+
+/*
+ * Flush the TLB entries of the kernel address space (non-SMP build):
+ * a local IDTE flush on the init_mm ASCE if the machine has
+ * MACHINE_HAS_TLB_LC, otherwise a flush of the whole local TLB.
+ */
+static inline void __tlb_flush_kernel(void)
+{
+	if (!MACHINE_HAS_TLB_LC) {
+		__tlb_flush_local();
+		return;
+	}
+	__tlb_flush_idte_local((unsigned long) init_mm.pgd |
+			       init_mm.context.asce_bits);
+}
+#endif
+
+/*
+ * Flush all TLB entries of @mm.
+ *
+ * If the machine has IDTE and the mm has no gmaps attached, a per mm
+ * flush by ASCE is preferred, even if a local flush would have been
+ * sufficient because the mm only ran on the local cpu. In all other
+ * cases a full flush is done.
+ */
+static inline void __tlb_flush_mm(struct mm_struct * mm)
+{
+	if (!MACHINE_HAS_IDTE || !list_empty(&mm->context.gmap_list)) {
+		__tlb_flush_full(mm);
+		return;
+	}
+	__tlb_flush_asce(mm, (unsigned long) mm->pgd | mm->context.asce_bits);
+}
+
+/*
+ * Flush the TLB entries of @mm only if a flush has been marked as
+ * pending in mm->context.flush_mm; the marker is cleared afterwards.
+ */
+static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
+{
+	if (!mm->context.flush_mm)
+		return;
+	__tlb_flush_mm(mm);
+	mm->context.flush_mm = 0;
+}
+
+/*
+ * TLB flushing:
+ *  flush_tlb() - flushes the current mm struct TLBs
+ *  flush_tlb_all() - flushes all processes TLBs
+ *  flush_tlb_mm(mm) - flushes the specified mm context TLB's
+ *  flush_tlb_page(vma, vmaddr) - flushes one page
+ *  flush_tlb_range(vma, start, end) - flushes a range of pages
+ *  flush_tlb_kernel_range(start, end) - flushes a range of kernel pages
+ */
+
+/*
+ * flush_tlb_mm goes together with ptep_set_wrprotect for the
+ * copy_page_range operation and flush_tlb_range is related to
+ * ptep_get_and_clear for change_protection. ptep_set_wrprotect and
+ * ptep_get_and_clear do not flush the TLBs directly if the mm has
+ * only one user. At the end of the update the flush_tlb_mm and
+ * flush_tlb_range functions need to do the flush.
+ */
+#define flush_tlb()				do { } while (0)
+#define flush_tlb_all()				do { } while (0)
+#define flush_tlb_page(vma, addr)		do { } while (0)
+
+/* Flush the TLB entries of @mm if a flush is pending for it. */
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	__tlb_flush_mm_lazy(mm);
+}
+
+/*
+ * Flush the TLB entries of the mm that @vma belongs to if a flush is
+ * pending for it. The flush is not limited to [start, end); both
+ * arguments are unused.
+ */
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+	__tlb_flush_mm_lazy(vma->vm_mm);
+}
+
+/*
+ * Flush the kernel TLB entries. The whole kernel address space is
+ * flushed; start and end are unused.
+ */
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	__tlb_flush_kernel();
+}
+
+#endif /* _S390_TLBFLUSH_H */
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
new file mode 100644
index 000000000..b1453a2ae
--- /dev/null
+++ b/arch/s390/include/asm/topology.h
@@ -0,0 +1,56 @@
+#ifndef _ASM_S390_TOPOLOGY_H
+#define _ASM_S390_TOPOLOGY_H
+
+#include <linux/cpumask.h>
+
+struct sysinfo_15_1_x;
+struct cpu;
+
+#ifdef CONFIG_SCHED_BOOK
+
+struct cpu_topology_s390 {
+	unsigned short thread_id;
+	unsigned short core_id;
+	unsigned short socket_id;
+	unsigned short book_id;
+	cpumask_t thread_mask;
+	cpumask_t core_mask;
+	cpumask_t book_mask;
+};
+
+DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
+
+#define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id)
+#define topology_thread_id(cpu)		  (per_cpu(cpu_topology, cpu).thread_id)
+#define topology_thread_cpumask(cpu)	  (&per_cpu(cpu_topology, cpu).thread_mask)
+#define topology_core_id(cpu)		  (per_cpu(cpu_topology, cpu).core_id)
+#define topology_core_cpumask(cpu)	  (&per_cpu(cpu_topology, cpu).core_mask)
+#define topology_book_id(cpu)		  (per_cpu(cpu_topology, cpu).book_id)
+#define topology_book_cpumask(cpu)	  (&per_cpu(cpu_topology, cpu).book_mask)
+
+#define mc_capable() 1
+
+int topology_cpu_init(struct cpu *);
+int topology_set_cpu_management(int fc);
+void topology_schedule_update(void);
+void store_topology(struct sysinfo_15_1_x *info);
+void topology_expect_change(void);
+const struct cpumask *cpu_coregroup_mask(int cpu);
+
+#else /* CONFIG_SCHED_BOOK */
+
+static inline void topology_schedule_update(void) { }
+static inline int topology_cpu_init(struct cpu *cpu) { return 0; }
+static inline void topology_expect_change(void) { }
+
+#endif /* CONFIG_SCHED_BOOK */
+
+#define POLARIZATION_UNKNOWN	(-1)
+#define POLARIZATION_HRZ	(0)
+#define POLARIZATION_VL		(1)
+#define POLARIZATION_VM		(2)
+#define POLARIZATION_VH		(3)
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_S390_TOPOLOGY_H */
diff --git a/arch/s390/include/asm/types.h b/arch/s390/include/asm/types.h
new file mode 100644
index 000000000..6740f4f97
--- /dev/null
+++ b/arch/s390/include/asm/types.h
@@ -0,0 +1,11 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/types.h"
+ */
+#ifndef _S390_TYPES_H
+#define _S390_TYPES_H
+
+#include <uapi/asm/types.h>
+
+#endif /* _S390_TYPES_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
new file mode 100644
index 000000000..d64a7a621
--- /dev/null
+++ b/arch/s390/include/asm/uaccess.h
@@ -0,0 +1,377 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Hartmut Penner (hp@de.ibm.com),
+ *               Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "include/asm-i386/uaccess.h"
+ */
+#ifndef __S390_UACCESS_H
+#define __S390_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <asm/ctl_reg.h>
+
+#define VERIFY_READ     0
+#define VERIFY_WRITE    1
+
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not.  If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(a)  ((mm_segment_t) { (a) })
+
+
+#define KERNEL_DS       MAKE_MM_SEG(0)
+#define USER_DS         MAKE_MM_SEG(1)
+
+#define get_ds()        (KERNEL_DS)
+#define get_fs()        (current->thread.mm_segment)
+
+#define set_fs(x) \
+({									\
+	unsigned long __pto;						\
+	current->thread.mm_segment = (x);				\
+	__pto = current->thread.mm_segment.ar4 ?			\
+		S390_lowcore.user_asce : S390_lowcore.kernel_asce;	\
+	__ctl_load(__pto, 7, 7);					\
+})
+
+#define segment_eq(a,b) ((a).ar4 == (b).ar4)
+
+static inline int __range_ok(unsigned long addr, unsigned long size)
+{
+	return 1;	/* addr and size are not inspected; every range is accepted */
+}
+
+#define __access_ok(addr, size)				\
+({							\
+	__chk_user_ptr(addr);				\
+	__range_ok((unsigned long)(addr), (size));	\
+})
+
+#define access_ok(type, addr, size) __access_ok(addr, size)
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+	int insn, fixup;	/* each is an offset relative to its own field address */
+};
+
+static inline unsigned long extable_insn(const struct exception_table_entry *x)
+{
+	return (unsigned long)&x->insn + x->insn; /* field address + stored offset */
+}
+
+static inline unsigned long extable_fixup(const struct exception_table_entry *x)
+{
+	return (unsigned long)&x->fixup + x->fixup; /* field address + stored offset */
+}
+
+#define ARCH_HAS_SORT_EXTABLE
+#define ARCH_HAS_SEARCH_EXTABLE
+
+/**
+ * __copy_from_user: - Copy a block of data from user space, with less checking.
+ * @to:   Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n:	  Number of bytes to copy.
+ *
+ * Context: User context only.	This function may sleep.
+ *
+ * Copy data from user space to kernel space.  Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ */
+unsigned long __must_check __copy_from_user(void *to, const void __user *from,
+					    unsigned long n);
+
+/**
+ * __copy_to_user: - Copy a block of data into user space, with less checking.
+ * @to:   Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n:	  Number of bytes to copy.
+ *
+ * Context: User context only.	This function may sleep.
+ *
+ * Copy data from kernel space to user space.  Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+unsigned long __must_check __copy_to_user(void __user *to, const void *from,
+					  unsigned long n);
+
+#define __copy_to_user_inatomic __copy_to_user
+#define __copy_from_user_inatomic __copy_from_user
+
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+
+#define __put_get_user_asm(to, from, size, spec)		\
+({								\
+	register unsigned long __reg0 asm("0") = spec;		\
+	int __rc;						\
+								\
+	asm volatile(						\
+		"0:	mvcos	%1,%3,%2\n"			\
+		"1:	xr	%0,%0\n"			\
+		"2:\n"						\
+		".pushsection .fixup, \"ax\"\n"			\
+		"3:	lhi	%0,%5\n"			\
+		"	jg	2b\n"				\
+		".popsection\n"					\
+		EX_TABLE(0b,3b) EX_TABLE(1b,3b)			\
+		: "=d" (__rc), "=Q" (*(to))			\
+		: "d" (size), "Q" (*(from)),			\
+		  "d" (__reg0), "K" (-EFAULT)			\
+		: "cc");					\
+	__rc;							\
+})
+
+#define __put_user_fn(x, ptr, size) __put_get_user_asm(ptr, x, size, 0x810000UL)
+#define __get_user_fn(x, ptr, size) __put_get_user_asm(x, ptr, size, 0x81UL)
+
+#else /* CONFIG_HAVE_MARCH_Z10_FEATURES */
+
+static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
+{
+	/* A non-zero count of uncopied bytes is reported as -EFAULT. */
+	return __copy_to_user(ptr, x, size) ? -EFAULT : 0;
+}
+
+static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
+{
+	/* A non-zero count of uncopied bytes is reported as -EFAULT. */
+	return __copy_from_user(x, ptr, size) ? -EFAULT : 0;
+}
+
+#endif /* CONFIG_HAVE_MARCH_Z10_FEATURES */
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ */
+#define __put_user(x, ptr) \
+({								\
+	__typeof__(*(ptr)) __x = (x);				\
+	int __pu_err = -EFAULT;					\
+        __chk_user_ptr(ptr);                                    \
+	switch (sizeof (*(ptr))) {				\
+	case 1:							\
+	case 2:							\
+	case 4:							\
+	case 8:							\
+		__pu_err = __put_user_fn(&__x, ptr,		\
+					 sizeof(*(ptr)));	\
+		break;						\
+	default:						\
+		__put_user_bad();				\
+		break;						\
+	 }							\
+	__pu_err;						\
+})
+
+#define put_user(x, ptr)					\
+({								\
+	might_fault();						\
+	__put_user(x, ptr);					\
+})
+
+
+int __put_user_bad(void) __attribute__((noreturn));
+
+#define __get_user(x, ptr)					\
+({								\
+	int __gu_err = -EFAULT;					\
+	__chk_user_ptr(ptr);					\
+	switch (sizeof(*(ptr))) {				\
+	case 1: {						\
+		unsigned char __x;				\
+		__gu_err = __get_user_fn(&__x, ptr,		\
+					 sizeof(*(ptr)));	\
+		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
+		break;						\
+	};							\
+	case 2: {						\
+		unsigned short __x;				\
+		__gu_err = __get_user_fn(&__x, ptr,		\
+					 sizeof(*(ptr)));	\
+		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
+		break;						\
+	};							\
+	case 4: {						\
+		unsigned int __x;				\
+		__gu_err = __get_user_fn(&__x, ptr,		\
+					 sizeof(*(ptr)));	\
+		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
+		break;						\
+	};							\
+	case 8: {						\
+		unsigned long long __x;				\
+		__gu_err = __get_user_fn(&__x, ptr,		\
+					 sizeof(*(ptr)));	\
+		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
+		break;						\
+	};							\
+	default:						\
+		__get_user_bad();				\
+		break;						\
+	}							\
+	__gu_err;						\
+})
+
+#define get_user(x, ptr)					\
+({								\
+	might_fault();						\
+	__get_user(x, ptr);					\
+})
+
+int __get_user_bad(void) __attribute__((noreturn));
+
+#define __put_user_unaligned __put_user
+#define __get_user_unaligned __get_user
+
+/**
+ * copy_to_user: - Copy a block of data into user space.
+ * @to:   Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Copy data from kernel space to user space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+static inline unsigned long __must_check
+copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	might_fault();
+	return __copy_to_user(to, from, n);
+}
+
+void copy_from_user_overflow(void)
+#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
+__compiletime_warning("copy_from_user() buffer size is not provably correct")
+#endif
+;
+
+/**
+ * copy_from_user: - Copy a block of data from user space.
+ * @to:   Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n:    Number of bytes to copy.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Copy data from user space to kernel space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ */
+static inline unsigned long __must_check
+copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	unsigned int sz = __compiletime_object_size(to);
+
+	might_fault();
+	if (likely(sz == -1 || sz >= n))
+		return __copy_from_user(to, from, n);
+	/* Destination is known to be too small: report all n bytes uncopied. */
+	copy_from_user_overflow();
+	return n;
+}
+
+unsigned long __must_check
+__copy_in_user(void __user *to, const void __user *from, unsigned long n);
+
+static inline unsigned long __must_check
+copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+	might_fault();
+	return __copy_in_user(to, from, n);
+}
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+
+long __strncpy_from_user(char *dst, const char __user *src, long count);
+
+static inline long __must_check
+strncpy_from_user(char *dst, const char __user *src, long count)
+{
+	might_fault();
+	return __strncpy_from_user(dst, src, count);
+}
+
+unsigned long __must_check __strnlen_user(const char __user *src, unsigned long count);
+
+static inline unsigned long strnlen_user(const char __user *src, unsigned long n)
+{
+	might_fault();
+	return __strnlen_user(src, n);
+}
+
+/**
+ * strlen_user: - Get the size of a string in user space.
+ * @str: The string to measure.
+ *
+ * Context: User context only.  This function may sleep.
+ *
+ * Get the size of a NUL-terminated string in user space.
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ * On exception, returns 0.
+ *
+ * If there is a limit on the length of a valid string, you may wish to
+ * consider using strnlen_user() instead.
+ */
+#define strlen_user(str) strnlen_user(str, ~0UL)
+
+/*
+ * Zero Userspace
+ */
+unsigned long __must_check __clear_user(void __user *to, unsigned long size);
+
+static inline unsigned long __must_check clear_user(void __user *to, unsigned long n)
+{
+	might_fault();
+	return __clear_user(to, n);
+}
+
+int copy_to_user_real(void __user *dest, void *src, unsigned long count);
+void s390_kernel_write(void *dst, const void *src, size_t size);
+
+#endif /* __S390_UACCESS_H */
diff --git a/arch/s390/include/asm/unaligned.h b/arch/s390/include/asm/unaligned.h
new file mode 100644
index 000000000..da9627afe
--- /dev/null
+++ b/arch/s390/include/asm/unaligned.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_S390_UNALIGNED_H
+#define _ASM_S390_UNALIGNED_H
+
+/*
+ * The S390 can do unaligned accesses itself. 
+ */
+#include <linux/unaligned/access_ok.h>
+#include <linux/unaligned/generic.h>
+
+#define get_unaligned	__get_unaligned_be
+#define put_unaligned	__put_unaligned_be
+
+#endif /* _ASM_S390_UNALIGNED_H */
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
new file mode 100644
index 000000000..91f56b1d8
--- /dev/null
+++ b/arch/s390/include/asm/unistd.h
@@ -0,0 +1,49 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/unistd.h"
+ */
+#ifndef _ASM_S390_UNISTD_H_
+#define _ASM_S390_UNISTD_H_
+
+#include <uapi/asm/unistd.h>
+
+
+#define __IGNORE_time
+
+/* Ignore NUMA system calls. Not wired up on s390. */
+#define __IGNORE_mbind
+#define __IGNORE_get_mempolicy
+#define __IGNORE_set_mempolicy
+#define __IGNORE_migrate_pages
+#define __IGNORE_move_pages
+
+/* Ignore system calls that are also reachable via sys_socketcall */
+#define __IGNORE_recvmmsg
+#define __IGNORE_sendmmsg
+
+#define __ARCH_WANT_OLD_READDIR
+#define __ARCH_WANT_SYS_ALARM
+#define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_PAUSE
+#define __ARCH_WANT_SYS_SIGNAL
+#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_SOCKETCALL
+#define __ARCH_WANT_SYS_IPC
+#define __ARCH_WANT_SYS_FADVISE64
+#define __ARCH_WANT_SYS_GETPGRP
+#define __ARCH_WANT_SYS_LLSEEK
+#define __ARCH_WANT_SYS_NICE
+#define __ARCH_WANT_SYS_OLD_GETRLIMIT
+#define __ARCH_WANT_SYS_OLD_MMAP
+#define __ARCH_WANT_SYS_OLDUMOUNT
+#define __ARCH_WANT_SYS_SIGPENDING
+#define __ARCH_WANT_SYS_SIGPROCMASK
+# ifdef CONFIG_COMPAT
+#   define __ARCH_WANT_COMPAT_SYS_TIME
+# endif
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
+
+#endif /* _ASM_S390_UNISTD_H_ */
diff --git a/arch/s390/include/asm/uprobes.h b/arch/s390/include/asm/uprobes.h
new file mode 100644
index 000000000..1411dff7f
--- /dev/null
+++ b/arch/s390/include/asm/uprobes.h
@@ -0,0 +1,42 @@
+/*
+ *    User-space Probes (UProbes) for s390
+ *
+ *    Copyright IBM Corp. 2014
+ *    Author(s): Jan Willeke,
+ */
+
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+
+#include <linux/notifier.h>
+
+typedef u16 uprobe_opcode_t;
+
+#define UPROBE_XOL_SLOT_BYTES	256 /* cache aligned */
+
+#define UPROBE_SWBP_INSN	0x0002
+#define UPROBE_SWBP_INSN_SIZE	2
+
+struct arch_uprobe {
+	union{
+		uprobe_opcode_t insn[3];
+		uprobe_opcode_t ixol[3];
+	};
+	unsigned int saved_per : 1;
+	unsigned int saved_int_code;
+};
+
+struct arch_uprobe_task {
+};
+
+int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm,
+			     unsigned long addr);
+int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
+int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
+				 void *data);
+void arch_uprobe_abort_xol(struct arch_uprobe *ap, struct pt_regs *regs);
+unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline,
+						struct pt_regs *regs);
+#endif	/* _ASM_UPROBES_H */
diff --git a/arch/s390/include/asm/user.h b/arch/s390/include/asm/user.h
new file mode 100644
index 000000000..6ed1d1886
--- /dev/null
+++ b/arch/s390/include/asm/user.h
@@ -0,0 +1,74 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/user.h"
+ */
+
+#ifndef _S390_USER_H
+#define _S390_USER_H
+
+#include <asm/page.h>
+#include <asm/ptrace.h>
+/* Core file format: The core file is written in such a way that gdb
+   can understand it and provide useful information to the user (under
+   linux we use the 'trad-core' bfd).  There are quite a number of
+   obstacles to being able to view the contents of the floating point
+   registers, and until these are solved you will not be able to view the
+   contents of them.  Actually, you can read in the core file and look at
+   the contents of the user struct to find out what the floating point
+   registers contain.
+   The actual file contents are as follows:
+   UPAGE: 1 page consisting of a user struct that tells gdb what is present
+   in the file.  Directly after this is a copy of the task_struct, which
+   is currently not used by gdb, but it may come in useful at some point.
+   All of the registers are stored as part of the upage.  The upage should
+   always be only one page.
+   DATA: The data area is stored.  We use current->end_text to
+   current->brk to pick up all of the user variables, plus any memory
+   that may have been malloced.  No attempt is made to determine if a page
+   is demand-zero or if a page is totally unused, we just cover the entire
+   range.  All of the addresses are rounded in such a way that an integral
+   number of pages is written.
+   STACK: We need the stack information in order to get a meaningful
+   backtrace.  We need to write the data from (esp) to
+   current->start_stack, so we round each of these off in order to be able
+   to write an integer number of pages.
+   The minimum core file size is 3 pages, or 12288 bytes.
+*/
+
+
+/*
+ * This is the old layout of "struct pt_regs", and
+ * is still the layout used by user mode (the new
+ * pt_regs doesn't have all registers as the kernel
+ * doesn't use the extra segment registers)
+ */
+
+/* When the kernel dumps core, it starts by dumping the user struct -
+   this will be used by gdb to figure out where the data and stack segments
+   are within the file, and what virtual addresses to use. */
+struct user {
+/* We start with the registers, to mimic the way that "memory" is returned
+   from the ptrace(3,...) function.  */
+  struct user_regs_struct regs;		/* Where the registers are actually stored */
+/* The rest of this junk is to help gdb figure out what goes where */
+  unsigned long int u_tsize;	/* Text segment size (pages). */
+  unsigned long int u_dsize;	/* Data segment size (pages). */
+  unsigned long int u_ssize;	/* Stack segment size (pages). */
+  unsigned long start_code;     /* Starting virtual address of text. */
+  unsigned long start_stack;	/* Starting virtual address of stack area.
+				   This is actually the bottom of the stack,
+				   the top of the stack is always found in the
+				   esp register.  */
+  long int signal;     		/* Signal that caused the core dump. */
+  unsigned long u_ar0;		/* Used by gdb to help find the values for */
+				/* the registers. */
+  unsigned long magic;		/* To uniquely identify a core file */
+  char u_comm[32];		/* User command that was responsible */
+};
+#define NBPG PAGE_SIZE
+#define UPAGES 1
+#define HOST_TEXT_START_ADDR (u.start_code)
+#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
+
+#endif /* _S390_USER_H */
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
new file mode 100644
index 000000000..787acd4f9
--- /dev/null
+++ b/arch/s390/include/asm/vdso.h
@@ -0,0 +1,49 @@
+#ifndef __S390_VDSO_H__
+#define __S390_VDSO_H__
+
+/* Default link addresses for the vDSOs */
+#define VDSO32_LBASE	0
+#define VDSO64_LBASE	0
+
+#define VDSO_VERSION_STRING	LINUX_2.6.29
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Note about the vdso_data and vdso_per_cpu_data structures:
+ *
+ * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
+ * structure is supposed to be known only to the function in the vdso
+ * itself and may change without notice.
+ */
+
+struct vdso_data {
+	__u64 tb_update_count;		/* Timebase atomicity ctr	0x00 */
+	__u64 xtime_tod_stamp;		/* TOD clock for xtime		0x08 */
+	__u64 xtime_clock_sec;		/* Kernel time			0x10 */
+	__u64 xtime_clock_nsec;		/*				0x18 */
+	__u64 xtime_coarse_sec;		/* Coarse kernel time		0x20 */
+	__u64 xtime_coarse_nsec;	/*				0x28 */
+	__u64 wtom_clock_sec;		/* Wall to monotonic clock	0x30 */
+	__u64 wtom_clock_nsec;		/*				0x38 */
+	__u64 wtom_coarse_sec;		/* Coarse wall to monotonic	0x40 */
+	__u64 wtom_coarse_nsec;		/*				0x48 */
+	__u32 tz_minuteswest;		/* Minutes west of Greenwich	0x50 */
+	__u32 tz_dsttime;		/* Type of dst correction	0x54 */
+	__u32 ectg_available;		/* ECTG instruction present	0x58 */
+	__u32 tk_mult;			/* Mult. used for xtime_nsec	0x5c */
+	__u32 tk_shift;			/* Shift used for xtime_nsec	0x60 */
+};
+
+struct vdso_per_cpu_data {
+	__u64 ectg_timer_base;
+	__u64 ectg_user_time;
+};
+
+extern struct vdso_data *vdso_data;
+
+int vdso_alloc_per_cpu(struct _lowcore *lowcore);
+void vdso_free_per_cpu(struct _lowcore *lowcore);
+
+#endif /* __ASSEMBLY__ */
+#endif /* __S390_VDSO_H__ */
diff --git a/arch/s390/include/asm/vga.h b/arch/s390/include/asm/vga.h
new file mode 100644
index 000000000..d375526c2
--- /dev/null
+++ b/arch/s390/include/asm/vga.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_S390_VGA_H
+#define _ASM_S390_VGA_H
+
+/* Avoid compile errors due to missing asm/vga.h */
+
+#endif /* _ASM_S390_VGA_H */
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
new file mode 100644
index 000000000..af9896c53
--- /dev/null
+++ b/arch/s390/include/asm/vtime.h
@@ -0,0 +1,7 @@
+#ifndef _S390_VTIME_H
+#define _S390_VTIME_H
+
+#define __ARCH_HAS_VTIME_ACCOUNT
+#define __ARCH_HAS_VTIME_TASK_SWITCH
+
+#endif /* _S390_VTIME_H */
diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h
new file mode 100644
index 000000000..10a179af6
--- /dev/null
+++ b/arch/s390/include/asm/vtimer.h
@@ -0,0 +1,31 @@
+/*
+ *  Copyright IBM Corp. 2003, 2012
+ *  Virtual CPU timer
+ *
+ *  Author(s): Jan Glauber <jan.glauber@de.ibm.com>
+ */
+
+#ifndef _ASM_S390_TIMER_H
+#define _ASM_S390_TIMER_H
+
+#define VTIMER_MAX_SLICE (0x7fffffffffffffffULL)
+
+struct vtimer_list {
+	struct list_head entry;
+	u64 expires;
+	u64 interval;
+	void (*function)(unsigned long);
+	unsigned long data;
+};
+
+extern void init_virt_timer(struct vtimer_list *timer);
+extern void add_virt_timer(struct vtimer_list *timer);
+extern void add_virt_timer_periodic(struct vtimer_list *timer);
+extern int mod_virt_timer(struct vtimer_list *timer, u64 expires);
+extern int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires);
+extern int del_virt_timer(struct vtimer_list *timer);
+
+extern void init_cpu_vtimer(void);
+extern void vtime_init(void);
+
+#endif /* _ASM_S390_TIMER_H */
diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h
new file mode 100644
index 000000000..c82eb12a5
--- /dev/null
+++ b/arch/s390/include/asm/xor.h
@@ -0,0 +1 @@
+#include <asm-generic/xor.h>
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
new file mode 100644
index 000000000..08fe6dad9
--- /dev/null
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -0,0 +1,52 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+header-y += auxvec.h
+header-y += bitsperlong.h
+header-y += byteorder.h
+header-y += chpid.h
+header-y += chsc.h
+header-y += cmb.h
+header-y += dasd.h
+header-y += debug.h
+header-y += errno.h
+header-y += fcntl.h
+header-y += ioctl.h
+header-y += ioctls.h
+header-y += ipcbuf.h
+header-y += kvm.h
+header-y += kvm_para.h
+header-y += kvm_perf.h
+header-y += kvm_virtio.h
+header-y += mman.h
+header-y += monwriter.h
+header-y += msgbuf.h
+header-y += param.h
+header-y += poll.h
+header-y += posix_types.h
+header-y += ptrace.h
+header-y += qeth.h
+header-y += resource.h
+header-y += schid.h
+header-y += sembuf.h
+header-y += setup.h
+header-y += shmbuf.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += signal.h
+header-y += socket.h
+header-y += sockios.h
+header-y += sclp_ctl.h
+header-y += sie.h
+header-y += stat.h
+header-y += statfs.h
+header-y += swab.h
+header-y += tape390.h
+header-y += termbits.h
+header-y += termios.h
+header-y += types.h
+header-y += ucontext.h
+header-y += unistd.h
+header-y += virtio-ccw.h
+header-y += vtoc.h
+header-y += zcrypt.h
diff --git a/arch/s390/include/uapi/asm/auxvec.h b/arch/s390/include/uapi/asm/auxvec.h
new file mode 100644
index 000000000..a1f153e89
--- /dev/null
+++ b/arch/s390/include/uapi/asm/auxvec.h
@@ -0,0 +1,6 @@
+#ifndef __ASMS390_AUXVEC_H
+#define __ASMS390_AUXVEC_H
+
+#define AT_SYSINFO_EHDR		33
+
+#endif
diff --git a/arch/s390/include/uapi/asm/bitsperlong.h b/arch/s390/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..6b235aea9
--- /dev/null
+++ b/arch/s390/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,13 @@
+#ifndef __ASM_S390_BITSPERLONG_H
+#define __ASM_S390_BITSPERLONG_H
+
+#ifndef __s390x__
+#define __BITS_PER_LONG 32
+#else
+#define __BITS_PER_LONG 64
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_S390_BITSPERLONG_H */
+
diff --git a/arch/s390/include/uapi/asm/byteorder.h b/arch/s390/include/uapi/asm/byteorder.h
new file mode 100644
index 000000000..a332e59e2
--- /dev/null
+++ b/arch/s390/include/uapi/asm/byteorder.h
@@ -0,0 +1,6 @@
+#ifndef _S390_BYTEORDER_H
+#define _S390_BYTEORDER_H
+
+#include <linux/byteorder/big_endian.h>
+
+#endif /* _S390_BYTEORDER_H */
diff --git a/arch/s390/include/uapi/asm/chpid.h b/arch/s390/include/uapi/asm/chpid.h
new file mode 100644
index 000000000..6b4fb29cc
--- /dev/null
+++ b/arch/s390/include/uapi/asm/chpid.h
@@ -0,0 +1,22 @@
+/*
+ *    Copyright IBM Corp. 2007, 2012
+ *    Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
+ */
+
+#ifndef _UAPI_ASM_S390_CHPID_H
+#define _UAPI_ASM_S390_CHPID_H
+
+#include <linux/string.h>
+#include <linux/types.h>
+
+#define __MAX_CHPID 255
+
+struct chp_id {
+	__u8 reserved1;
+	__u8 cssid;
+	__u8 reserved2;
+	__u8 id;
+} __attribute__((packed));
+
+
+#endif /* _UAPI_ASM_S390_CHPID_H */
diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h
new file mode 100644
index 000000000..65dc69472
--- /dev/null
+++ b/arch/s390/include/uapi/asm/chsc.h
@@ -0,0 +1,143 @@
+/*
+ * ioctl interface for /dev/chsc
+ *
+ * Copyright IBM Corp. 2008, 2012
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+
+#ifndef _ASM_CHSC_H
+#define _ASM_CHSC_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/chpid.h>
+#include <asm/schid.h>
+
+#define CHSC_SIZE 0x1000
+
+struct chsc_async_header {
+	__u16 length;
+	__u16 code;
+	__u32 cmd_dependend;
+	__u32 key : 4;
+	__u32 : 28;
+	struct subchannel_id sid;
+} __attribute__ ((packed));
+
+struct chsc_async_area {
+	struct chsc_async_header header;
+	__u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)];
+} __attribute__ ((packed));
+
+struct chsc_header {
+	__u16 length;
+	__u16 code;
+} __attribute__ ((packed));
+
+struct chsc_sync_area {
+	struct chsc_header header;
+	__u8 data[CHSC_SIZE - sizeof(struct chsc_header)];
+} __attribute__ ((packed));
+
+struct chsc_response_struct {
+	__u16 length;
+	__u16 code;
+	__u32 parms;
+	__u8 data[CHSC_SIZE - 2 * sizeof(__u16) - sizeof(__u32)];
+} __attribute__ ((packed));
+
+struct chsc_chp_cd {
+	struct chp_id chpid;
+	int m;
+	int fmt;
+	struct chsc_response_struct cpcb;
+};
+
+struct chsc_cu_cd {
+	__u16 cun;
+	__u8 cssid;
+	int m;
+	int fmt;
+	struct chsc_response_struct cucb;
+};
+
+struct chsc_sch_cud {
+	struct subchannel_id schid;
+	int fmt;
+	struct chsc_response_struct scub;
+};
+
+struct conf_id {
+	int m;
+	__u8 cssid;
+	__u8 ssid;
+};
+
+struct chsc_conf_info {
+	struct conf_id id;
+	int fmt;
+	struct chsc_response_struct scid;
+};
+
+struct ccl_parm_chpid {
+	int m;
+	struct chp_id chp;
+};
+
+struct ccl_parm_cssids {
+	__u8 f_cssid;
+	__u8 l_cssid;
+};
+
+struct chsc_comp_list {
+	struct {
+		enum {
+			CCL_CU_ON_CHP = 1,
+			CCL_CHP_TYPE_CAP = 2,
+			CCL_CSS_IMG = 4,
+			CCL_CSS_IMG_CONF_CHAR = 5,
+			CCL_IOP_CHP = 6,
+		} ctype;
+		int fmt;
+		struct ccl_parm_chpid chpid;
+		struct ccl_parm_cssids cssids;
+	} req;
+	struct chsc_response_struct sccl;
+};
+
+struct chsc_dcal {
+	struct {
+		enum {
+			DCAL_CSS_IID_PN = 4,
+		} atype;
+		__u32 list_parm[2];
+		int fmt;
+	} req;
+	struct chsc_response_struct sdcal;
+};
+
+struct chsc_cpd_info {
+	struct chp_id chpid;
+	int m;
+	int fmt;
+	int rfmt;
+	int c;
+	struct chsc_response_struct chpdb;
+};
+
+#define CHSC_IOCTL_MAGIC 'c'
+
+#define CHSC_START _IOWR(CHSC_IOCTL_MAGIC, 0x81, struct chsc_async_area)
+#define CHSC_INFO_CHANNEL_PATH _IOWR(CHSC_IOCTL_MAGIC, 0x82, \
+				    struct chsc_chp_cd)
+#define CHSC_INFO_CU _IOWR(CHSC_IOCTL_MAGIC, 0x83, struct chsc_cu_cd)
+#define CHSC_INFO_SCH_CU _IOWR(CHSC_IOCTL_MAGIC, 0x84, struct chsc_sch_cud)
+#define CHSC_INFO_CI _IOWR(CHSC_IOCTL_MAGIC, 0x85, struct chsc_conf_info)
+#define CHSC_INFO_CCL _IOWR(CHSC_IOCTL_MAGIC, 0x86, struct chsc_comp_list)
+#define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info)
+#define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal)
+#define CHSC_START_SYNC _IOWR(CHSC_IOCTL_MAGIC, 0x89, struct chsc_sync_area)
+#define CHSC_ON_CLOSE_SET _IOWR(CHSC_IOCTL_MAGIC, 0x8a, struct chsc_async_area)
+#define CHSC_ON_CLOSE_REMOVE _IO(CHSC_IOCTL_MAGIC, 0x8b)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/cmb.h b/arch/s390/include/uapi/asm/cmb.h
new file mode 100644
index 000000000..0c086d00d
--- /dev/null
+++ b/arch/s390/include/uapi/asm/cmb.h
@@ -0,0 +1,53 @@
+#ifndef _UAPIS390_CMB_H
+#define _UAPIS390_CMB_H
+
+#include <linux/types.h>
+
+/**
+ * struct cmbdata - channel measurement block data for user space
+ * @size: size of the stored data
+ * @elapsed_time: time since last sampling
+ * @ssch_rsch_count: number of ssch and rsch
+ * @sample_count: number of samples
+ * @device_connect_time: time of device connect
+ * @function_pending_time: time of function pending
+ * @device_disconnect_time: time of device disconnect
+ * @control_unit_queuing_time: time of control unit queuing
+ * @device_active_only_time: time of device active only
+ * @device_busy_time: time of device busy (ext. format)
+ * @initial_command_response_time: initial command response time (ext. format)
+ *
+ * All values are stored as 64 bit for simplicity, especially
+ * in 32 bit emulation mode. All time values are normalized to
+ * nanoseconds.
+ * Currently, two formats are known, which differ by the size of
+ * this structure, i.e. the last two members are only set when
+ * the extended channel measurement facility (first shipped in
+ * z990 machines) is activated.
+ * Potentially, more fields could be added, which would result in a
+ * new ioctl number.
+ */
+struct cmbdata {
+	__u64 size;
+	__u64 elapsed_time;
+ /* basic and extended format: */
+	__u64 ssch_rsch_count;
+	__u64 sample_count;
+	__u64 device_connect_time;
+	__u64 function_pending_time;
+	__u64 device_disconnect_time;
+	__u64 control_unit_queuing_time;
+	__u64 device_active_only_time;
+ /* extended format only: */
+	__u64 device_busy_time;
+	__u64 initial_command_response_time;
+};
+
+/* enable channel measurement */
+#define BIODASDCMFENABLE	_IO(DASD_IOCTL_LETTER, 32)
+/* disable channel measurement */
+#define BIODASDCMFDISABLE	_IO(DASD_IOCTL_LETTER, 33)
+/* read channel measurement data */
+#define BIODASDREADALLCMB	_IOWR(DASD_IOCTL_LETTER, 33, struct cmbdata)
+
+#endif /* _UAPIS390_CMB_H */
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
new file mode 100644
index 000000000..5812a3b2d
--- /dev/null
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -0,0 +1,295 @@
+/* 
+ * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
+ * Bugreports.to..: <Linux390@de.ibm.com>
+ * Copyright IBM Corp. 1999, 2000
+ * EMC Symmetrix ioctl Copyright EMC Corporation, 2008
+ * Author.........: Nigel Hislop <hislop_nigel@emc.com>
+ *
+ * This file is the interface of the DASD device driver, which is exported to user space.
+ * Any future changes wrt the API will result in a change of the APIVERSION reported
+ * to userspace by the DASDAPIVER-ioctl.
+ *
+ */
+
+#ifndef DASD_H
+#define DASD_H
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define DASD_IOCTL_LETTER 'D'
+
+#define DASD_API_VERSION 6
+
+/* 
+ * struct dasd_information2_t
+ * represents any data about the device, which is visible to userspace.
+ *  including format and features.
+ */
+typedef struct dasd_information2_t {
+        unsigned int devno;         /* S/390 devno */
+        unsigned int real_devno;    /* for aliases */
+        unsigned int schid;         /* S/390 subchannel identifier */
+        unsigned int cu_type  : 16; /* from SenseID */
+        unsigned int cu_model :  8; /* from SenseID */
+        unsigned int dev_type : 16; /* from SenseID */
+        unsigned int dev_model : 8; /* from SenseID */
+        unsigned int open_count;    /* NOTE(review): presumably number of current opens - confirm */
+        unsigned int req_queue_len; /* NOTE(review): presumably length of the request queue - confirm */
+        unsigned int chanq_len;     /* length of chanq */
+        char type[4];               /* from discipline.name, 'none' for unknown */
+        unsigned int status;        /* current device level */
+        unsigned int label_block;   /* where to find the VOLSER */
+        unsigned int FBA_layout;    /* fixed block size (like AIXVOL) */
+        unsigned int characteristics_size; /* NOTE(review): presumably bytes used in characteristics[] - confirm */
+        unsigned int confdata_size;        /* NOTE(review): presumably bytes used in configuration_data[] - confirm */
+        char characteristics[64];   /* from read_device_characteristics */
+        char configuration_data[256]; /* from read_configuration_data */
+        unsigned int format;          /* format info like formatted/cdl/ldl/..., see DASD_FORMAT_* */
+        unsigned int features;        /* dasd features like 'ro',..., see DASD_FEATURE_* */
+        unsigned int reserved0;       /* reserved for further use ,...          */
+        unsigned int reserved1;       /* reserved for further use ,...          */
+        unsigned int reserved2;       /* reserved for further use ,...          */
+        unsigned int reserved3;       /* reserved for further use ,...          */
+        unsigned int reserved4;       /* reserved for further use ,...          */
+        unsigned int reserved5;       /* reserved for further use ,...          */
+        unsigned int reserved6;       /* reserved for further use ,...          */
+        unsigned int reserved7;       /* reserved for further use ,...          */
+} dasd_information2_t;
+
+/*
+ * values to be used for dasd_information2_t.format
+ * 0x00: NOT formatted
+ * 0x01: Linux disc layout
+ * 0x02: Common disc layout
+ */
+#define DASD_FORMAT_NONE 0
+#define DASD_FORMAT_LDL  1
+#define DASD_FORMAT_CDL  2
+/*
+ * values to be used for dasd_information2_t.features
+ * 0x00: default features
+ * 0x01: readonly (ro)
+ * 0x02: use diag discipline (diag)
+ * 0x04: set the device initially online (internal use only)
+ * 0x08: enable ERP related logging
+ * 0x10: failfast; 0x20: fail on slck; 0x40: give access to raw eckd data
+ */
+#define DASD_FEATURE_DEFAULT	     0x00
+#define DASD_FEATURE_READONLY	     0x01
+#define DASD_FEATURE_USEDIAG	     0x02
+#define DASD_FEATURE_INITIAL_ONLINE  0x04
+#define DASD_FEATURE_ERPLOG	     0x08
+#define DASD_FEATURE_FAILFAST	     0x10
+#define DASD_FEATURE_FAILONSLCK      0x20
+#define DASD_FEATURE_USERAW	     0x40
+
+#define DASD_PARTN_BITS 2
+
+/* 
+ * struct dasd_information_t
+ * represents any data about the device, which is visible to userspace (same members as dasd_information2_t up to configuration_data)
+ */
+typedef struct dasd_information_t {
+        unsigned int devno;         /* S/390 devno */
+        unsigned int real_devno;    /* for aliases */
+        unsigned int schid;         /* S/390 subchannel identifier */
+        unsigned int cu_type  : 16; /* from SenseID */
+        unsigned int cu_model :  8; /* from SenseID */
+        unsigned int dev_type : 16; /* from SenseID */
+        unsigned int dev_model : 8; /* from SenseID */
+        unsigned int open_count;    /* NOTE(review): presumably number of current opens - confirm */
+        unsigned int req_queue_len; /* NOTE(review): presumably length of the request queue - confirm */
+        unsigned int chanq_len;     /* length of chanq */
+        char type[4];               /* from discipline.name, 'none' for unknown */
+        unsigned int status;        /* current device level */
+        unsigned int label_block;   /* where to find the VOLSER */
+        unsigned int FBA_layout;    /* fixed block size (like AIXVOL) */
+        unsigned int characteristics_size; /* NOTE(review): presumably bytes used in characteristics[] - confirm */
+        unsigned int confdata_size;        /* NOTE(review): presumably bytes used in configuration_data[] - confirm */
+        char characteristics[64];   /* from read_device_characteristics */
+        char configuration_data[256]; /* from read_configuration_data */
+} dasd_information_t;
+
+/*
+ * Read Subsystem Data - Performance Statistics (packed: members are laid out without padding)
+ */ 
+typedef struct dasd_rssd_perf_stats_t {
+	unsigned char  invalid:1;
+	unsigned char  format:3;
+	unsigned char  data_format:4;
+	unsigned char  unit_address;
+	unsigned short device_status;
+	unsigned int   nr_read_normal;
+	unsigned int   nr_read_normal_hits;
+	unsigned int   nr_write_normal;
+	unsigned int   nr_write_fast_normal_hits;
+	unsigned int   nr_read_seq;
+	unsigned int   nr_read_seq_hits;
+	unsigned int   nr_write_seq;
+	unsigned int   nr_write_fast_seq_hits;
+	unsigned int   nr_read_cache;
+	unsigned int   nr_read_cache_hits;
+	unsigned int   nr_write_cache;
+	unsigned int   nr_write_fast_cache_hits;
+	unsigned int   nr_inhibit_cache;
+	unsigned int   nr_bybass_cache;	/* sic ("bypass"); the name is part of the API, do not rename */
+	unsigned int   nr_seq_dasd_to_cache;
+	unsigned int   nr_dasd_to_cache;
+	unsigned int   nr_cache_to_dasd;
+	unsigned int   nr_delayed_fast_write;
+	unsigned int   nr_normal_fast_write;
+	unsigned int   nr_seq_fast_write;
+	unsigned int   nr_cache_miss;
+	unsigned char  status2;
+	unsigned int   nr_quick_write_promotes;	/* directly follows the single byte status2, so not naturally aligned */
+	unsigned char  reserved;
+	unsigned short ssid;
+	unsigned char  reseved2[96];	/* sic ("reserved2"); the name is part of the API, do not rename */
+} __attribute__((packed)) dasd_rssd_perf_stats_t;
+
+/* 
+ * struct dasd_profile_info_t
+ * holds the profiling information
+ */
+typedef struct dasd_profile_info_t {
+        unsigned int dasd_io_reqs;	 /* number of requests processed at all */
+        unsigned int dasd_io_sects;	 /* number of sectors processed at all */
+        unsigned int dasd_io_secs[32];	 /* histogram of request's sizes */
+        unsigned int dasd_io_times[32];	 /* histogram of requests's times */
+        unsigned int dasd_io_timps[32];	 /* histogram of requests's times per sector */
+        unsigned int dasd_io_time1[32];	 /* histogram of time from build to start */
+        unsigned int dasd_io_time2[32];	 /* histogram of time from start to irq */
+        unsigned int dasd_io_time2ps[32]; /* histogram of time from start to irq (presumably per sector, cf. dasd_io_timps - confirm) */
+        unsigned int dasd_io_time3[32];	 /* histogram of time from irq to end */
+        unsigned int dasd_io_nr_req[32]; /* histogram of # of requests in chanq */
+} dasd_profile_info_t;
+
+/*
+ * struct format_data_t
+ * represents all data necessary to format a dasd
+ */
+typedef struct format_data_t {
+	unsigned int start_unit; /* from track */
+	unsigned int stop_unit;  /* to track */
+	unsigned int blksize;	 /* sectorsize */
+	unsigned int intensity;	 /* combination of the DASD_FMT_INT_* values */
+} format_data_t;
+
+/*
+ * values to be used for format_data_t.intensity
+ * 0/8: normal format (the second number of each pair adds DASD_FMT_INT_COMPAT)
+ * 1/9: also write record zero
+ * 3/11: also write home address
+ * 4/12: invalidate track
+ */
+#define DASD_FMT_INT_FMT_R0 1 /* write record zero */
+#define DASD_FMT_INT_FMT_HA 2 /* write home address, also set FMT_R0 ! */
+#define DASD_FMT_INT_INVAL  4 /* invalidate tracks */
+#define DASD_FMT_INT_COMPAT 8 /* use OS/390 compatible disk layout */
+
+
+/* 
+ * struct attrib_data_t
+ * represents the operation (cache) bits for the device.
+ * Used in DE to influence caching of the DASD.
+ */
+typedef struct attrib_data_t {
+	unsigned char operation:3;     /* cache operation mode, one of the DASD_* operation values below */
+	unsigned char reserved:5;      /* reserved */
+	__u16         nr_cyl;          /* no of cylinders for read ahead */
+	__u8          reserved2[29];   /* for future use */
+} __attribute__ ((packed)) attrib_data_t;
+
+/* definition of operation (cache) bits within attributes of DE */
+#define DASD_NORMAL_CACHE  0x0
+#define DASD_BYPASS_CACHE  0x1
+#define DASD_INHIBIT_LOAD  0x2
+#define DASD_SEQ_ACCESS    0x3
+#define DASD_SEQ_PRESTAGE  0x4
+#define DASD_REC_ACCESS    0x5
+
+/*
+ * Perform EMC Symmetrix I/O (parameter block of BIODASDSYMMIO)
+ */
+typedef struct dasd_symmio_parms {
+	unsigned char reserved[8];	/* compat with older releases */
+	unsigned long long psf_data;	/* char * cast to u64 */
+	unsigned long long rssd_result; /* char * cast to u64 */
+	int psf_data_len;		/* NOTE(review): presumably length of the buffer at psf_data - confirm */
+	int rssd_result_len;		/* NOTE(review): presumably length of the buffer at rssd_result - confirm */
+} __attribute__ ((packed)) dasd_symmio_parms_t;
+
+/*
+ * Data returned by Sense Path Group ID (SNID); packed, one byte of path state plus 11 bytes of pgid
+ */
+struct dasd_snid_data {
+	struct {
+		__u8 group:2;
+		__u8 reserve:2;
+		__u8 mode:1;
+		__u8 res:3;	/* the four bit-fields together fill one byte (2 + 2 + 1 + 3) */
+	} __attribute__ ((packed)) path_state;
+	__u8 pgid[11];	/* NOTE(review): presumably the path group ID - confirm */
+} __attribute__ ((packed));
+
+struct dasd_snid_ioctl_data {	/* argument type of BIODASDSNID */
+	struct dasd_snid_data data;
+	__u8 path_mask;	/* NOTE(review): presumably selects the path(s) to sense - confirm */
+} __attribute__ ((packed));
+
+
+/********************************************************************************
+ * SECTION: Definition of IOCTLs
+ *
+ * Here is how the ioctl-nr should be used:
+ *    0 -   31   DASD driver itself
+ *   32 -  239   still open
+ *  240 -  255   reserved for EMC 
+ *******************************************************************************/
+
+/* Disable the volume (for Linux) */
+#define BIODASDDISABLE _IO(DASD_IOCTL_LETTER,0) 
+/* Enable the volume (for Linux) */
+#define BIODASDENABLE  _IO(DASD_IOCTL_LETTER,1)  
+/* Issue a reserve/release command, resp. */
+#define BIODASDRSRV    _IO(DASD_IOCTL_LETTER,2) /* reserve */
+#define BIODASDRLSE    _IO(DASD_IOCTL_LETTER,3) /* release */
+#define BIODASDSLCK    _IO(DASD_IOCTL_LETTER,4) /* steal lock */
+/* reset profiling information of a device */
+#define BIODASDPRRST   _IO(DASD_IOCTL_LETTER,5)
+/* Quiesce IO on device */
+#define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6) 
+/* Resume IO on device */
+#define BIODASDRESUME  _IO(DASD_IOCTL_LETTER,7) 
+/* Abort all I/O on a device (nr 240 lies in the 240 - 255 range) */
+#define BIODASDABORTIO _IO(DASD_IOCTL_LETTER, 240)
+/* Allow I/O on a device (nr 241 lies in the 240 - 255 range) */
+#define BIODASDALLOWIO _IO(DASD_IOCTL_LETTER, 241)
+
+
+/* retrieve API version number (see DASD_API_VERSION) */
+#define DASDAPIVER     _IOR(DASD_IOCTL_LETTER,0,int)
+/* Get information on a dasd device */
+#define BIODASDINFO    _IOR(DASD_IOCTL_LETTER,1,dasd_information_t)
+/* retrieve profiling information of a device */
+#define BIODASDPRRD    _IOR(DASD_IOCTL_LETTER,2,dasd_profile_info_t)
+/* Get information on a dasd device (enhanced) */
+#define BIODASDINFO2   _IOR(DASD_IOCTL_LETTER,3,dasd_information2_t)
+/* Performance Statistics Read */
+#define BIODASDPSRD    _IOR(DASD_IOCTL_LETTER,4,dasd_rssd_perf_stats_t)
+/* Get Attributes (cache operations) */
+#define BIODASDGATTR   _IOR(DASD_IOCTL_LETTER,5,attrib_data_t) 
+
+
+/* #define BIODASDFORMAT  _IOW(IOCTL_LETTER,0,format_data_t) , deprecated */
+#define BIODASDFMT     _IOW(DASD_IOCTL_LETTER,1,format_data_t) /* takes the data necessary to format a dasd */
+/* Set Attributes (cache operations) */
+#define BIODASDSATTR   _IOW(DASD_IOCTL_LETTER,2,attrib_data_t) 
+
+/* Get Sense Path Group ID (SNID) data */
+#define BIODASDSNID    _IOWR(DASD_IOCTL_LETTER, 1, struct dasd_snid_ioctl_data)
+
+#define BIODASDSYMMIO  _IOWR(DASD_IOCTL_LETTER, 240, dasd_symmio_parms_t) /* Perform EMC Symmetrix I/O */
+
+#endif				/* DASD_H */
+
diff --git a/arch/s390/include/uapi/asm/debug.h b/arch/s390/include/uapi/asm/debug.h
new file mode 100644
index 000000000..c59fc7912
--- /dev/null
+++ b/arch/s390/include/uapi/asm/debug.h
@@ -0,0 +1,34 @@
+/*
+ *   S/390 debug facility
+ *
+ *    Copyright IBM Corp. 1999, 2000
+ */
+
+#ifndef _UAPIDEBUG_H
+#define _UAPIDEBUG_H
+
+#include <linux/fs.h>
+
+/* Note:
+ * struct __debug_entry must be defined outside of #ifdef __KERNEL__ 
+ * in order to allow a user program to analyze the 'raw'-view.
+ */
+
+struct __debug_entry{
+        union {
+                struct {
+                        unsigned long long clock:52;
+                        unsigned long long exception:1;
+                        unsigned long long level:3;
+                        unsigned long long cpuid:8;
+                } fields;       /* 52 + 1 + 3 + 8 = 64 bits, sharing storage with stck */
+
+                unsigned long long stck;
+        } id;
+        void* caller;           /* pointer-sized member: the entry size follows the pointer width */
+} __attribute__((packed));
+
+
+#define __DEBUG_FEATURE_VERSION      2  /* version of debug feature */
+
+#endif /* _UAPIDEBUG_H */
diff --git a/arch/s390/include/uapi/asm/errno.h b/arch/s390/include/uapi/asm/errno.h
new file mode 100644
index 000000000..395e97d80
--- /dev/null
+++ b/arch/s390/include/uapi/asm/errno.h
@@ -0,0 +1,11 @@
+/*
+ *  S390 version
+ *
+ */
+
+#ifndef _S390_ERRNO_H
+#define _S390_ERRNO_H
+
+#include <asm-generic/errno.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/fcntl.h b/arch/s390/include/uapi/asm/fcntl.h
new file mode 100644
index 000000000..46ab12db5
--- /dev/null
+++ b/arch/s390/include/uapi/asm/fcntl.h
@@ -0,0 +1 @@
+#include <asm-generic/fcntl.h>
diff --git a/arch/s390/include/uapi/asm/hypfs.h b/arch/s390/include/uapi/asm/hypfs.h
new file mode 100644
index 000000000..b3fe12d8d
--- /dev/null
+++ b/arch/s390/include/uapi/asm/hypfs.h
@@ -0,0 +1,54 @@
+/*
+ * Structures for hypfs interface
+ *
+ * Copyright IBM Corp. 2013
+ *
+ * Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _ASM_HYPFS_H
+#define _ASM_HYPFS_H
+
+#include <linux/types.h>
+
+/*
+ * IOCTL for binary interface /sys/kernel/debug/diag_304
+ */
+struct hypfs_diag304 {
+	__u32	args[2];	/* NOTE(review): presumably the diag 304 arguments - confirm */
+	__u64	data;		/* NOTE(review): presumably a buffer address carried as u64 - confirm */
+	__u64	rc;		/* NOTE(review): presumably the diagnose return code - confirm */
+} __attribute__((packed));	/* packed: 2 * 4 + 8 + 8 = 24 bytes */
+
+#define HYPFS_IOCTL_MAGIC 0x10
+
+#define HYPFS_DIAG304 \
+	_IOWR(HYPFS_IOCTL_MAGIC, 0x20, struct hypfs_diag304)
+
+/*
+ * Structures for binary interface /sys/kernel/debug/diag_0c
+ */
+struct hypfs_diag0c_hdr {
+	__u64	len;		/* Length of diag0c buffer without header */
+	__u16	version;	/* Version of header */
+	char	reserved1[6];	/* Reserved */
+	char	tod_ext[16];	/* TOD clock for diag0c */
+	__u64	count;		/* Number of entries (CPUs) in diag0c array */
+	char	reserved2[24];	/* Reserved */
+};				/* 8 + 2 + 6 + 16 + 8 + 24 = 64 bytes */
+
+struct hypfs_diag0c_entry {	/* one element of hypfs_diag0c_data.entry[] */
+	char	date[8];	/* MM/DD/YY in EBCDIC */
+	char	time[8];	/* HH:MM:SS in EBCDIC */
+	__u64	virtcpu;	/* Virtual time consumed by the virt CPU (us) */
+	__u64	totalproc;	/* Total of virtual and simulation time (us) */
+	__u32	cpu;		/* Linux logical CPU number */
+	__u32	reserved;	/* Align to 8 byte */
+};
+
+struct hypfs_diag0c_data {
+	struct hypfs_diag0c_hdr		hdr;		/* 64 byte header */
+	struct hypfs_diag0c_entry	entry[];	/* diag0c entry array (flexible array; hdr.count holds the number of entries) */
+};
+
+#endif
diff --git a/arch/s390/include/uapi/asm/ioctl.h b/arch/s390/include/uapi/asm/ioctl.h
new file mode 100644
index 000000000..b279fe06d
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ioctl.h
@@ -0,0 +1 @@
+#include <asm-generic/ioctl.h>
diff --git a/arch/s390/include/uapi/asm/ioctls.h b/arch/s390/include/uapi/asm/ioctls.h
new file mode 100644
index 000000000..960a4c1eb
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ioctls.h
@@ -0,0 +1,8 @@
+#ifndef __ARCH_S390_IOCTLS_H__
+#define __ARCH_S390_IOCTLS_H__
+
+#define FIOQSIZE	0x545E	/* defined ahead of the asm-generic include below; NOTE(review): presumably overrides its default - confirm */
+
+#include <asm-generic/ioctls.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h
new file mode 100644
index 000000000..37f293d12
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ipcbuf.h
@@ -0,0 +1,31 @@
+#ifndef __S390_IPCBUF_H__
+#define __S390_IPCBUF_H__
+
+/*
+ * The ipc64_perm structure for S/390 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 32-bit mode_t and seq
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct ipc64_perm
+{
+	__kernel_key_t		key;
+	__kernel_uid32_t	uid;
+	__kernel_gid32_t	gid;
+	__kernel_uid32_t	cuid;
+	__kernel_gid32_t	cgid;
+	__kernel_mode_t		mode;
+	unsigned short		__pad1;		/* pad space, see the comment above the struct */
+	unsigned short		seq;
+#ifndef __s390x__
+	unsigned short		__pad2;		/* only without __s390x__: pad space after seq */
+#endif /* ! __s390x__ */
+	unsigned long		__unused1;	/* miscellaneous spare value */
+	unsigned long		__unused2;	/* miscellaneous spare value */
+};
+
+#endif /* __S390_IPCBUF_H__ */
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
new file mode 100644
index 000000000..ef1a5fcc6
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -0,0 +1,182 @@
+#ifndef __LINUX_KVM_S390_H
+#define __LINUX_KVM_S390_H
+/*
+ * KVM s390 specific structures and definitions
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+#include <linux/types.h>
+
+#define __KVM_S390
+#define __KVM_HAVE_GUEST_DEBUG
+
+/* Device control API: s390-specific devices */
+#define KVM_DEV_FLIC_GET_ALL_IRQS	1
+#define KVM_DEV_FLIC_ENQUEUE		2
+#define KVM_DEV_FLIC_CLEAR_IRQS		3
+#define KVM_DEV_FLIC_APF_ENABLE		4
+#define KVM_DEV_FLIC_APF_DISABLE_WAIT	5
+#define KVM_DEV_FLIC_ADAPTER_REGISTER	6
+#define KVM_DEV_FLIC_ADAPTER_MODIFY	7
+/*
+ * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
+ * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
+ * There are also sclp and machine checks. This gives us
+ * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000
+ * Let's round up to 8192 pages (8192 * 4096 = 0x2000000 bytes).
+ */
+#define KVM_S390_MAX_FLOAT_IRQS	266250
+#define KVM_S390_FLIC_MAX_BUFFER	0x2000000
+
+struct kvm_s390_io_adapter {	/* NOTE(review): presumably the payload of KVM_DEV_FLIC_ADAPTER_REGISTER - confirm */
+	__u32 id;
+	__u8 isc;
+	__u8 maskable;
+	__u8 swap;
+	__u8 pad;	/* explicit padding: 4 + 4 * 1 = 8 bytes in total */
+};
+
+#define KVM_S390_IO_ADAPTER_MASK 1
+#define KVM_S390_IO_ADAPTER_MAP 2
+#define KVM_S390_IO_ADAPTER_UNMAP 3
+
+struct kvm_s390_io_adapter_req {	/* NOTE(review): presumably the payload of KVM_DEV_FLIC_ADAPTER_MODIFY - confirm */
+	__u32 id;
+	__u8 type;	/* NOTE(review): presumably one of KVM_S390_IO_ADAPTER_* above - confirm */
+	__u8 mask;
+	__u16 pad0;	/* explicit padding: 4 + 1 + 1 + 2 = 8 bytes precede addr */
+	__u64 addr;
+};
+
+/* kvm attr_group  on vm fd */
+#define KVM_S390_VM_MEM_CTRL		0
+#define KVM_S390_VM_TOD			1
+#define KVM_S390_VM_CRYPTO		2
+#define KVM_S390_VM_CPU_MODEL		3
+
+/* kvm attributes for mem_ctrl */
+#define KVM_S390_VM_MEM_ENABLE_CMMA	0
+#define KVM_S390_VM_MEM_CLR_CMMA	1
+#define KVM_S390_VM_MEM_LIMIT_SIZE	2
+
+/* kvm attributes for KVM_S390_VM_TOD */
+#define KVM_S390_VM_TOD_LOW		0
+#define KVM_S390_VM_TOD_HIGH		1
+
+/* kvm attributes for KVM_S390_VM_CPU_MODEL */
+/* processor related attributes are r/w */
+#define KVM_S390_VM_CPU_PROCESSOR	0
+struct kvm_s390_vm_cpu_processor {
+	__u64 cpuid;
+	__u16 ibc;		/* 16 bit here, whereas kvm_s390_vm_cpu_machine.ibc is 32 bit */
+	__u8  pad[6];		/* explicit padding: ibc + pad = 8 bytes */
+	__u64 fac_list[256];
+};
+
+/* machine related attributes are r/o */
+#define KVM_S390_VM_CPU_MACHINE		1
+struct kvm_s390_vm_cpu_machine {
+	__u64 cpuid;
+	__u32 ibc;		/* 32 bit here, whereas kvm_s390_vm_cpu_processor.ibc is 16 bit */
+	__u8  pad[4];		/* explicit padding: ibc + pad = 8 bytes */
+	__u64 fac_mask[256];
+	__u64 fac_list[256];
+};
+
+/* kvm attributes for crypto */
+#define KVM_S390_VM_CRYPTO_ENABLE_AES_KW	0
+#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW	1
+#define KVM_S390_VM_CRYPTO_DISABLE_AES_KW	2
+#define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW	3
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+	/* general purpose regs for s390 */
+	__u64 gprs[16];
+};
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+	__u32 acrs[16];	/* access registers (named as in kvm_sync_regs) */
+	__u64 crs[16];	/* control registers (named as in kvm_sync_regs) */
+};
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+	__u32 fpc;	/* no explicit pad member follows; NOTE(review): with 8-byte aligned __u64 a 4-byte hole precedes fprs - confirm */
+	__u64 fprs[16];
+};
+
+#define KVM_GUESTDBG_USE_HW_BP		0x00010000
+
+#define KVM_HW_BP			1
+#define KVM_HW_WP_WRITE			2
+#define KVM_SINGLESTEP			4
+
+struct kvm_debug_exit_arch {
+	__u64 addr;
+	__u8 type;	/* NOTE(review): presumably KVM_HW_BP, KVM_HW_WP_WRITE or KVM_SINGLESTEP - confirm */
+	__u8 pad[7]; /* Should be set to 0 */
+};
+
+struct kvm_hw_breakpoint {	/* element type referenced by kvm_guest_debug_arch.hw_bp */
+	__u64 addr;
+	__u64 phys_addr;
+	__u64 len;
+	__u8 type;	/* NOTE(review): presumably KVM_HW_BP or KVM_HW_WP_WRITE - confirm */
+	__u8 pad[7]; /* Should be set to 0 */
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+	__u32 nr_hw_bp;	/* NOTE(review): presumably the number of entries at hw_bp - confirm */
+	__u32 pad; /* Should be set to 0 */
+	struct kvm_hw_breakpoint __user *hw_bp;	/* __user-annotated pointer to struct kvm_hw_breakpoint */
+};
+
+/* for KVM_SYNC_PFAULT and KVM_REG_S390_PFTOKEN */
+#define KVM_S390_PFAULT_TOKEN_INVALID	0xffffffffffffffffULL
+
+#define KVM_SYNC_PREFIX (1UL << 0)
+#define KVM_SYNC_GPRS   (1UL << 1)
+#define KVM_SYNC_ACRS   (1UL << 2)
+#define KVM_SYNC_CRS    (1UL << 3)
+#define KVM_SYNC_ARCH0  (1UL << 4)
+#define KVM_SYNC_PFAULT (1UL << 5)
+#define KVM_SYNC_VRS    (1UL << 6)
+/* definition of registers in kvm_run; a [TAG] in a member comment presumably names the matching KVM_SYNC_* bit - confirm */
+struct kvm_sync_regs {
+	__u64 prefix;	/* prefix register */
+	__u64 gprs[16];	/* general purpose registers */
+	__u32 acrs[16];	/* access registers */
+	__u64 crs[16];	/* control registers */
+	__u64 todpr;	/* tod programmable register [ARCH0] */
+	__u64 cputm;	/* cpu timer [ARCH0] */
+	__u64 ckc;	/* clock comparator [ARCH0] */
+	__u64 pp;	/* program parameter [ARCH0] */
+	__u64 gbea;	/* guest breaking-event address [ARCH0] */
+	__u64 pft;	/* pfault token [PFAULT] */
+	__u64 pfs;	/* pfault select [PFAULT] */
+	__u64 pfc;	/* pfault compare [PFAULT] */
+	__u64 vrs[32][2];	/* vector registers, 32 registers of 2 x 64 bit each */
+	__u8  reserved[512];	/* for future vector expansion */
+	__u32 fpc;	/* only valid with vector registers */
+};
+
+#define KVM_REG_S390_TODPR	(KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
+#define KVM_REG_S390_EPOCHDIFF	(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
+#define KVM_REG_S390_CPU_TIMER  (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
+#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_S390_PFTOKEN	(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_S390_PFCOMPARE	(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_S390_PFSELECT	(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7)
+#define KVM_REG_S390_PP		(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x8)
+#define KVM_REG_S390_GBEA	(KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x9)
+#endif
diff --git a/arch/s390/include/uapi/asm/kvm_para.h b/arch/s390/include/uapi/asm/kvm_para.h
new file mode 100644
index 000000000..ff1f4e7b3
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm_para.h
@@ -0,0 +1,11 @@
+/*
+ * User API definitions for paravirtual devices on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
diff --git a/arch/s390/include/uapi/asm/kvm_perf.h b/arch/s390/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000000000..397282727
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,25 @@
+/*
+ * Definitions for perf-kvm on s390
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_KVM_PERF_S390_H
+#define __LINUX_KVM_PERF_S390_H
+
+#include <asm/sie.h>
+
+#define DECODE_STR_LEN 40
+
+#define VCPU_ID "id"
+
+#define KVM_ENTRY_TRACE "kvm:kvm_s390_sie_enter"
+#define KVM_EXIT_TRACE "kvm:kvm_s390_sie_exit"
+#define KVM_EXIT_REASON "icptcode"
+
+#endif
diff --git a/arch/s390/include/uapi/asm/kvm_virtio.h b/arch/s390/include/uapi/asm/kvm_virtio.h
new file mode 100644
index 000000000..44a438ca9
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm_virtio.h
@@ -0,0 +1,64 @@
+/*
+ * definition for virtio for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef __KVM_S390_VIRTIO_H
+#define __KVM_S390_VIRTIO_H
+
+#include <linux/types.h>
+
+struct kvm_device_desc {
+	/* The device type: console, network, disk etc.  Type 0 terminates. */
+	__u8 type;
+	/* The number of virtqueues (first in config array) */
+	__u8 num_vq;
+	/*
+	 * The number of bytes of feature bits.  Multiply by 2: one for host
+	 * features and one for guest acknowledgements.
+	 */
+	__u8 feature_len;
+	/* The number of bytes of the config array after virtqueues. */
+	__u8 config_len;
+	/* A status byte, written by the Guest. */
+	__u8 status;
+	__u8 config[0];	/* zero-length array: the config array described above starts here */
+};
+
+/*
+ * This is how we expect the device configuration field for a virtqueue
+ * to be laid out in config space.
+ */
+struct kvm_vqconfig {
+	/* The token returned with an interrupt. Set by the guest */
+	__u64 token;
+	/* The address of the virtio ring */
+	__u64 address;
+	/* The number of entries in the virtio_ring */
+	__u16 num;
+
+};
+
+#define KVM_S390_VIRTIO_NOTIFY		0
+#define KVM_S390_VIRTIO_RESET		1
+#define KVM_S390_VIRTIO_SET_STATUS	2
+
+/* The alignment to use between consumer and producer parts of vring.
+ * This is pagesize for historical reasons. */
+#define KVM_S390_VIRTIO_RING_ALIGN	4096
+
+
+/* These values are supposed to be in ext_params on an interrupt */
+#define VIRTIO_PARAM_MASK		0xff
+#define VIRTIO_PARAM_VRING_INTERRUPT	0x0
+#define VIRTIO_PARAM_CONFIG_CHANGED	0x1
+#define VIRTIO_PARAM_DEV_ADD		0x2
+
+#endif
diff --git a/arch/s390/include/uapi/asm/mman.h b/arch/s390/include/uapi/asm/mman.h
new file mode 100644
index 000000000..de23da1f4
--- /dev/null
+++ b/arch/s390/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/mman.h"
+ */
+#include <asm-generic/mman.h>
diff --git a/arch/s390/include/uapi/asm/monwriter.h b/arch/s390/include/uapi/asm/monwriter.h
new file mode 100644
index 000000000..f845c8e2f
--- /dev/null
+++ b/arch/s390/include/uapi/asm/monwriter.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright IBM Corp. 2006
+ * Character device driver for writing z/VM APPLDATA monitor records
+ * Version 1.0
+ * Author(s): Melissa Howland <melissah@us.ibm.com>
+ *
+ */
+
+#ifndef _ASM_390_MONWRITER_H
+#define _ASM_390_MONWRITER_H
+
+/* mon_function values */
+#define MONWRITE_START_INTERVAL	0x00 /* start interval recording */
+#define MONWRITE_STOP_INTERVAL	0x01 /* stop interval or config recording */
+#define MONWRITE_GEN_EVENT	0x02 /* generate event record */
+#define MONWRITE_START_CONFIG	0x03 /* start configuration recording */
+
+/* the header the app uses in its write() data (packed: members are laid out without padding) */
+struct monwrite_hdr {
+	unsigned char mon_function;	/* one of the MONWRITE_* mon_function values */
+	unsigned short applid;		/* starts at offset 1 because the struct is packed */
+	unsigned char record_num;
+	unsigned short version;
+	unsigned short release;
+	unsigned short mod_level;
+	unsigned short datalen;		/* NOTE(review): presumably length of the data following the header - confirm */
+	unsigned char hdrlen;		/* NOTE(review): presumably length of this header - confirm */
+
+} __attribute__((packed));
+
+#endif /* _ASM_390_MONWRITER_H */
diff --git a/arch/s390/include/uapi/asm/msgbuf.h b/arch/s390/include/uapi/asm/msgbuf.h
new file mode 100644
index 000000000..1bbdee927
--- /dev/null
+++ b/arch/s390/include/uapi/asm/msgbuf.h
@@ -0,0 +1,37 @@
+#ifndef _S390_MSGBUF_H
+#define _S390_MSGBUF_H
+
+/* 
+ * The msqid64_ds structure for S/390 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct msqid64_ds {
+	struct ipc64_perm msg_perm;
+	__kernel_time_t msg_stime;	/* last msgsnd time */
+#ifndef __s390x__
+	unsigned long	__unused1;	/* pad space for a 64-bit time_t */
+#endif /* ! __s390x__ */
+	__kernel_time_t msg_rtime;	/* last msgrcv time */
+#ifndef __s390x__
+	unsigned long	__unused2;	/* pad space for a 64-bit time_t */
+#endif /* ! __s390x__ */
+	__kernel_time_t msg_ctime;	/* last change time */
+#ifndef __s390x__
+	unsigned long	__unused3;	/* pad space for a 64-bit time_t */
+#endif /* ! __s390x__ */
+	unsigned long  msg_cbytes;	/* current number of bytes on queue */
+	unsigned long  msg_qnum;	/* number of messages in queue */
+	unsigned long  msg_qbytes;	/* max number of bytes on queue */
+	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
+	__kernel_pid_t msg_lrpid;	/* last receive pid */
+	unsigned long  __unused4;	/* miscellaneous spare value */
+	unsigned long  __unused5;	/* miscellaneous spare value */
+};
+
+#endif /* _S390_MSGBUF_H */
diff --git a/arch/s390/include/uapi/asm/param.h b/arch/s390/include/uapi/asm/param.h
new file mode 100644
index 000000000..c616821bf
--- /dev/null
+++ b/arch/s390/include/uapi/asm/param.h
@@ -0,0 +1,6 @@
+#ifndef _ASMS390_PARAM_H
+#define _ASMS390_PARAM_H
+
+#include <asm-generic/param.h>
+
+#endif /* _ASMS390_PARAM_H */
diff --git a/arch/s390/include/uapi/asm/poll.h b/arch/s390/include/uapi/asm/poll.h
new file mode 100644
index 000000000..c98509d31
--- /dev/null
+++ b/arch/s390/include/uapi/asm/poll.h
@@ -0,0 +1 @@
+#include <asm-generic/poll.h>
diff --git a/arch/s390/include/uapi/asm/posix_types.h b/arch/s390/include/uapi/asm/posix_types.h
new file mode 100644
index 000000000..bf2a2ad2f
--- /dev/null
+++ b/arch/s390/include/uapi/asm/posix_types.h
@@ -0,0 +1,51 @@
+/*
+ *  S390 version
+ *
+ */
+
+#ifndef __ARCH_S390_POSIX_TYPES_H
+#define __ARCH_S390_POSIX_TYPES_H
+
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.  Also, we cannot
+ * assume GCC is being used.
+ */
+
+typedef unsigned long   __kernel_size_t;
+typedef long            __kernel_ssize_t;
+#define __kernel_size_t __kernel_size_t
+
+typedef unsigned short	__kernel_old_dev_t;
+#define __kernel_old_dev_t __kernel_old_dev_t
+
+#ifndef __s390x__	/* without __s390x__: mode, ipc_pid, uid and gid are unsigned short */
+
+typedef unsigned long   __kernel_ino_t;
+typedef unsigned short  __kernel_mode_t;
+typedef unsigned short  __kernel_ipc_pid_t;
+typedef unsigned short  __kernel_uid_t;
+typedef unsigned short  __kernel_gid_t;
+typedef int             __kernel_ptrdiff_t;
+
+#else /* __s390x__ */
+
+typedef unsigned int    __kernel_ino_t;
+typedef unsigned int    __kernel_mode_t;
+typedef int             __kernel_ipc_pid_t;
+typedef unsigned int    __kernel_uid_t;
+typedef unsigned int    __kernel_gid_t;
+typedef long            __kernel_ptrdiff_t;
+typedef unsigned long   __kernel_sigset_t;      /* at least 32 bits; only declared in this branch */
+
+#endif /* __s390x__ */
+
+#define __kernel_ino_t  __kernel_ino_t
+#define __kernel_mode_t __kernel_mode_t
+#define __kernel_ipc_pid_t __kernel_ipc_pid_t
+#define __kernel_uid_t __kernel_uid_t
+#define __kernel_gid_t __kernel_gid_t
+
+#include <asm-generic/posix_types.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
new file mode 100644
index 000000000..a150f4fab
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -0,0 +1,459 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ */
+
+#ifndef _UAPI_S390_PTRACE_H
+#define _UAPI_S390_PTRACE_H
+
+/*
+ * Offsets in the user_regs_struct. They are used for the ptrace
+ * system call and in entry.S
+ */
+#ifndef __s390x__
+
+#define PT_PSWMASK  0x00
+#define PT_PSWADDR  0x04
+#define PT_GPR0     0x08
+#define PT_GPR1     0x0C
+#define PT_GPR2     0x10
+#define PT_GPR3     0x14
+#define PT_GPR4     0x18
+#define PT_GPR5     0x1C
+#define PT_GPR6     0x20
+#define PT_GPR7     0x24
+#define PT_GPR8     0x28
+#define PT_GPR9     0x2C
+#define PT_GPR10    0x30
+#define PT_GPR11    0x34
+#define PT_GPR12    0x38
+#define PT_GPR13    0x3C
+#define PT_GPR14    0x40
+#define PT_GPR15    0x44
+#define PT_ACR0     0x48
+#define PT_ACR1     0x4C
+#define PT_ACR2     0x50
+#define PT_ACR3     0x54
+#define PT_ACR4	    0x58
+#define PT_ACR5	    0x5C
+#define PT_ACR6	    0x60
+#define PT_ACR7	    0x64
+#define PT_ACR8	    0x68
+#define PT_ACR9	    0x6C
+#define PT_ACR10    0x70
+#define PT_ACR11    0x74
+#define PT_ACR12    0x78
+#define PT_ACR13    0x7C
+#define PT_ACR14    0x80
+#define PT_ACR15    0x84
+#define PT_ORIGGPR2 0x88
+#define PT_FPC	    0x90
+/*
+ * A nasty fact of life that the ptrace api
+ * only supports passing of longs.
+ */
+#define PT_FPR0_HI  0x98
+#define PT_FPR0_LO  0x9C
+#define PT_FPR1_HI  0xA0
+#define PT_FPR1_LO  0xA4
+#define PT_FPR2_HI  0xA8
+#define PT_FPR2_LO  0xAC
+#define PT_FPR3_HI  0xB0
+#define PT_FPR3_LO  0xB4
+#define PT_FPR4_HI  0xB8
+#define PT_FPR4_LO  0xBC
+#define PT_FPR5_HI  0xC0
+#define PT_FPR5_LO  0xC4
+#define PT_FPR6_HI  0xC8
+#define PT_FPR6_LO  0xCC
+#define PT_FPR7_HI  0xD0
+#define PT_FPR7_LO  0xD4
+#define PT_FPR8_HI  0xD8
+#define PT_FPR8_LO  0xDC
+#define PT_FPR9_HI  0xE0
+#define PT_FPR9_LO  0xE4
+#define PT_FPR10_HI 0xE8
+#define PT_FPR10_LO 0xEC
+#define PT_FPR11_HI 0xF0
+#define PT_FPR11_LO 0xF4
+#define PT_FPR12_HI 0xF8
+#define PT_FPR12_LO 0xFC
+#define PT_FPR13_HI 0x100
+#define PT_FPR13_LO 0x104
+#define PT_FPR14_HI 0x108
+#define PT_FPR14_LO 0x10C
+#define PT_FPR15_HI 0x110
+#define PT_FPR15_LO 0x114
+#define PT_CR_9	    0x118
+#define PT_CR_10    0x11C
+#define PT_CR_11    0x120
+#define PT_IEEE_IP  0x13C
+#define PT_LASTOFF  PT_IEEE_IP
+#define PT_ENDREGS  (0x140-1)	/* parenthesized: safe inside larger expressions */
+
+#define GPR_SIZE	4
+#define CR_SIZE		4
+
+#define STACK_FRAME_OVERHEAD	96	/* size of minimum stack frame */
+
+#else /* __s390x__ */
+
+#define PT_PSWMASK  0x00
+#define PT_PSWADDR  0x08
+#define PT_GPR0     0x10
+#define PT_GPR1     0x18
+#define PT_GPR2     0x20
+#define PT_GPR3     0x28
+#define PT_GPR4     0x30
+#define PT_GPR5     0x38
+#define PT_GPR6     0x40
+#define PT_GPR7     0x48
+#define PT_GPR8     0x50
+#define PT_GPR9     0x58
+#define PT_GPR10    0x60
+#define PT_GPR11    0x68
+#define PT_GPR12    0x70
+#define PT_GPR13    0x78
+#define PT_GPR14    0x80
+#define PT_GPR15    0x88
+#define PT_ACR0     0x90
+#define PT_ACR1     0x94
+#define PT_ACR2     0x98
+#define PT_ACR3     0x9C
+#define PT_ACR4	    0xA0
+#define PT_ACR5	    0xA4
+#define PT_ACR6	    0xA8
+#define PT_ACR7	    0xAC
+#define PT_ACR8	    0xB0
+#define PT_ACR9	    0xB4
+#define PT_ACR10    0xB8
+#define PT_ACR11    0xBC
+#define PT_ACR12    0xC0
+#define PT_ACR13    0xC4
+#define PT_ACR14    0xC8
+#define PT_ACR15    0xCC
+#define PT_ORIGGPR2 0xD0
+#define PT_FPC	    0xD8
+#define PT_FPR0     0xE0
+#define PT_FPR1     0xE8
+#define PT_FPR2     0xF0
+#define PT_FPR3     0xF8
+#define PT_FPR4     0x100
+#define PT_FPR5     0x108
+#define PT_FPR6     0x110
+#define PT_FPR7     0x118
+#define PT_FPR8     0x120
+#define PT_FPR9     0x128
+#define PT_FPR10    0x130
+#define PT_FPR11    0x138
+#define PT_FPR12    0x140
+#define PT_FPR13    0x148
+#define PT_FPR14    0x150
+#define PT_FPR15    0x158
+#define PT_CR_9     0x160
+#define PT_CR_10    0x168
+#define PT_CR_11    0x170
+#define PT_IEEE_IP  0x1A8
+#define PT_LASTOFF  PT_IEEE_IP
+#define PT_ENDREGS  (0x1B0-1)	/* parenthesized: safe inside larger expressions */
+
+#define GPR_SIZE	8
+#define CR_SIZE		8
+
+#define STACK_FRAME_OVERHEAD    160      /* size of minimum stack frame */
+
+#endif /* __s390x__ */
+
+#define NUM_GPRS	16
+#define NUM_FPRS	16
+#define NUM_CRS		16
+#define NUM_ACRS	16
+
+#define NUM_CR_WORDS	3
+
+#define FPR_SIZE	8
+#define FPC_SIZE	4
+#define FPC_PAD_SIZE	4 /* gcc insists on aligning the fpregs */
+#define ACR_SIZE	4
+
+
+#define PTRACE_OLDSETOPTIONS         21
+
+#ifndef __ASSEMBLY__
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+typedef union		/* one register slot with several overlapping views */
+{
+	float   f;	/* viewed as a float */
+	double  d;	/* viewed as a double */
+        __u64   ui;	/* viewed as a raw 64-bit unsigned integer */
+	struct
+	{
+		__u32 hi;	/* first 32-bit word */
+		__u32 lo;	/* second 32-bit word */
+	} fp;		/* viewed as two 32-bit words */
+} freg_t;
+
+typedef struct
+{
+	__u32   fpc;
+	__u32	pad;	/* cf. FPC_PAD_SIZE: padding ahead of the 8-byte fprs */
+	freg_t  fprs[NUM_FPRS];	/* NUM_FPRS (16) register slots */
+} s390_fp_regs;
+
+#define FPC_EXCEPTION_MASK      0xF8000000
+#define FPC_FLAGS_MASK          0x00F80000
+#define FPC_DXC_MASK            0x0000FF00
+#define FPC_RM_MASK             0x00000003
+
+/* This typedef defines what a Program Status Word (PSW) looks like. */
+typedef struct
+{
+        unsigned long mask;	/* PSW mask word; see the PSW_MASK_* bits */
+        unsigned long addr;	/* PSW address word; see the PSW_ADDR_* bits */
+} __attribute__ ((aligned(8))) psw_t;
+
+#ifndef __s390x__
+
+#define PSW_MASK_PER		0x40000000UL
+#define PSW_MASK_DAT		0x04000000UL
+#define PSW_MASK_IO		0x02000000UL
+#define PSW_MASK_EXT		0x01000000UL
+#define PSW_MASK_KEY		0x00F00000UL
+#define PSW_MASK_BASE		0x00080000UL	/* always one */
+#define PSW_MASK_MCHECK		0x00040000UL
+#define PSW_MASK_WAIT		0x00020000UL
+#define PSW_MASK_PSTATE		0x00010000UL
+#define PSW_MASK_ASC		0x0000C000UL
+#define PSW_MASK_CC		0x00003000UL
+#define PSW_MASK_PM		0x00000F00UL
+#define PSW_MASK_RI		0x00000000UL
+#define PSW_MASK_EA		0x00000000UL
+#define PSW_MASK_BA		0x00000000UL
+
+#define PSW_MASK_USER		0x0000FF00UL
+
+#define PSW_ADDR_AMODE		0x80000000UL
+#define PSW_ADDR_INSN		0x7FFFFFFFUL
+
+#define PSW_DEFAULT_KEY		(((unsigned long) PAGE_DEFAULT_ACC) << 20)
+
+#define PSW_ASC_PRIMARY		0x00000000UL
+#define PSW_ASC_ACCREG		0x00004000UL
+#define PSW_ASC_SECONDARY	0x00008000UL
+#define PSW_ASC_HOME		0x0000C000UL
+
+#else /* __s390x__ */
+
+#define PSW_MASK_PER		0x4000000000000000UL
+#define PSW_MASK_DAT		0x0400000000000000UL
+#define PSW_MASK_IO		0x0200000000000000UL
+#define PSW_MASK_EXT		0x0100000000000000UL
+#define PSW_MASK_BASE		0x0000000000000000UL
+#define PSW_MASK_KEY		0x00F0000000000000UL
+#define PSW_MASK_MCHECK		0x0004000000000000UL
+#define PSW_MASK_WAIT		0x0002000000000000UL
+#define PSW_MASK_PSTATE		0x0001000000000000UL
+#define PSW_MASK_ASC		0x0000C00000000000UL
+#define PSW_MASK_CC		0x0000300000000000UL
+#define PSW_MASK_PM		0x00000F0000000000UL
+#define PSW_MASK_RI		0x0000008000000000UL
+#define PSW_MASK_EA		0x0000000100000000UL
+#define PSW_MASK_BA		0x0000000080000000UL
+
+#define PSW_MASK_USER		0x0000FF0180000000UL
+
+#define PSW_ADDR_AMODE		0x0000000000000000UL
+#define PSW_ADDR_INSN		0xFFFFFFFFFFFFFFFFUL
+
+#define PSW_DEFAULT_KEY		(((unsigned long) PAGE_DEFAULT_ACC) << 52)
+
+#define PSW_ASC_PRIMARY		0x0000000000000000UL
+#define PSW_ASC_ACCREG		0x0000400000000000UL
+#define PSW_ASC_SECONDARY	0x0000800000000000UL
+#define PSW_ASC_HOME		0x0000C00000000000UL
+
+#endif /* __s390x__ */
+
+
+/*
+ * The s390_regs structure is used to define the elf_gregset_t.
+ */
+typedef struct
+{
+	psw_t psw;
+	unsigned long gprs[NUM_GPRS];
+	unsigned int  acrs[NUM_ACRS];
+	unsigned long orig_gpr2;
+} s390_regs;
+
+/*
+ * Now for the user space program event recording (trace) definitions.
+ * The following structures are used only for the ptrace interface, don't
+ * touch or even look at it if you don't want to modify the user-space
+ * ptrace interface. In particular stay away from it for in-kernel PER.
+ */
+typedef struct
+{
+	unsigned long cr[NUM_CR_WORDS];
+} per_cr_words;
+
+#define PER_EM_MASK 0xE8000000UL
+
+typedef	struct
+{
+#ifdef __s390x__
+	unsigned                       : 32;
+#endif /* __s390x__ */
+	unsigned em_branching          : 1;
+	unsigned em_instruction_fetch  : 1;
+	/*
+	 * Switching on storage alteration automatically fixes
+	 * the storage alteration event bit in the users std.
+	 */
+	unsigned em_storage_alteration : 1;
+	unsigned em_gpr_alt_unused     : 1;
+	unsigned em_store_real_address : 1;
+	unsigned                       : 3;
+	unsigned branch_addr_ctl       : 1;
+	unsigned                       : 1;
+	unsigned storage_alt_space_ctl : 1;
+	unsigned                       : 21;
+	unsigned long starting_addr;
+	unsigned long ending_addr;
+} per_cr_bits;
+
+typedef struct
+{
+	unsigned short perc_atmid;
+	unsigned long address;
+	unsigned char access_id;
+} per_lowcore_words;
+
+typedef struct
+{
+	unsigned perc_branching          : 1;
+	unsigned perc_instruction_fetch  : 1;
+	unsigned perc_storage_alteration : 1;
+	unsigned perc_gpr_alt_unused     : 1;
+	unsigned perc_store_real_address : 1;
+	unsigned                         : 3;
+	unsigned atmid_psw_bit_31        : 1;
+	unsigned atmid_validity_bit      : 1;
+	unsigned atmid_psw_bit_32        : 1;
+	unsigned atmid_psw_bit_5         : 1;
+	unsigned atmid_psw_bit_16        : 1;
+	unsigned atmid_psw_bit_17        : 1;
+	unsigned si                      : 2;
+	unsigned long address;
+	unsigned                         : 4;
+	unsigned access_id               : 4;
+} per_lowcore_bits;
+
+typedef struct
+{
+	union {
+		per_cr_words   words;
+		per_cr_bits    bits;
+	} control_regs;
+	/*
+	 * Use these flags instead of setting em_instruction_fetch
+	 * directly; they exist so that single stepping can be
+	 * switched on & off without affecting other tracing.
+	 */
+	unsigned  single_step       : 1;
+	unsigned  instruction_fetch : 1;
+	unsigned                    : 30;
+	/*
+	 * These addresses are copied into cr10 & cr11 if single
+	 * stepping is switched off
+	 */
+	unsigned long starting_addr;
+	unsigned long ending_addr;
+	union {
+		per_lowcore_words words;
+		per_lowcore_bits  bits;
+	} lowcore; 
+} per_struct;
+
+typedef struct
+{
+	unsigned int  len;
+	unsigned long kernel_addr;
+	unsigned long process_addr;
+} ptrace_area;
+
+/*
+ * S/390 specific non posix ptrace requests. I chose unusual values so
+ * they are unlikely to clash with future ptrace definitions.
+ */
+#define PTRACE_PEEKUSR_AREA           0x5000
+#define PTRACE_POKEUSR_AREA           0x5001
+#define PTRACE_PEEKTEXT_AREA	      0x5002
+#define PTRACE_PEEKDATA_AREA	      0x5003
+#define PTRACE_POKETEXT_AREA	      0x5004
+#define PTRACE_POKEDATA_AREA 	      0x5005
+#define PTRACE_GET_LAST_BREAK	      0x5006
+#define PTRACE_PEEK_SYSTEM_CALL       0x5007
+#define PTRACE_POKE_SYSTEM_CALL	      0x5008
+#define PTRACE_ENABLE_TE	      0x5009
+#define PTRACE_DISABLE_TE	      0x5010
+#define PTRACE_TE_ABORT_RAND	      0x5011
+
+/*
+ * The numbers chosen here are somewhat arbitrary but absolutely MUST
+ * not overlap with any of the numbers assigned in <linux/ptrace.h>.
+ */
+#define PTRACE_SINGLEBLOCK	12	/* resume execution until next branch */
+
+/*
+ * PT_PROT definition is loosely based on hppa bsd definition in
+ * gdb/hppab-nat.c
+ */
+#define PTRACE_PROT                       21
+
+typedef enum
+{
+	ptprot_set_access_watchpoint,
+	ptprot_set_write_watchpoint,
+	ptprot_disable_watchpoint
+} ptprot_flags;
+
+typedef struct
+{
+	unsigned long lowaddr;
+	unsigned long hiaddr;
+	ptprot_flags prot;
+} ptprot_area;                     
+
+/* Sequence of bytes for breakpoint illegal instruction.  */
+#define S390_BREAKPOINT     {0x0,0x1}
+#define S390_BREAKPOINT_U16 ((__u16)0x0001)
+#define S390_SYSCALL_OPCODE ((__u16)0x0a00)
+#define S390_SYSCALL_SIZE   2
+
+/*
+ * The user_regs_struct defines the way the user registers are
+ * stored on the stack for signal handling.
+ */
+struct user_regs_struct
+{
+	psw_t psw;
+	unsigned long gprs[NUM_GPRS];
+	unsigned int  acrs[NUM_ACRS];
+	unsigned long orig_gpr2;
+	s390_fp_regs fp_regs;
+	/*
+	 * These per registers are in here so that gdb can modify them
+	 * itself as there is no "official" ptrace interface for hardware
+	 * watchpoints. This is the way intel does it.
+	 */
+	per_struct per_info;
+	unsigned long ieee_instruction_pointer;	/* obsolete, always 0 */
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_S390_PTRACE_H */
diff --git a/arch/s390/include/uapi/asm/qeth.h b/arch/s390/include/uapi/asm/qeth.h
new file mode 100644
index 000000000..3a896cf52
--- /dev/null
+++ b/arch/s390/include/uapi/asm/qeth.h
@@ -0,0 +1,115 @@
+/*
+ * ioctl definitions for qeth driver
+ *
+ * Copyright IBM Corp. 2004
+ *
+ * Author(s):	Thomas Spatzier <tspat@de.ibm.com>
+ *
+ */
+#ifndef __ASM_S390_QETH_IOCTL_H__
+#define __ASM_S390_QETH_IOCTL_H__
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define SIOC_QETH_ARP_SET_NO_ENTRIES    (SIOCDEVPRIVATE)
+#define SIOC_QETH_ARP_QUERY_INFO        (SIOCDEVPRIVATE + 1)
+#define SIOC_QETH_ARP_ADD_ENTRY         (SIOCDEVPRIVATE + 2)
+#define SIOC_QETH_ARP_REMOVE_ENTRY      (SIOCDEVPRIVATE + 3)
+#define SIOC_QETH_ARP_FLUSH_CACHE       (SIOCDEVPRIVATE + 4)
+#define SIOC_QETH_ADP_SET_SNMP_CONTROL  (SIOCDEVPRIVATE + 5)
+#define SIOC_QETH_GET_CARD_TYPE         (SIOCDEVPRIVATE + 6)
+#define SIOC_QETH_QUERY_OAT		(SIOCDEVPRIVATE + 7)
+
+struct qeth_arp_cache_entry {
+	__u8  macaddr[6];
+	__u8  reserved1[2];
+	__u8  ipaddr[16]; /* for both  IPv4 and IPv6 */
+	__u8  reserved2[32];
+} __attribute__ ((packed));
+
+enum qeth_arp_ipaddrtype {
+	QETHARP_IP_ADDR_V4 = 1,
+	QETHARP_IP_ADDR_V6 = 2,
+};
+struct qeth_arp_entrytype {
+	__u8 mac;
+	__u8 ip;
+} __attribute__((packed));
+
+#define QETH_QARP_MEDIASPECIFIC_BYTES 32
+#define QETH_QARP_MACADDRTYPE_BYTES 1
+struct qeth_arp_qi_entry7 {
+	__u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+	struct qeth_arp_entrytype type;
+	__u8 macaddr[6];
+	__u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry7_ipv6 {
+	__u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+	struct qeth_arp_entrytype type;
+	__u8 macaddr[6];
+	__u8 ipaddr[16];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry7_short {
+	struct qeth_arp_entrytype type;
+	__u8 macaddr[6];
+	__u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry7_short_ipv6 {
+	struct qeth_arp_entrytype type;
+	__u8 macaddr[6];
+	__u8 ipaddr[16];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5 {
+	__u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+	struct qeth_arp_entrytype type;
+	__u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5_ipv6 {
+	__u8 media_specific[QETH_QARP_MEDIASPECIFIC_BYTES];
+	struct qeth_arp_entrytype type;
+	__u8 ipaddr[16];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5_short {
+	struct qeth_arp_entrytype type;
+	__u8 ipaddr[4];
+} __attribute__((packed));
+
+struct qeth_arp_qi_entry5_short_ipv6 {
+	struct qeth_arp_entrytype type;
+	__u8 ipaddr[16];
+} __attribute__((packed));
+/*
+ * can be set by user if no "media specific information" is wanted
+ * -> saves a lot of space in user space buffer
+ */
+#define QETH_QARP_STRIP_ENTRIES  0x8000
+#define QETH_QARP_WITH_IPV6	 0x4000
+#define QETH_QARP_REQUEST_MASK   0x00ff
+
+/* data sent to user space as result of query arp ioctl */
+#define QETH_QARP_USER_DATA_SIZE 20000
+#define QETH_QARP_MASK_OFFSET    4
+#define QETH_QARP_ENTRIES_OFFSET 6
+struct qeth_arp_query_user_data {
+	union {
+		__u32 data_len;		/* set by user space program */
+		__u32 no_entries;	/* set by kernel */
+	} u;				/* bytes 0-3 of the packed layout */
+	__u16 mask_bits;		/* offset 4 (cf. QETH_QARP_MASK_OFFSET) */
+	char *entries;			/* offset 6 (cf. QETH_QARP_ENTRIES_OFFSET) */
+} __attribute__((packed));
+
+struct qeth_query_oat_data {
+	__u32 command;
+	__u32 buffer_len;
+	__u32 response_len;
+	__u64 ptr;
+};
+#endif /* __ASM_S390_QETH_IOCTL_H__ */
diff --git a/arch/s390/include/uapi/asm/resource.h b/arch/s390/include/uapi/asm/resource.h
new file mode 100644
index 000000000..ec23d1c73
--- /dev/null
+++ b/arch/s390/include/uapi/asm/resource.h
@@ -0,0 +1,13 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/resources.h"
+ */
+
+#ifndef _S390_RESOURCE_H
+#define _S390_RESOURCE_H
+
+#include <asm-generic/resource.h>
+
+#endif
+
diff --git a/arch/s390/include/uapi/asm/schid.h b/arch/s390/include/uapi/asm/schid.h
new file mode 100644
index 000000000..32f3ab2a8
--- /dev/null
+++ b/arch/s390/include/uapi/asm/schid.h
@@ -0,0 +1,16 @@
+#ifndef _UAPIASM_SCHID_H
+#define _UAPIASM_SCHID_H
+
+#include <linux/types.h>
+
+struct subchannel_id {		/* the bit-fields below add up to 32 bits */
+	__u32 cssid : 8;
+	__u32 : 4;		/* unnamed padding bits */
+	__u32 m : 1;
+	__u32 ssid : 2;
+	__u32 one : 1;
+	__u32 sch_no : 16;
+} __attribute__ ((packed, aligned(4)));
+
+
+#endif /* _UAPIASM_SCHID_H */
diff --git a/arch/s390/include/uapi/asm/sclp_ctl.h b/arch/s390/include/uapi/asm/sclp_ctl.h
new file mode 100644
index 000000000..f2818613e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sclp_ctl.h
@@ -0,0 +1,24 @@
+/*
+ * IOCTL interface for SCLP
+ *
+ * Copyright IBM Corp. 2012
+ *
+ * Author: Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_SCLP_CTL_H
+#define _ASM_SCLP_CTL_H
+
+#include <linux/types.h>
+
+struct sclp_ctl_sccb {		/* argument type of the SCLP_CTL_SCCB ioctl */
+	__u32	cmdw;
+	__u64	sccb;		/* follows cmdw directly: the struct is packed */
+} __attribute__((packed));
+
+#define SCLP_CTL_IOCTL_MAGIC 0x10
+
+#define SCLP_CTL_SCCB \
+	_IOWR(SCLP_CTL_IOCTL_MAGIC, 0x10, struct sclp_ctl_sccb)
+
+#endif
diff --git a/arch/s390/include/uapi/asm/sembuf.h b/arch/s390/include/uapi/asm/sembuf.h
new file mode 100644
index 000000000..32626b0ca
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sembuf.h
@@ -0,0 +1,29 @@
+#ifndef _S390_SEMBUF_H
+#define _S390_SEMBUF_H
+
+/* 
+ * The semid64_ds structure for S/390 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem (for !__s390x__)
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct semid64_ds {
+	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
+	__kernel_time_t	sem_otime;		/* last semop time */
+#ifndef __s390x__
+	unsigned long	__unused1;
+#endif /* ! __s390x__ */
+	__kernel_time_t	sem_ctime;		/* last change time */
+#ifndef __s390x__
+	unsigned long	__unused2;
+#endif /* ! __s390x__ */
+	unsigned long	sem_nsems;		/* no. of semaphores in array */
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+#endif /* _S390_SEMBUF_H */
diff --git a/arch/s390/include/uapi/asm/setup.h b/arch/s390/include/uapi/asm/setup.h
new file mode 100644
index 000000000..5a637e3e3
--- /dev/null
+++ b/arch/s390/include/uapi/asm/setup.h
@@ -0,0 +1,13 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2010
+ */
+
+#ifndef _UAPI_ASM_S390_SETUP_H
+#define _UAPI_ASM_S390_SETUP_H
+
+#define COMMAND_LINE_SIZE	4096
+
+#define ARCH_COMMAND_LINE_SIZE	896
+
+#endif /* _UAPI_ASM_S390_SETUP_H */
diff --git a/arch/s390/include/uapi/asm/shmbuf.h b/arch/s390/include/uapi/asm/shmbuf.h
new file mode 100644
index 000000000..eed2e280c
--- /dev/null
+++ b/arch/s390/include/uapi/asm/shmbuf.h
@@ -0,0 +1,48 @@
+#ifndef _S390_SHMBUF_H
+#define _S390_SHMBUF_H
+
+/* 
+ * The shmid64_ds structure for S/390 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem (for !__s390x__)
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct shmid64_ds {
+	struct ipc64_perm	shm_perm;	/* operation perms */
+	size_t			shm_segsz;	/* size of segment (bytes) */
+	__kernel_time_t		shm_atime;	/* last attach time */
+#ifndef __s390x__
+	unsigned long		__unused1;
+#endif /* ! __s390x__ */
+	__kernel_time_t		shm_dtime;	/* last detach time */
+#ifndef __s390x__
+	unsigned long		__unused2;
+#endif /* ! __s390x__ */
+	__kernel_time_t		shm_ctime;	/* last change time */
+#ifndef __s390x__
+	unsigned long		__unused3;
+#endif /* ! __s390x__ */
+	__kernel_pid_t		shm_cpid;	/* pid of creator */
+	__kernel_pid_t		shm_lpid;	/* pid of last operator */
+	unsigned long		shm_nattch;	/* no. of current attaches */
+	unsigned long		__unused4;
+	unsigned long		__unused5;
+};
+
+struct shminfo64 {
+	unsigned long	shmmax;
+	unsigned long	shmmin;
+	unsigned long	shmmni;
+	unsigned long	shmseg;
+	unsigned long	shmall;
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+#endif /* _S390_SHMBUF_H */
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
new file mode 100644
index 000000000..ee69c0854
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -0,0 +1,244 @@
+#ifndef _UAPI_ASM_S390_SIE_H
+#define _UAPI_ASM_S390_SIE_H
+
+#define diagnose_codes						\
+	{ 0x10, "DIAG (0x10) release pages" },			\
+	{ 0x44, "DIAG (0x44) time slice end" },			\
+	{ 0x9c, "DIAG (0x9c) time slice end directed" },	\
+	{ 0x204, "DIAG (0x204) logical-cpu utilization" },	\
+	{ 0x258, "DIAG (0x258) page-reference services" },	\
+	{ 0x308, "DIAG (0x308) ipl functions" },		\
+	{ 0x500, "DIAG (0x500) KVM virtio functions" },		\
+	{ 0x501, "DIAG (0x501) KVM breakpoint" }
+
+#define sigp_order_codes					\
+	{ 0x01, "SIGP sense" },					\
+	{ 0x02, "SIGP external call" },				\
+	{ 0x03, "SIGP emergency signal" },			\
+	{ 0x05, "SIGP stop" },					\
+	{ 0x06, "SIGP restart" },				\
+	{ 0x09, "SIGP stop and store status" },			\
+	{ 0x0b, "SIGP initial cpu reset" },			\
+	{ 0x0d, "SIGP set prefix" },				\
+	{ 0x0e, "SIGP store status at address" },		\
+	{ 0x12, "SIGP set architecture" },			\
+	{ 0x15, "SIGP sense running" }
+
+#define icpt_prog_codes						\
+	{ 0x0001, "Prog Operation" },				\
+	{ 0x0002, "Prog Privileged Operation" },		\
+	{ 0x0003, "Prog Execute" },				\
+	{ 0x0004, "Prog Protection" },				\
+	{ 0x0005, "Prog Addressing" },				\
+	{ 0x0006, "Prog Specification" },			\
+	{ 0x0007, "Prog Data" },				\
+	{ 0x0008, "Prog Fixedpoint overflow" },			\
+	{ 0x0009, "Prog Fixedpoint divide" },			\
+	{ 0x000A, "Prog Decimal overflow" },			\
+	{ 0x000B, "Prog Decimal divide" },			\
+	{ 0x000C, "Prog HFP exponent overflow" },		\
+	{ 0x000D, "Prog HFP exponent underflow" },		\
+	{ 0x000E, "Prog HFP significance" },			\
+	{ 0x000F, "Prog HFP divide" },				\
+	{ 0x0010, "Prog Segment translation" },			\
+	{ 0x0011, "Prog Page translation" },			\
+	{ 0x0012, "Prog Translation specification" },		\
+	{ 0x0013, "Prog Special operation" },			\
+	{ 0x0015, "Prog Operand" },				\
+	{ 0x0016, "Prog Trace table" },				\
+	{ 0x0017, "Prog ASNtranslation specification" },	\
+	{ 0x001C, "Prog Spaceswitch event" },			\
+	{ 0x001D, "Prog HFP square root" },			\
+	{ 0x001F, "Prog PCtranslation specification" },		\
+	{ 0x0020, "Prog AFX translation" },			\
+	{ 0x0021, "Prog ASX translation" },			\
+	{ 0x0022, "Prog LX translation" },			\
+	{ 0x0023, "Prog EX translation" },			\
+	{ 0x0024, "Prog Primary authority" },			\
+	{ 0x0025, "Prog Secondary authority" },			\
+	{ 0x0026, "Prog LFXtranslation exception" },		\
+	{ 0x0027, "Prog LSXtranslation exception" },		\
+	{ 0x0028, "Prog ALET specification" },			\
+	{ 0x0029, "Prog ALEN translation" },			\
+	{ 0x002A, "Prog ALE sequence" },			\
+	{ 0x002B, "Prog ASTE validity" },			\
+	{ 0x002C, "Prog ASTE sequence" },			\
+	{ 0x002D, "Prog Extended authority" },			\
+	{ 0x002E, "Prog LSTE sequence" },			\
+	{ 0x002F, "Prog ASTE instance" },			\
+	{ 0x0030, "Prog Stack full" },				\
+	{ 0x0031, "Prog Stack empty" },				\
+	{ 0x0032, "Prog Stack specification" },			\
+	{ 0x0033, "Prog Stack type" },				\
+	{ 0x0034, "Prog Stack operation" },			\
+	{ 0x0039, "Prog Region first translation" },		\
+	{ 0x003A, "Prog Region second translation" },		\
+	{ 0x003B, "Prog Region third translation" },		\
+	{ 0x0040, "Prog Monitor event" },			\
+	{ 0x0080, "Prog PER event" },				\
+	{ 0x0119, "Prog Crypto operation" }
+
+#define exit_code_ipa0(ipa0, opcode, mnemonic)		\
+	{ (ipa0 << 8 | opcode), #ipa0 " " mnemonic }
+#define exit_code(opcode, mnemonic)			\
+	{ opcode, mnemonic }
+
+#define icpt_insn_codes				\
+	exit_code_ipa0(0x01, 0x01, "PR"),	\
+	exit_code_ipa0(0x01, 0x04, "PTFF"),	\
+	exit_code_ipa0(0x01, 0x07, "SCKPF"),	\
+	exit_code_ipa0(0xAA, 0x00, "RINEXT"),	\
+	exit_code_ipa0(0xAA, 0x01, "RION"),	\
+	exit_code_ipa0(0xAA, 0x02, "TRIC"),	\
+	exit_code_ipa0(0xAA, 0x03, "RIOFF"),	\
+	exit_code_ipa0(0xAA, 0x04, "RIEMIT"),	\
+	exit_code_ipa0(0xB2, 0x02, "STIDP"),	\
+	exit_code_ipa0(0xB2, 0x04, "SCK"),	\
+	exit_code_ipa0(0xB2, 0x05, "STCK"),	\
+	exit_code_ipa0(0xB2, 0x06, "SCKC"),	\
+	exit_code_ipa0(0xB2, 0x07, "STCKC"),	\
+	exit_code_ipa0(0xB2, 0x08, "SPT"),	\
+	exit_code_ipa0(0xB2, 0x09, "STPT"),	\
+	exit_code_ipa0(0xB2, 0x0d, "PTLB"),	\
+	exit_code_ipa0(0xB2, 0x10, "SPX"),	\
+	exit_code_ipa0(0xB2, 0x11, "STPX"),	\
+	exit_code_ipa0(0xB2, 0x12, "STAP"),	\
+	exit_code_ipa0(0xB2, 0x14, "SIE"),	\
+	exit_code_ipa0(0xB2, 0x16, "SETR"),	\
+	exit_code_ipa0(0xB2, 0x17, "STETR"),	\
+	exit_code_ipa0(0xB2, 0x18, "PC"),	\
+	exit_code_ipa0(0xB2, 0x20, "SERVC"),	\
+	exit_code_ipa0(0xB2, 0x21, "IPTE"),	\
+	exit_code_ipa0(0xB2, 0x28, "PT"),	\
+	exit_code_ipa0(0xB2, 0x29, "ISKE"),	\
+	exit_code_ipa0(0xB2, 0x2a, "RRBE"),	\
+	exit_code_ipa0(0xB2, 0x2b, "SSKE"),	\
+	exit_code_ipa0(0xB2, 0x2c, "TB"),	\
+	exit_code_ipa0(0xB2, 0x2e, "PGIN"),	\
+	exit_code_ipa0(0xB2, 0x2f, "PGOUT"),	\
+	exit_code_ipa0(0xB2, 0x30, "CSCH"),	\
+	exit_code_ipa0(0xB2, 0x31, "HSCH"),	\
+	exit_code_ipa0(0xB2, 0x32, "MSCH"),	\
+	exit_code_ipa0(0xB2, 0x33, "SSCH"),	\
+	exit_code_ipa0(0xB2, 0x34, "STSCH"),	\
+	exit_code_ipa0(0xB2, 0x35, "TSCH"),	\
+	exit_code_ipa0(0xB2, 0x36, "TPI"),	\
+	exit_code_ipa0(0xB2, 0x37, "SAL"),	\
+	exit_code_ipa0(0xB2, 0x38, "RSCH"),	\
+	exit_code_ipa0(0xB2, 0x39, "STCRW"),	\
+	exit_code_ipa0(0xB2, 0x3a, "STCPS"),	\
+	exit_code_ipa0(0xB2, 0x3b, "RCHP"),	\
+	exit_code_ipa0(0xB2, 0x3c, "SCHM"),	\
+	exit_code_ipa0(0xB2, 0x40, "BAKR"),	\
+	exit_code_ipa0(0xB2, 0x48, "PALB"),	\
+	exit_code_ipa0(0xB2, 0x4c, "TAR"),	\
+	exit_code_ipa0(0xB2, 0x50, "CSP"),	\
+	exit_code_ipa0(0xB2, 0x54, "MVPG"),	\
+	exit_code_ipa0(0xB2, 0x58, "BSG"),	\
+	exit_code_ipa0(0xB2, 0x5a, "BSA"),	\
+	exit_code_ipa0(0xB2, 0x5f, "CHSC"),	\
+	exit_code_ipa0(0xB2, 0x74, "SIGA"),	\
+	exit_code_ipa0(0xB2, 0x76, "XSCH"),	\
+	exit_code_ipa0(0xB2, 0x78, "STCKE"),	\
+	exit_code_ipa0(0xB2, 0x7c, "STCKF"),	\
+	exit_code_ipa0(0xB2, 0x7d, "STSI"),	\
+	exit_code_ipa0(0xB2, 0xb0, "STFLE"),	\
+	exit_code_ipa0(0xB2, 0xb1, "STFL"),	\
+	exit_code_ipa0(0xB2, 0xb2, "LPSWE"),	\
+	exit_code_ipa0(0xB2, 0xf8, "TEND"),	\
+	exit_code_ipa0(0xB2, 0xfc, "TABORT"),	\
+	exit_code_ipa0(0xB9, 0x1e, "KMAC"),	\
+	exit_code_ipa0(0xB9, 0x28, "PCKMO"),	\
+	exit_code_ipa0(0xB9, 0x2a, "KMF"),	\
+	exit_code_ipa0(0xB9, 0x2b, "KMO"),	\
+	exit_code_ipa0(0xB9, 0x2d, "KMCTR"),	\
+	exit_code_ipa0(0xB9, 0x2e, "KM"),	\
+	exit_code_ipa0(0xB9, 0x2f, "KMC"),	\
+	exit_code_ipa0(0xB9, 0x3e, "KIMD"),	\
+	exit_code_ipa0(0xB9, 0x3f, "KLMD"),	\
+	exit_code_ipa0(0xB9, 0x8a, "CSPG"),	\
+	exit_code_ipa0(0xB9, 0x8d, "EPSW"),	\
+	exit_code_ipa0(0xB9, 0x8e, "IDTE"),	\
+	exit_code_ipa0(0xB9, 0x8f, "CRDTE"),	\
+	exit_code_ipa0(0xB9, 0x9c, "EQBS"),	\
+	exit_code_ipa0(0xB9, 0xa2, "PTF"),	\
+	exit_code_ipa0(0xB9, 0xab, "ESSA"),	\
+	exit_code_ipa0(0xB9, 0xae, "RRBM"),	\
+	exit_code_ipa0(0xB9, 0xaf, "PFMF"),	\
+	exit_code_ipa0(0xE3, 0x03, "LRAG"),	\
+	exit_code_ipa0(0xE3, 0x13, "LRAY"),	\
+	exit_code_ipa0(0xE3, 0x25, "NTSTG"),	\
+	exit_code_ipa0(0xE5, 0x00, "LASP"),	\
+	exit_code_ipa0(0xE5, 0x01, "TPROT"),	\
+	exit_code_ipa0(0xE5, 0x60, "TBEGIN"),	\
+	exit_code_ipa0(0xE5, 0x61, "TBEGINC"),	\
+	exit_code_ipa0(0xEB, 0x25, "STCTG"),	\
+	exit_code_ipa0(0xEB, 0x2f, "LCTLG"),	\
+	exit_code_ipa0(0xEB, 0x60, "LRIC"),	\
+	exit_code_ipa0(0xEB, 0x61, "STRIC"),	\
+	exit_code_ipa0(0xEB, 0x62, "MRIC"),	\
+	exit_code_ipa0(0xEB, 0x8a, "SQBS"),	\
+	exit_code_ipa0(0xC8, 0x01, "ECTG"),	\
+	exit_code(0x0a, "SVC"),			\
+	exit_code(0x80, "SSM"),			\
+	exit_code(0x82, "LPSW"),		\
+	exit_code(0x83, "DIAG"),		\
+	exit_code(0xae, "SIGP"),		\
+	exit_code(0xac, "STNSM"),		\
+	exit_code(0xad, "STOSM"),		\
+	exit_code(0xb1, "LRA"),			\
+	exit_code(0xb6, "STCTL"),		\
+	exit_code(0xb7, "LCTL"),		\
+	exit_code(0xee, "PLO")
+
+#define sie_intercept_code					\
+	{ 0x00, "Host interruption" },				\
+	{ 0x04, "Instruction" },				\
+	{ 0x08, "Program interruption" },			\
+	{ 0x0c, "Instruction and program interruption" },	\
+	{ 0x10, "External request" },				\
+	{ 0x14, "External interruption" },			\
+	{ 0x18, "I/O request" },				\
+	{ 0x1c, "Wait state" },					\
+	{ 0x20, "Validity" },					\
+	{ 0x28, "Stop request" },				\
+	{ 0x2c, "Operation exception" },			\
+	{ 0x38, "Partial-execution" },				\
+	{ 0x3c, "I/O interruption" },				\
+	{ 0x40, "I/O instruction" },				\
+	{ 0x48, "Timing subset" }
+
+/*
+ * This is the simple interceptable instructions decoder.
+ *
+ * It will be used as a userspace interface and it can be used in places
+ * that do not allow the use of general decoder functions,
+ * such as trace event declarations.
+ *
+ * Some userspace tools may want to parse this code
+ * and would be confused by switch(), if() and other statements,
+ * but they can understand the conditional operator.
+ */
+#define INSN_DECODE_IPA0(ipa0, insn, rshift, mask)		\
+	(insn >> 56) == (ipa0) ?				\
+		((ipa0 << 8) | ((insn >> rshift) & mask)) :
+
+#define INSN_DECODE(insn) (insn >> 56)
+
+/*
+ * The macro icpt_insn_decoder() takes an intercepted instruction
+ * and returns a key, which can be used to find a mnemonic name
+ * of the instruction in the icpt_insn_codes table.
+ */
+#define icpt_insn_decoder(insn) (		\
+	INSN_DECODE_IPA0(0x01, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f)	\
+	INSN_DECODE_IPA0(0xb2, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xb9, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xe3, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xe5, insn, 48, 0xff)	\
+	INSN_DECODE_IPA0(0xeb, insn, 16, 0xff)	\
+	INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f)	\
+	INSN_DECODE(insn))
+
+#endif /* _UAPI_ASM_S390_SIE_H */
diff --git a/arch/s390/include/uapi/asm/sigcontext.h b/arch/s390/include/uapi/asm/sigcontext.h
new file mode 100644
index 000000000..5f0b8d7dd
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sigcontext.h
@@ -0,0 +1,84 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ */
+
+#ifndef _ASM_S390_SIGCONTEXT_H
+#define _ASM_S390_SIGCONTEXT_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#define __NUM_GPRS		16
+#define __NUM_FPRS		16
+#define __NUM_ACRS		16
+#define __NUM_VXRS		32
+#define __NUM_VXRS_LOW		16
+#define __NUM_VXRS_HIGH		16
+
+#ifndef __s390x__
+
+/* Has to be at least _NSIG_WORDS from asm/signal.h */
+#define _SIGCONTEXT_NSIG	64
+#define _SIGCONTEXT_NSIG_BPW	32
+/* Size of stack frame allocated when calling signal handler. */
+#define __SIGNAL_FRAMESIZE	96
+
+#else /* __s390x__ */
+
+/* Has to be at least _NSIG_WORDS from asm/signal.h */
+#define _SIGCONTEXT_NSIG	64
+#define _SIGCONTEXT_NSIG_BPW	64 
+/* Size of stack frame allocated when calling signal handler. */
+#define __SIGNAL_FRAMESIZE	160
+
+#endif /* __s390x__ */
+
+#define _SIGCONTEXT_NSIG_WORDS	(_SIGCONTEXT_NSIG / _SIGCONTEXT_NSIG_BPW)
+#define _SIGMASK_COPY_SIZE	(sizeof(unsigned long)*_SIGCONTEXT_NSIG_WORDS)
+
+typedef struct 
+{
+        unsigned long mask;
+        unsigned long addr;
+} __attribute__ ((aligned(8))) _psw_t;
+
+typedef struct
+{
+	_psw_t psw;
+	unsigned long gprs[__NUM_GPRS];
+	unsigned int  acrs[__NUM_ACRS];
+} _s390_regs_common;
+
+typedef struct
+{
+	unsigned int fpc;
+	unsigned int pad;
+	double   fprs[__NUM_FPRS];
+} _s390_fp_regs;
+
+typedef struct
+{
+	_s390_regs_common regs;
+	_s390_fp_regs     fpregs;
+} _sigregs;
+
+typedef struct
+{
+#ifndef __s390x__
+	unsigned long gprs_high[__NUM_GPRS];
+#endif
+	unsigned long long vxrs_low[__NUM_VXRS_LOW];
+	__vector128 vxrs_high[__NUM_VXRS_HIGH];
+	unsigned char __reserved[128];
+} _sigregs_ext;
+
+struct sigcontext
+{
+	unsigned long	oldmask[_SIGCONTEXT_NSIG_WORDS];
+	_sigregs        __user *sregs;
+};
+
+
+#endif
+
diff --git a/arch/s390/include/uapi/asm/siginfo.h b/arch/s390/include/uapi/asm/siginfo.h
new file mode 100644
index 000000000..91fd3e4b7
--- /dev/null
+++ b/arch/s390/include/uapi/asm/siginfo.h
@@ -0,0 +1,16 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/siginfo.h"
+ */
+
+#ifndef _S390_SIGINFO_H
+#define _S390_SIGINFO_H
+
+#ifdef __s390x__
+#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))	/* set before <asm-generic/siginfo.h> is included; presumably overrides its default - confirm */
+#endif /* __s390x__ */
+
+#include <asm-generic/siginfo.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h
new file mode 100644
index 000000000..2f43cfbf5
--- /dev/null
+++ b/arch/s390/include/uapi/asm/signal.h
@@ -0,0 +1,129 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/signal.h"
+ */
+
+#ifndef _UAPI_ASMS390_SIGNAL_H
+#define _UAPI_ASMS390_SIGNAL_H
+
+#include <linux/types.h>
+#include <linux/time.h>
+
+/* Avoid too many header ordering problems.  */
+struct siginfo;
+struct pt_regs;
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+#define NSIG            32
+typedef unsigned long sigset_t;	/* single-word signal set, declared only outside the kernel */
+
+#endif /* !__KERNEL__ */
+
+#define SIGHUP           1
+#define SIGINT           2
+#define SIGQUIT          3
+#define SIGILL           4
+#define SIGTRAP          5
+#define SIGABRT          6
+#define SIGIOT           6	/* same number as SIGABRT */
+#define SIGBUS           7
+#define SIGFPE           8
+#define SIGKILL          9
+#define SIGUSR1         10
+#define SIGSEGV         11
+#define SIGUSR2         12
+#define SIGPIPE         13
+#define SIGALRM         14
+#define SIGTERM         15
+#define SIGSTKFLT       16
+#define SIGCHLD         17
+#define SIGCONT         18
+#define SIGSTOP         19
+#define SIGTSTP         20
+#define SIGTTIN         21
+#define SIGTTOU         22
+#define SIGURG          23
+#define SIGXCPU         24
+#define SIGXFSZ         25
+#define SIGVTALRM       26
+#define SIGPROF         27
+#define SIGWINCH        28
+#define SIGIO           29
+#define SIGPOLL         SIGIO	/* alias, same number as SIGIO */
+/*
+#define SIGLOST         29
+*/
+#define SIGPWR          30
+#define SIGSYS		31
+#define SIGUNUSED       31	/* same number as SIGSYS */
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN        32
+#define SIGRTMAX        _NSIG	/* NOTE(review): _NSIG is not defined in this header - confirm where it comes from */
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP    0x00000001
+#define SA_NOCLDWAIT    0x00000002
+#define SA_SIGINFO      0x00000004	/* NOTE(review): not covered by the list above; presumably selects the _sa_sigaction handler form - confirm */
+#define SA_ONSTACK      0x08000000
+#define SA_RESTART      0x10000000
+#define SA_NODEFER      0x40000000
+#define SA_RESETHAND    0x80000000
+
+#define SA_NOMASK       SA_NODEFER	/* historical Linux name */
+#define SA_ONESHOT      SA_RESETHAND	/* historical Linux name */
+
+#define SA_RESTORER     0x04000000	/* NOTE(review): not covered by the list above; presumably relates to sa_restorer - confirm */
+
+#define MINSIGSTKSZ     2048
+#define SIGSTKSZ        8192	/* four times MINSIGSTKSZ */
+
+#include <asm-generic/signal-defs.h>
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+struct sigaction {
+        union {
+          __sighandler_t _sa_handler;
+          void (*_sa_sigaction)(int, struct siginfo *, void *);
+        } _u;	/* accessed through the sa_handler / sa_sigaction macros below */
+#ifndef __s390x__ /* lovely */
+        sigset_t sa_mask;	/* without __s390x__ the mask precedes flags and restorer */
+        unsigned long sa_flags;
+        void (*sa_restorer)(void);
+#else  /* __s390x__ */
+        unsigned long sa_flags;
+        void (*sa_restorer)(void);
+	sigset_t sa_mask;	/* with __s390x__ the mask is the last member */
+#endif /* __s390x__ */
+};
+
+#define sa_handler      _u._sa_handler
+#define sa_sigaction    _u._sa_sigaction
+
+#endif /* !__KERNEL__ */
+
+typedef struct sigaltstack {	/* also available under the typedef name stack_t */
+        void __user *ss_sp;	/* user-space pointer (__user) */
+        int ss_flags;
+        size_t ss_size;	/* NOTE(review): presumably the size in bytes of the area at ss_sp - confirm */
+} stack_t;
+
+
+#endif /* _UAPI_ASMS390_SIGNAL_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
new file mode 100644
index 000000000..296942d56
--- /dev/null
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -0,0 +1,94 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/socket.h"
+ */
+
+#ifndef _ASM_SOCKET_H
+#define _ASM_SOCKET_H
+
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+#define SOL_SOCKET	1
+
+#define SO_DEBUG	1
+#define SO_REUSEADDR	2
+#define SO_TYPE		3
+#define SO_ERROR	4
+#define SO_DONTROUTE	5
+#define SO_BROADCAST	6
+#define SO_SNDBUF	7
+#define SO_RCVBUF	8
+#define SO_SNDBUFFORCE	32	/* numbered out of sequence with its neighbours */
+#define SO_RCVBUFFORCE	33	/* numbered out of sequence with its neighbours */
+#define SO_KEEPALIVE	9
+#define SO_OOBINLINE	10
+#define SO_NO_CHECK	11
+#define SO_PRIORITY	12
+#define SO_LINGER	13
+#define SO_BSDCOMPAT	14
+#define SO_REUSEPORT	15
+#define SO_PASSCRED	16
+#define SO_PEERCRED	17
+#define SO_RCVLOWAT	18
+#define SO_SNDLOWAT	19
+#define SO_RCVTIMEO	20
+#define SO_SNDTIMEO	21
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION		22
+#define SO_SECURITY_ENCRYPTION_TRANSPORT	23
+#define SO_SECURITY_ENCRYPTION_NETWORK		24
+
+#define SO_BINDTODEVICE	25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER        26
+#define SO_DETACH_FILTER        27
+#define SO_GET_FILTER		SO_ATTACH_FILTER	/* same option number as SO_ATTACH_FILTER */
+
+#define SO_PEERNAME		28
+#define SO_TIMESTAMP		29
+#define SCM_TIMESTAMP		SO_TIMESTAMP	/* SCM_* name with the same value as the SO_* option */
+
+#define SO_ACCEPTCONN		30
+
+#define SO_PEERSEC		31
+#define SO_PASSSEC		34	/* 32 and 33 are SO_SNDBUFFORCE / SO_RCVBUFFORCE */
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
+
+#define SO_MARK			36
+
+#define SO_TIMESTAMPING		37
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
+#define SO_PROTOCOL		38
+#define SO_DOMAIN		39
+
+#define SO_RXQ_OVFL             40
+
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+#define SO_PEEK_OFF		42
+
+/* Instruct lower device to use last 4-bytes of skb data as FCS */
+#define SO_NOFCS		43
+
+#define SO_LOCK_FILTER		44
+
+#define SO_SELECT_ERR_QUEUE	45
+
+#define SO_BUSY_POLL		46
+
+#define SO_MAX_PACING_RATE	47
+
+#define SO_BPF_EXTENSIONS	48
+
+#define SO_INCOMING_CPU		49
+
+#define SO_ATTACH_BPF		50
+#define SO_DETACH_BPF		SO_DETACH_FILTER	/* same option number as SO_DETACH_FILTER (27) */
+
+#endif /* _ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/sockios.h b/arch/s390/include/uapi/asm/sockios.h
new file mode 100644
index 000000000..6f60eee73
--- /dev/null
+++ b/arch/s390/include/uapi/asm/sockios.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_S390_SOCKIOS_H
+#define _ASM_S390_SOCKIOS_H
+
+#include <asm-generic/sockios.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/stat.h b/arch/s390/include/uapi/asm/stat.h
new file mode 100644
index 000000000..b4ca97d91
--- /dev/null
+++ b/arch/s390/include/uapi/asm/stat.h
@@ -0,0 +1,103 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/stat.h"
+ */
+
+#ifndef _S390_STAT_H
+#define _S390_STAT_H
+
+#ifndef __s390x__
+struct __old_kernel_stat {	/* declared only when __s390x__ is not defined */
+        unsigned short st_dev;
+        unsigned short st_ino;
+        unsigned short st_mode;
+        unsigned short st_nlink;
+        unsigned short st_uid;
+        unsigned short st_gid;
+        unsigned short st_rdev;
+        unsigned long  st_size;
+        unsigned long  st_atime;	/* no *_nsec companions in this layout */
+        unsigned long  st_mtime;
+        unsigned long  st_ctime;
+};
+
+struct stat {	/* layout used when __s390x__ is not defined */
+        unsigned short st_dev;
+        unsigned short __pad1;	/* explicit padding after the 16-bit st_dev */
+        unsigned long  st_ino;
+        unsigned short st_mode;
+        unsigned short st_nlink;
+        unsigned short st_uid;
+        unsigned short st_gid;
+        unsigned short st_rdev;
+        unsigned short __pad2;	/* explicit padding after the 16-bit st_rdev */
+        unsigned long  st_size;
+        unsigned long  st_blksize;
+        unsigned long  st_blocks;
+        unsigned long  st_atime;
+        unsigned long  st_atime_nsec;
+        unsigned long  st_mtime;
+        unsigned long  st_mtime_nsec;
+        unsigned long  st_ctime;
+        unsigned long  st_ctime_nsec;
+        unsigned long  __unused4;	/* spare */
+        unsigned long  __unused5;	/* spare */
+};
+
+/* This matches struct stat64 in glibc2.1, hence the absolutely
+ * insane amounts of padding around dev_t's.
+ */
+struct stat64 {	/* declared only when __s390x__ is not defined */
+        unsigned long long	st_dev;
+        unsigned int    __pad1;
+#define STAT64_HAS_BROKEN_ST_INO        1
+        unsigned long   __st_ino;	/* unsigned long inode field; the unsigned long long st_ino is the last member */
+        unsigned int    st_mode;
+        unsigned int    st_nlink;
+        unsigned long   st_uid;
+        unsigned long   st_gid;
+        unsigned long long	st_rdev;
+        unsigned int    __pad3;
+        long long	st_size;
+        unsigned long   st_blksize;
+        unsigned char   __pad4[4];
+        unsigned long   __pad5;     /* future possible st_blocks high bits */
+        unsigned long   st_blocks;  /* Number 512-byte blocks allocated. */
+        unsigned long   st_atime;
+        unsigned long   st_atime_nsec;
+        unsigned long   st_mtime;
+        unsigned long   st_mtime_nsec;
+        unsigned long   st_ctime;
+        unsigned long   st_ctime_nsec;  /* companion of st_ctime, like the other *_nsec fields */
+        unsigned long long	st_ino;	/* unsigned long long inode field, see __st_ino above */
+};
+
+#else /* __s390x__ */
+
+struct stat {	/* layout used when __s390x__ is defined */
+        unsigned long  st_dev;
+        unsigned long  st_ino;
+        unsigned long  st_nlink;
+        unsigned int   st_mode;
+        unsigned int   st_uid;
+        unsigned int   st_gid;
+        unsigned int   __pad1;	/* fourth unsigned int, completes two pairs before st_rdev */
+        unsigned long  st_rdev;
+        unsigned long  st_size;
+        unsigned long  st_atime;
+	unsigned long  st_atime_nsec;
+        unsigned long  st_mtime;
+	unsigned long  st_mtime_nsec;
+        unsigned long  st_ctime;
+	unsigned long  st_ctime_nsec;
+        unsigned long  st_blksize;
+        long           st_blocks;	/* signed here, unlike in the other layout */
+        unsigned long  __unused[3];	/* spare */
+};
+
+#endif /* __s390x__ */
+
+#define STAT_HAVE_NSEC 1
+
+#endif
diff --git a/arch/s390/include/uapi/asm/statfs.h b/arch/s390/include/uapi/asm/statfs.h
new file mode 100644
index 000000000..471eb0918
--- /dev/null
+++ b/arch/s390/include/uapi/asm/statfs.h
@@ -0,0 +1,50 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/statfs.h"
+ */
+
+#ifndef _S390_STATFS_H
+#define _S390_STATFS_H
+
+/*
+ * We can't use <asm-generic/statfs.h> because in 64-bit mode
+ * we mix ints of different sizes in our struct statfs.
+ */
+
+#ifndef __KERNEL_STRICT_NAMES
+#include <linux/types.h>	/* NOTE(review): the structs below use __kernel_fsid_t even when this include is skipped - confirm it is always visible */
+typedef __kernel_fsid_t	fsid_t;
+#endif /* !__KERNEL_STRICT_NAMES */
+
+struct statfs {	/* mixes unsigned int and unsigned long members */
+	unsigned int	f_type;
+	unsigned int	f_bsize;
+	unsigned long	f_blocks;	/* the five counters are unsigned long here */
+	unsigned long	f_bfree;
+	unsigned long	f_bavail;
+	unsigned long	f_files;
+	unsigned long	f_ffree;
+	__kernel_fsid_t f_fsid;
+	unsigned int	f_namelen;
+	unsigned int	f_frsize;
+	unsigned int	f_flags;
+	unsigned int	f_spare[4];	/* spare */
+};
+
+struct statfs64 {	/* same members as struct statfs, with unsigned long long counters */
+	unsigned int	f_type;
+	unsigned int	f_bsize;
+	unsigned long long f_blocks;
+	unsigned long long f_bfree;
+	unsigned long long f_bavail;
+	unsigned long long f_files;
+	unsigned long long f_ffree;
+	__kernel_fsid_t f_fsid;
+	unsigned int	f_namelen;
+	unsigned int	f_frsize;
+	unsigned int	f_flags;
+	unsigned int	f_spare[4];	/* spare */
+};
+
+#endif
diff --git a/arch/s390/include/uapi/asm/swab.h b/arch/s390/include/uapi/asm/swab.h
new file mode 100644
index 000000000..da3bfe5cc
--- /dev/null
+++ b/arch/s390/include/uapi/asm/swab.h
@@ -0,0 +1,89 @@
+#ifndef _S390_SWAB_H
+#define _S390_SWAB_H
+
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <linux/types.h>
+
+#ifndef __s390x__	/* no 64-bit __arch_swab64* helpers are defined below in this case */
+# define __SWAB_64_THRU_32__	/* NOTE(review): presumably makes generic code build 64-bit swaps from 32-bit ones - confirm */
+#endif /* !__s390x__ */
+
+#ifdef __s390x__
+static inline __u64 __arch_swab64p(const __u64 *x)
+{
+	__u64 result;
+	/* lrvg (load reversed): fetch the 8 bytes at *x in reversed byte order */
+	asm volatile("lrvg %0,%1" : "=d" (result) : "m" (*x));
+	return result;
+}
+#define __arch_swab64p __arch_swab64p	/* self-define; presumably lets generic code detect this version with #ifdef - confirm */
+
+static inline __u64 __arch_swab64(__u64 x)
+{
+	__u64 result;
+	/* lrvgr: register-to-register form, result gets the bytes of x reversed */
+	asm volatile("lrvgr %0,%1" : "=d" (result) : "d" (x));
+	return result;
+}
+#define __arch_swab64 __arch_swab64	/* self-define; presumably lets generic code detect this version with #ifdef - confirm */
+
+static inline void __arch_swab64s(__u64 *x)
+{
+	*x = __arch_swab64p(x);	/* in-place variant: store the swapped value back to *x */
+}
+#define __arch_swab64s __arch_swab64s	/* self-define; presumably lets generic code detect this version with #ifdef - confirm */
+#endif /* __s390x__ */
+
+static inline __u32 __arch_swab32p(const __u32 *x)
+{
+	__u32 result;
+	/* return the 32-bit value at *x with its four bytes in reversed order */
+	asm volatile(
+#ifndef __s390x__
+		"	icm	%0,8,%O1+3(%R1)\n"	/* byte at offset 3 -> most significant byte */
+		"	icm	%0,4,%O1+2(%R1)\n"	/* byte at offset 2 -> next byte */
+		"	icm	%0,2,%O1+1(%R1)\n"	/* byte at offset 1 -> next byte */
+		"	ic	%0,%1"			/* byte at offset 0 -> least significant byte */
+		: "=&d" (result) : "Q" (*x) : "cc");	/* early-clobber output; condition code is clobbered */
+#else /* __s390x__ */
+		"	lrv	%0,%1"			/* single load-reversed instruction */
+		: "=d" (result) : "m" (*x));
+#endif /* __s390x__ */
+	return result;
+}
+#define __arch_swab32p __arch_swab32p	/* self-define; presumably lets generic code detect this version with #ifdef - confirm */
+
+#ifdef __s390x__
+static inline __u32 __arch_swab32(__u32 x)
+{
+	__u32 result;
+	/* lrvr: register-to-register form, result gets the four bytes of x reversed */
+	asm volatile("lrvr  %0,%1" : "=d" (result) : "d" (x));
+	return result;
+}
+#define __arch_swab32 __arch_swab32	/* self-define; presumably lets generic code detect this version with #ifdef - confirm */
+#endif /* __s390x__ */
+
+static inline __u16 __arch_swab16p(const __u16 *x)
+{
+	__u16 result;
+	/* return the 16-bit value at *x with its two bytes exchanged */
+	asm volatile(
+#ifndef __s390x__
+		"	icm	%0,2,%O1+1(%R1)\n"	/* byte at offset 1 -> upper byte of the low halfword */
+		"	ic	%0,%1\n"		/* byte at offset 0 -> lowest byte */
+		: "=&d" (result) : "Q" (*x) : "cc");	/* early-clobber output; condition code is clobbered */
+#else /* __s390x__ */
+		"	lrvh	%0,%1"			/* single halfword load-reversed instruction */
+		: "=d" (result) : "m" (*x));
+#endif /* __s390x__ */
+	return result;
+}
+#define __arch_swab16p __arch_swab16p	/* self-define; presumably lets generic code detect this version with #ifdef - confirm */
+
+#endif /* _S390_SWAB_H */
diff --git a/arch/s390/include/uapi/asm/tape390.h b/arch/s390/include/uapi/asm/tape390.h
new file mode 100644
index 000000000..b2bc4bab7
--- /dev/null
+++ b/arch/s390/include/uapi/asm/tape390.h
@@ -0,0 +1,102 @@
+/*************************************************************************
+ *
+ *	   enables user programs to display messages and control encryption
+ *	   on s390 tape devices
+ *
+ *	   Copyright IBM Corp. 2001, 2006
+ *	   Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ *
+ *************************************************************************/
+
+#ifndef _TAPE390_H
+#define _TAPE390_H
+
+#define TAPE390_DISPLAY _IOW('d', 1, struct display_struct)
+
+/*
+ * The TAPE390_DISPLAY ioctl calls the Load Display command
+ * which transfers 17 bytes of data from the channel to the subsystem:
+ *     - 1 format control byte, and
+ *     - two 8-byte messages
+ *
+ * Format control byte:
+ *   0-2: New Message Overlay
+ *     3: Alternate Messages
+ *     4: Blink Message
+ *     5: Display Low/High Message
+ *     6: Reserved
+ *     7: Automatic Load Request
+ *
+ */
+
+typedef struct display_struct {	/* argument of TAPE390_DISPLAY: 1 + 8 + 8 = 17 bytes */
+        char cntrl;		/* format control byte, bit layout described above */
+        char message1[8];	/* first 8-byte message */
+        char message2[8];	/* second 8-byte message */
+} display_struct;
+
+/*
+ * Tape encryption support
+ */
+
+struct tape390_crypt_info {	/* argument of TAPE390_CRYPT_SET and TAPE390_CRYPT_QUERY */
+	char capability;	/* tested with TAPE390_CRYPT_SUPPORTED_MASK */
+	char status;		/* tested with TAPE390_CRYPT_ON_MASK */
+	char medium_status;	/* tested with TAPE390_MEDIUM_LOADED_MASK / TAPE390_MEDIUM_ENCRYPTED_MASK */
+} __attribute__ ((packed));
+
+
+/* Macros for "capability" field; x is a struct tape390_crypt_info expression */
+#define TAPE390_CRYPT_SUPPORTED_MASK 0x01
+#define TAPE390_CRYPT_SUPPORTED(x) \
+	(((x).capability) & TAPE390_CRYPT_SUPPORTED_MASK)
+
+/* Macros for "status" field; (x) is parenthesized so that e.g. *p works */
+#define TAPE390_CRYPT_ON_MASK 0x01
+#define TAPE390_CRYPT_ON(x) (((x).status) & TAPE390_CRYPT_ON_MASK)
+
+/* Macros for "medium_status" field; each yields the masked bit, non-zero if set */
+#define TAPE390_MEDIUM_LOADED_MASK 0x01
+#define TAPE390_MEDIUM_ENCRYPTED_MASK 0x02
+#define TAPE390_MEDIUM_ENCRYPTED(x) \
+	(((x).medium_status) & TAPE390_MEDIUM_ENCRYPTED_MASK)
+#define TAPE390_MEDIUM_LOADED(x) \
+	(((x).medium_status) & TAPE390_MEDIUM_LOADED_MASK)
+
+/*
+ * The TAPE390_CRYPT_SET ioctl is used to switch on/off encryption.
+ * The "capability" and "medium_status" fields are ignored for this ioctl!
+ */
+#define TAPE390_CRYPT_SET _IOW('d', 2, struct tape390_crypt_info)
+
+/*
+ * The TAPE390_CRYPT_QUERY ioctl is used to query the encryption state.
+ */
+#define TAPE390_CRYPT_QUERY _IOR('d', 3, struct tape390_crypt_info)
+
+/* Values for the "type" and "type_on_tape" fields of struct tape390_kekl */
+#define TAPE390_KEKL_TYPE_NONE 0
+#define TAPE390_KEKL_TYPE_LABEL 1
+#define TAPE390_KEKL_TYPE_HASH 2
+
+struct tape390_kekl {	/* one key label entry: 1 + 1 + 65 = 67 bytes, packed */
+	unsigned char type;		/* one of TAPE390_KEKL_TYPE_* */
+	unsigned char type_on_tape;	/* one of TAPE390_KEKL_TYPE_* */
+	char label[65];			/* 65-byte label buffer */
+} __attribute__ ((packed));
+
+struct tape390_kekl_pair {	/* argument of TAPE390_KEKL_SET and TAPE390_KEKL_QUERY */
+	struct tape390_kekl kekl[2];	/* two packed entries, 2 * 67 = 134 bytes */
+} __attribute__ ((packed));
+
+/*
+ * The TAPE390_KEKL_SET ioctl is used to set Key Encrypting Key labels.
+ */
+#define TAPE390_KEKL_SET _IOW('d', 4, struct tape390_kekl_pair)
+
+/*
+ * The TAPE390_KEKL_QUERY ioctl is used to query Key Encrypting Key labels.
+ */
+#define TAPE390_KEKL_QUERY _IOR('d', 5, struct tape390_kekl_pair)
+
+#endif 
diff --git a/arch/s390/include/uapi/asm/termbits.h b/arch/s390/include/uapi/asm/termbits.h
new file mode 100644
index 000000000..71bf6ac6a
--- /dev/null
+++ b/arch/s390/include/uapi/asm/termbits.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_S390_TERMBITS_H
+#define _ASM_S390_TERMBITS_H
+
+#include <asm-generic/termbits.h>
+
+#endif
diff --git a/arch/s390/include/uapi/asm/termios.h b/arch/s390/include/uapi/asm/termios.h
new file mode 100644
index 000000000..554f973db
--- /dev/null
+++ b/arch/s390/include/uapi/asm/termios.h
@@ -0,0 +1,49 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/termios.h"
+ */
+
+#ifndef _UAPI_S390_TERMIOS_H
+#define _UAPI_S390_TERMIOS_H
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+
+struct winsize {	/* four unsigned shorts, 8 bytes in total */
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+#define NCC 8	/* number of entries in termio.c_cc[] */
+struct termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[NCC];	/* control characters */
+};
+
+/* modem lines */
+#define TIOCM_LE	0x001
+#define TIOCM_DTR	0x002
+#define TIOCM_RTS	0x004
+#define TIOCM_ST	0x008
+#define TIOCM_SR	0x010
+#define TIOCM_CTS	0x020
+#define TIOCM_CAR	0x040
+#define TIOCM_RNG	0x080
+#define TIOCM_DSR	0x100
+#define TIOCM_CD	TIOCM_CAR	/* alias for TIOCM_CAR */
+#define TIOCM_RI	TIOCM_RNG	/* alias for TIOCM_RNG */
+#define TIOCM_OUT1	0x2000	/* bits 0x200 to 0x1000 are not assigned here */
+#define TIOCM_OUT2	0x4000
+#define TIOCM_LOOP	0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result): no result values are defined in this header */
+
+
+#endif /* _UAPI_S390_TERMIOS_H */
diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h
new file mode 100644
index 000000000..3c3951e34
--- /dev/null
+++ b/arch/s390/include/uapi/asm/types.h
@@ -0,0 +1,26 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/types.h"
+ */
+
+#ifndef _UAPI_S390_TYPES_H
+#define _UAPI_S390_TYPES_H
+
+#include <asm-generic/int-ll64.h>
+
+#ifndef __ASSEMBLY__
+
+/* An address type, so that arithmetic can be done on it and it can be
+   upgraded to 64 bit when necessary.
+*/
+typedef unsigned long addr_t; 
+typedef __signed__ long saddr_t;	/* signed type of the same width as addr_t */
+
+typedef struct {	/* 128-bit value stored as four 32-bit words */
+	__u32 u[4];	/* 4 x 32 bit = 128 bit */
+} __vector128;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_S390_TYPES_H */
diff --git a/arch/s390/include/uapi/asm/ucontext.h b/arch/s390/include/uapi/asm/ucontext.h
new file mode 100644
index 000000000..64a69aa5d
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ucontext.h
@@ -0,0 +1,40 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/ucontext.h"
+ */
+
+#ifndef _ASM_S390_UCONTEXT_H
+#define _ASM_S390_UCONTEXT_H
+
+#define UC_GPRS_HIGH	1	/* uc_mcontext_ext has valid high gprs */
+#define UC_VXRS		2	/* uc_mcontext_ext has valid vector regs */
+
+/*
+ * The struct ucontext_extended describes how the registers are stored
+ * on a rt signal frame. Please note that the structure is not fixed,
+ * if new CPU registers are added to the user state the size of the
+ * struct ucontext_extended will increase.
+ */
+struct ucontext_extended {	/* same leading members as struct ucontext, plus uc_mcontext_ext */
+	unsigned long	  uc_flags;	/* presumably holds the UC_GPRS_HIGH / UC_VXRS bits - confirm */
+	struct ucontext  *uc_link;
+	stack_t		  uc_stack;
+	_sigregs	  uc_mcontext;
+	sigset_t	  uc_sigmask;
+	/* Allow for uc_sigmask growth.  Glibc uses a 1024-bit sigset_t.  */
+	unsigned char	  __unused[128 - sizeof(sigset_t)];	/* pads uc_sigmask to 128 bytes = 1024 bits */
+	_sigregs_ext	  uc_mcontext_ext;	/* contents flagged by UC_GPRS_HIGH and UC_VXRS */
+};
+
+struct ucontext {	/* struct ucontext_extended without the trailing uc_mcontext_ext */
+	unsigned long	  uc_flags;
+	struct ucontext  *uc_link;
+	stack_t		  uc_stack;
+	_sigregs          uc_mcontext;
+	sigset_t	  uc_sigmask;
+	/* Allow for uc_sigmask growth.  Glibc uses a 1024-bit sigset_t.  */
+	unsigned char	  __unused[128 - sizeof(sigset_t)];	/* pads uc_sigmask to 128 bytes = 1024 bits */
+};
+
+#endif /* !_ASM_S390_UCONTEXT_H */
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
new file mode 100644
index 000000000..67878af25
--- /dev/null
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -0,0 +1,385 @@
+/*
+ *  S390 version
+ *
+ *  Derived from "include/asm-i386/unistd.h"
+ */
+
+#ifndef _UAPI_ASM_S390_UNISTD_H_
+#define _UAPI_ASM_S390_UNISTD_H_
+
+/*
+ * This file contains the system call numbers.
+ */
+
+#define __NR_exit                 1
+#define __NR_fork                 2
+#define __NR_read                 3
+#define __NR_write                4
+#define __NR_open                 5
+#define __NR_close                6
+#define __NR_restart_syscall	  7
+#define __NR_creat                8
+#define __NR_link                 9
+#define __NR_unlink              10
+#define __NR_execve              11
+#define __NR_chdir               12
+#define __NR_mknod               14
+#define __NR_chmod               15
+#define __NR_lseek               19
+#define __NR_getpid              20
+#define __NR_mount               21
+#define __NR_umount              22
+#define __NR_ptrace              26
+#define __NR_alarm               27
+#define __NR_pause               29
+#define __NR_utime               30
+#define __NR_access              33
+#define __NR_nice                34
+#define __NR_sync                36
+#define __NR_kill                37
+#define __NR_rename              38
+#define __NR_mkdir               39
+#define __NR_rmdir               40
+#define __NR_dup                 41
+#define __NR_pipe                42
+#define __NR_times               43
+#define __NR_brk                 45
+#define __NR_signal              48
+#define __NR_acct                51
+#define __NR_umount2             52
+#define __NR_ioctl               54
+#define __NR_fcntl               55
+#define __NR_setpgid             57
+#define __NR_umask               60
+#define __NR_chroot              61
+#define __NR_ustat               62
+#define __NR_dup2                63
+#define __NR_getppid             64
+#define __NR_getpgrp             65
+#define __NR_setsid              66
+#define __NR_sigaction           67
+#define __NR_sigsuspend          72
+#define __NR_sigpending          73
+#define __NR_sethostname         74
+#define __NR_setrlimit           75
+#define __NR_getrusage           77
+#define __NR_gettimeofday        78
+#define __NR_settimeofday        79
+#define __NR_symlink             83
+#define __NR_readlink            85
+#define __NR_uselib              86
+#define __NR_swapon              87
+#define __NR_reboot              88
+#define __NR_readdir             89
+#define __NR_mmap                90
+#define __NR_munmap              91
+#define __NR_truncate            92
+#define __NR_ftruncate           93
+#define __NR_fchmod              94
+#define __NR_getpriority         96
+#define __NR_setpriority         97
+#define __NR_statfs              99
+#define __NR_fstatfs            100
+#define __NR_socketcall         102
+#define __NR_syslog             103
+#define __NR_setitimer          104
+#define __NR_getitimer          105
+#define __NR_stat               106
+#define __NR_lstat              107
+#define __NR_fstat              108
+#define __NR_lookup_dcookie     110
+#define __NR_vhangup            111
+#define __NR_idle               112
+#define __NR_wait4              114
+#define __NR_swapoff            115
+#define __NR_sysinfo            116
+#define __NR_ipc                117
+#define __NR_fsync              118
+#define __NR_sigreturn          119
+#define __NR_clone              120
+#define __NR_setdomainname      121
+#define __NR_uname              122
+#define __NR_adjtimex           124
+#define __NR_mprotect           125
+#define __NR_sigprocmask        126
+#define __NR_create_module      127
+#define __NR_init_module        128
+#define __NR_delete_module      129
+#define __NR_get_kernel_syms    130
+#define __NR_quotactl           131
+#define __NR_getpgid            132
+#define __NR_fchdir             133
+#define __NR_bdflush            134
+#define __NR_sysfs              135
+#define __NR_personality        136
+#define __NR_afs_syscall        137 /* Syscall for Andrew File System */
+#define __NR_getdents           141
+#define __NR_flock              143
+#define __NR_msync              144
+#define __NR_readv              145
+#define __NR_writev             146
+#define __NR_getsid             147
+#define __NR_fdatasync          148
+#define __NR__sysctl            149
+#define __NR_mlock              150
+#define __NR_munlock            151
+#define __NR_mlockall           152
+#define __NR_munlockall         153
+#define __NR_sched_setparam             154
+#define __NR_sched_getparam             155
+#define __NR_sched_setscheduler         156
+#define __NR_sched_getscheduler         157
+#define __NR_sched_yield                158
+#define __NR_sched_get_priority_max     159
+#define __NR_sched_get_priority_min     160
+#define __NR_sched_rr_get_interval      161
+#define __NR_nanosleep          162
+#define __NR_mremap             163
+#define __NR_query_module       167
+#define __NR_poll               168
+#define __NR_nfsservctl         169
+#define __NR_prctl              172
+#define __NR_rt_sigreturn       173
+#define __NR_rt_sigaction       174
+#define __NR_rt_sigprocmask     175
+#define __NR_rt_sigpending      176
+#define __NR_rt_sigtimedwait    177
+#define __NR_rt_sigqueueinfo    178
+#define __NR_rt_sigsuspend      179
+#define __NR_pread64            180
+#define __NR_pwrite64           181
+#define __NR_getcwd             183
+#define __NR_capget             184
+#define __NR_capset             185
+#define __NR_sigaltstack        186
+#define __NR_sendfile           187
+#define __NR_getpmsg		188
+#define __NR_putpmsg		189
+#define __NR_vfork		190
+#define __NR_pivot_root         217
+#define __NR_mincore            218
+#define __NR_madvise            219
+#define __NR_getdents64		220
+#define __NR_readahead		222
+#define __NR_setxattr		224
+#define __NR_lsetxattr		225
+#define __NR_fsetxattr		226
+#define __NR_getxattr		227
+#define __NR_lgetxattr		228
+#define __NR_fgetxattr		229
+#define __NR_listxattr		230
+#define __NR_llistxattr		231
+#define __NR_flistxattr		232
+#define __NR_removexattr	233
+#define __NR_lremovexattr	234
+#define __NR_fremovexattr	235
+#define __NR_gettid		236
+#define __NR_tkill		237
+#define __NR_futex		238
+#define __NR_sched_setaffinity	239
+#define __NR_sched_getaffinity	240
+#define __NR_tgkill		241
+/* Number 242 is reserved for tux */
+#define __NR_io_setup		243
+#define __NR_io_destroy		244
+#define __NR_io_getevents	245
+#define __NR_io_submit		246
+#define __NR_io_cancel		247
+#define __NR_exit_group		248
+#define __NR_epoll_create	249
+#define __NR_epoll_ctl		250
+#define __NR_epoll_wait		251
+#define __NR_set_tid_address	252
+#define __NR_fadvise64		253
+#define __NR_timer_create	254
+#define __NR_timer_settime	(__NR_timer_create+1)	/* 255 */
+#define __NR_timer_gettime	(__NR_timer_create+2)	/* 256 */
+#define __NR_timer_getoverrun	(__NR_timer_create+3)	/* 257 */
+#define __NR_timer_delete	(__NR_timer_create+4)	/* 258 */
+#define __NR_clock_settime	(__NR_timer_create+5)	/* 259 */
+#define __NR_clock_gettime	(__NR_timer_create+6)	/* 260 */
+#define __NR_clock_getres	(__NR_timer_create+7)	/* 261 */
+#define __NR_clock_nanosleep	(__NR_timer_create+8)	/* 262 */
+/* Number 263 is reserved for vserver */
+#define __NR_statfs64		265
+#define __NR_fstatfs64		266
+#define __NR_remap_file_pages	267
+/* Number 268 is reserved for new sys_mbind */
+/* Number 269 is reserved for new sys_get_mempolicy */
+/* Number 270 is reserved for new sys_set_mempolicy */
+#define __NR_mq_open		271
+#define __NR_mq_unlink		272
+#define __NR_mq_timedsend	273
+#define __NR_mq_timedreceive	274
+#define __NR_mq_notify		275
+#define __NR_mq_getsetattr	276
+#define __NR_kexec_load		277
+#define __NR_add_key		278
+#define __NR_request_key	279
+#define __NR_keyctl		280
+#define __NR_waitid		281
+#define __NR_ioprio_set		282
+#define __NR_ioprio_get		283
+#define __NR_inotify_init	284
+#define __NR_inotify_add_watch	285
+#define __NR_inotify_rm_watch	286
+/* Number 287 is reserved for new sys_migrate_pages */
+#define __NR_openat		288
+#define __NR_mkdirat		289
+#define __NR_mknodat		290
+#define __NR_fchownat		291
+#define __NR_futimesat		292
+#define __NR_unlinkat		294
+#define __NR_renameat		295
+#define __NR_linkat		296
+#define __NR_symlinkat		297
+#define __NR_readlinkat		298
+#define __NR_fchmodat		299
+#define __NR_faccessat		300
+#define __NR_pselect6		301
+#define __NR_ppoll		302
+#define __NR_unshare		303
+#define __NR_set_robust_list	304
+#define __NR_get_robust_list	305
+#define __NR_splice		306
+#define __NR_sync_file_range	307
+#define __NR_tee		308
+#define __NR_vmsplice		309
+/* Number 310 is reserved for new sys_move_pages */
+#define __NR_getcpu		311
+#define __NR_epoll_pwait	312
+#define __NR_utimes		313
+#define __NR_fallocate		314
+#define __NR_utimensat		315
+#define __NR_signalfd		316
+#define __NR_timerfd		317
+#define __NR_eventfd		318
+#define __NR_timerfd_create	319
+#define __NR_timerfd_settime	320
+#define __NR_timerfd_gettime	321
+#define __NR_signalfd4		322
+#define __NR_eventfd2		323
+#define __NR_inotify_init1	324
+#define __NR_pipe2		325
+#define __NR_dup3		326
+#define __NR_epoll_create1	327
+#define	__NR_preadv		328
+#define	__NR_pwritev		329
+#define __NR_rt_tgsigqueueinfo	330
+#define __NR_perf_event_open	331
+#define __NR_fanotify_init	332
+#define __NR_fanotify_mark	333
+#define __NR_prlimit64		334
+#define __NR_name_to_handle_at	335
+#define __NR_open_by_handle_at	336
+#define __NR_clock_adjtime	337
+#define __NR_syncfs		338
+#define __NR_setns		339
+#define __NR_process_vm_readv	340
+#define __NR_process_vm_writev	341
+#define __NR_s390_runtime_instr 342
+#define __NR_kcmp		343
+#define __NR_finit_module	344
+#define __NR_sched_setattr	345
+#define __NR_sched_getattr	346
+#define __NR_renameat2		347
+#define __NR_seccomp		348
+#define __NR_getrandom		349
+#define __NR_memfd_create	350
+#define __NR_bpf		351
+#define __NR_s390_pci_mmio_write	352
+#define __NR_s390_pci_mmio_read		353
+#define __NR_execveat		354
+#define NR_syscalls 355	/* one more than __NR_execveat (354), the highest number defined in this file */
+
+/* 
+ * There are some system calls that are not present on 64 bit, some
+ * have a different name although they do the same (e.g. __NR_chown32
+ * is __NR_chown on 64 bit).
+ */
+#ifndef __s390x__
+
+#define __NR_time		 13
+#define __NR_lchown		 16
+#define __NR_setuid		 23
+#define __NR_getuid		 24
+#define __NR_stime		 25
+#define __NR_setgid		 46
+#define __NR_getgid		 47
+#define __NR_geteuid		 49
+#define __NR_getegid		 50
+#define __NR_setreuid		 70
+#define __NR_setregid		 71
+#define __NR_getrlimit		 76	/* with __s390x__ this name is 191 instead */
+#define __NR_getgroups		 80
+#define __NR_setgroups		 81
+#define __NR_fchown		 95
+#define __NR_ioperm		101
+#define __NR_setfsuid		138
+#define __NR_setfsgid		139
+#define __NR__llseek		140
+#define __NR__newselect 	142
+#define __NR_setresuid		164
+#define __NR_getresuid		165
+#define __NR_setresgid		170
+#define __NR_getresgid		171
+#define __NR_chown		182
+#define __NR_ugetrlimit		191	/* SuS compliant getrlimit */
+#define __NR_mmap2		192
+#define __NR_truncate64		193
+#define __NR_ftruncate64	194
+#define __NR_stat64		195
+#define __NR_lstat64		196
+#define __NR_fstat64		197
+#define __NR_lchown32		198
+#define __NR_getuid32		199
+#define __NR_getgid32		200
+#define __NR_geteuid32		201
+#define __NR_getegid32		202
+#define __NR_setreuid32		203
+#define __NR_setregid32		204
+#define __NR_getgroups32	205
+#define __NR_setgroups32	206
+#define __NR_fchown32		207
+#define __NR_setresuid32	208
+#define __NR_getresuid32	209
+#define __NR_setresgid32	210
+#define __NR_getresgid32	211
+#define __NR_chown32		212
+#define __NR_setuid32		213
+#define __NR_setgid32		214
+#define __NR_setfsuid32		215
+#define __NR_setfsgid32		216
+#define __NR_fcntl64		221
+#define __NR_sendfile64		223
+#define __NR_fadvise64_64	264
+#define __NR_fstatat64		293
+
+#else /* __s390x__ */
+
+#define __NR_select		142	/* same number as __NR__newselect above */
+#define __NR_getrlimit		191	/* SuS compliant getrlimit */
+#define __NR_lchown  		198	/* 198-216: numbers of the *32 names above, without the suffix */
+#define __NR_getuid  		199
+#define __NR_getgid  		200
+#define __NR_geteuid  		201
+#define __NR_getegid  		202
+#define __NR_setreuid  		203
+#define __NR_setregid  		204
+#define __NR_getgroups  	205
+#define __NR_setgroups  	206
+#define __NR_fchown  		207
+#define __NR_setresuid  	208
+#define __NR_getresuid  	209
+#define __NR_setresgid  	210
+#define __NR_getresgid  	211
+#define __NR_chown  		212
+#define __NR_setuid  		213
+#define __NR_setgid  		214
+#define __NR_setfsuid  		215
+#define __NR_setfsgid  		216
+#define __NR_newfstatat		293	/* same number as __NR_fstatat64 above */
+
+#endif /* __s390x__ */
+
+#endif /* _UAPI_ASM_S390_UNISTD_H_ */
diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h
new file mode 100644
index 000000000..a9a4ebf79
--- /dev/null
+++ b/arch/s390/include/uapi/asm/virtio-ccw.h
@@ -0,0 +1,21 @@
+/*
+ * Definitions for virtio-ccw devices.
+ *
+ * Copyright IBM Corp. 2013
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *  Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_VIRTIO_CCW_H
+#define __KVM_VIRTIO_CCW_H
+
+/* Alignment of vring buffers. */
+#define KVM_VIRTIO_CCW_RING_ALIGN 4096
+
+/* Subcode for diagnose 500 (virtio hypercall). */
+#define KVM_S390_VIRTIO_CCW_NOTIFY 3
+
+#endif
diff --git a/arch/s390/include/uapi/asm/vtoc.h b/arch/s390/include/uapi/asm/vtoc.h
new file mode 100644
index 000000000..221419de2
--- /dev/null
+++ b/arch/s390/include/uapi/asm/vtoc.h
@@ -0,0 +1,213 @@
+/*
+ * This file contains volume label definitions for DASD devices.
+ *
+ * Copyright IBM Corp. 2005
+ *
+ * Author(s): Volker Sameske <sameske@de.ibm.com>
+ *
+ */
+
+#ifndef _ASM_S390_VTOC_H
+#define _ASM_S390_VTOC_H
+
+#include <linux/types.h>
+
+struct vtoc_ttr
+{
+	__u16 tt;
+	__u8 r;
+} __attribute__ ((packed));
+
+struct vtoc_cchhb
+{
+	__u16 cc;
+	__u16 hh;
+	__u8 b;
+} __attribute__ ((packed));
+
+struct vtoc_cchh
+{
+	__u16 cc;
+	__u16 hh;
+} __attribute__ ((packed));
+
+struct vtoc_labeldate
+{
+	__u8 year;
+	__u16 day;
+} __attribute__ ((packed));
+
+struct vtoc_volume_label_cdl
+{
+	char volkey[4];		/* volume key = volume label */
+	char vollbl[4];		/* volume label */
+	char volid[6];		/* volume identifier */
+	__u8 security;		/* security byte */
+	struct vtoc_cchhb vtoc;	/* VTOC address */
+	char res1[5];		/* reserved */
+	char cisize[4];		/* CI-size for FBA,... */
+				/* ...blanks for CKD */
+	char blkperci[4];	/* no of blocks per CI (FBA), blanks for CKD */
+	char labperci[4];	/* no of labels per CI (FBA), blanks for CKD */
+	char res2[4];		/* reserved */
+	char lvtoc[14];		/* owner code for LVTOC */
+	char res3[29];		/* reserved */
+} __attribute__ ((packed));
+
+struct vtoc_volume_label_ldl {
+	char vollbl[4];		/* volume label */
+	char volid[6];		/* volume identifier */
+	char res3[69];		/* reserved */
+	char ldl_version;	/* version number, valid for ldl format */
+	__u64 formatted_blocks; /* valid when ldl_version >= f2  */
+} __attribute__ ((packed));
+
+struct vtoc_extent
+{
+	__u8 typeind;			/* extent type indicator */
+	__u8 seqno;			/* extent sequence number */
+	struct vtoc_cchh llimit;	/* starting point of this extent */
+	struct vtoc_cchh ulimit;	/* ending point of this extent */
+} __attribute__ ((packed));
+
+struct vtoc_dev_const
+{
+	__u16 DS4DSCYL;	/* number of logical cyls */
+	__u16 DS4DSTRK;	/* number of tracks in a logical cylinder */
+	__u16 DS4DEVTK;	/* device track length */
+	__u8 DS4DEVI;	/* non-last keyed record overhead */
+	__u8 DS4DEVL;	/* last keyed record overhead */
+	__u8 DS4DEVK;	/* non-keyed record overhead differential */
+	__u8 DS4DEVFG;	/* flag byte */
+	__u16 DS4DEVTL;	/* device tolerance */
+	__u8 DS4DEVDT;	/* number of DSCB's per track */
+	__u8 DS4DEVDB;	/* number of directory blocks per track */
+} __attribute__ ((packed));
+
+struct vtoc_format1_label
+{
+	char DS1DSNAM[44];	/* data set name */
+	__u8 DS1FMTID;		/* format identifier */
+	char DS1DSSN[6];	/* data set serial number */
+	__u16 DS1VOLSQ;		/* volume sequence number */
+	struct vtoc_labeldate DS1CREDT; /* creation date: ydd */
+	struct vtoc_labeldate DS1EXPDT; /* expiration date */
+	__u8 DS1NOEPV;		/* number of extents on volume */
+	__u8 DS1NOBDB;		/* no. of bytes used in last directory blk */
+	__u8 DS1FLAG1;		/* flag 1 */
+	char DS1SYSCD[13];	/* system code */
+	struct vtoc_labeldate DS1REFD; /* date last referenced	*/
+	__u8 DS1SMSFG;		/* system managed storage indicators */
+	__u8 DS1SCXTF;		/* sec. space extension flag byte */
+	__u16 DS1SCXTV;		/* secondary space extension value */
+	__u8 DS1DSRG1;		/* data set organisation byte 1 */
+	__u8 DS1DSRG2;		/* data set organisation byte 2 */
+	__u8 DS1RECFM;		/* record format */
+	__u8 DS1OPTCD;		/* option code */
+	__u16 DS1BLKL;		/* block length */
+	__u16 DS1LRECL;		/* record length */
+	__u8 DS1KEYL;		/* key length */
+	__u16 DS1RKP;		/* relative key position */
+	__u8 DS1DSIND;		/* data set indicators */
+	__u8 DS1SCAL1;		/* secondary allocation flag byte */
+	char DS1SCAL3[3];	/* secondary allocation quantity */
+	struct vtoc_ttr DS1LSTAR; /* last used track and block on track */
+	__u16 DS1TRBAL;		/* space remaining on last used track */
+	__u16 res1;		/* reserved */
+	struct vtoc_extent DS1EXT1; /* first extent description */
+	struct vtoc_extent DS1EXT2; /* second extent description */
+	struct vtoc_extent DS1EXT3; /* third extent description */
+	struct vtoc_cchhb DS1PTRDS; /* possible pointer to f2 or f3 DSCB */
+} __attribute__ ((packed));
+
+struct vtoc_format4_label
+{
+	char DS4KEYCD[44];	/* key code for VTOC labels: 44 times 0x04 */
+	__u8 DS4IDFMT;		/* format identifier */
+	struct vtoc_cchhb DS4HPCHR; /* highest address of a format 1 DSCB */
+	__u16 DS4DSREC;		/* number of available DSCB's */
+	struct vtoc_cchh DS4HCCHH; /* CCHH of next available alternate track */
+	__u16 DS4NOATK;		/* number of remaining alternate tracks */
+	__u8 DS4VTOCI;		/* VTOC indicators */
+	__u8 DS4NOEXT;		/* number of extents in VTOC */
+	__u8 DS4SMSFG;		/* system managed storage indicators */
+	__u8 DS4DEVAC;		/* number of alternate cylinders.
+				 * Subtract from first two bytes of
+				 * DS4DEVSZ to get number of usable
+				 * cylinders. can be zero. valid
+				 * only if DS4DEVAV on. */
+	struct vtoc_dev_const DS4DEVCT;	/* device constants */
+	char DS4AMTIM[8];	/* VSAM time stamp */
+	char DS4AMCAT[3];	/* VSAM catalog indicator */
+	char DS4R2TIM[8];	/* VSAM volume/catalog match time stamp */
+	char res1[5];		/* reserved */
+	char DS4F6PTR[5];	/* pointer to first format 6 DSCB */
+	struct vtoc_extent DS4VTOCE; /* VTOC extent description */
+	char res2[10];		/* reserved */
+	__u8 DS4EFLVL;		/* extended free-space management level */
+	struct vtoc_cchhb DS4EFPTR; /* pointer to extended free-space info */
+	char res3;		/* reserved */
+	__u32 DS4DCYL;		/* number of logical cyls */
+	char res4[2];		/* reserved */
+	__u8 DS4DEVF2;		/* device flags */
+	char res5;		/* reserved */
+} __attribute__ ((packed));
+
+struct vtoc_ds5ext
+{
+	__u16 t;	/* RTA of the first track of free extent */
+	__u16 fc;	/* number of whole cylinders in free ext. */
+	__u8 ft;	/* number of remaining free tracks */
+} __attribute__ ((packed));
+
+struct vtoc_format5_label
+{
+	char DS5KEYID[4];	/* key identifier */
+	struct vtoc_ds5ext DS5AVEXT; /* first available (free-space) extent. */
+	struct vtoc_ds5ext DS5EXTAV[7]; /* seven available extents */
+	__u8 DS5FMTID;		/* format identifier */
+	struct vtoc_ds5ext DS5MAVET[18]; /* eighteen available extents */
+	struct vtoc_cchhb DS5PTRDS; /* pointer to next format5 DSCB */
+} __attribute__ ((packed));
+
+struct vtoc_ds7ext
+{
+	__u32 a; /* starting RTA value */
+	__u32 b; /* ending RTA value + 1 */
+} __attribute__ ((packed));
+
+struct vtoc_format7_label
+{
+	char DS7KEYID[4];	/* key identifier */
+	struct vtoc_ds7ext DS7EXTNT[5]; /* space for 5 extent descriptions */
+	__u8 DS7FMTID;		/* format identifier */
+	struct vtoc_ds7ext DS7ADEXT[11]; /* space for 11 extent descriptions */
+	char res1[2];		/* reserved */
+	struct vtoc_cchhb DS7PTRDS; /* pointer to next FMT7 DSCB */
+} __attribute__ ((packed));
+
+struct vtoc_cms_label {
+	__u8 label_id[4];		/* Label identifier */
+	__u8 vol_id[6];		/* Volid */
+	__u16 version_id;		/* Version identifier */
+	__u32 block_size;		/* Disk block size */
+	__u32 origin_ptr;		/* Disk origin pointer */
+	__u32 usable_count;	/* Number of usable cylinders/blocks */
+	__u32 formatted_count;	/* Maximum number of formatted cylinders/
+				 * blocks */
+	__u32 block_count;	/* Disk size in CMS blocks */
+	__u32 used_count;		/* Number of CMS blocks in use */
+	__u32 fst_size;		/* File Status Table (FST) size */
+	__u32 fst_count;		/* Number of FSTs per CMS block */
+	__u8 format_date[6];	/* Disk FORMAT date */
+	__u8 reserved1[2];
+	__u32 disk_offset;	/* Disk offset when reserved*/
+	__u32 map_block;		/* Allocation Map Block with next hole */
+	__u32 hblk_disp;		/* Displacement into HBLK data of next hole */
+	__u32 user_disp;		/* Displacement into user part of Allocation
+				 * map */
+	__u8 reserved2[4];
+	__u8 segment_name[8];	/* Name of shared segment */
+} __attribute__ ((packed));
+
+#endif /* _ASM_S390_VTOC_H */
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
new file mode 100644
index 000000000..f2b18eaca
--- /dev/null
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -0,0 +1,341 @@
+/*
+ *  arch/s390/include/uapi/asm/zcrypt.h
+ *
+ *  zcrypt 2.1.0 (user-visible header)
+ *
+ *  Copyright IBM Corp. 2001, 2006
+ *  Author(s): Robert Burroughs
+ *	       Eric Rossman (edrossma@us.ibm.com)
+ *
+ *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __ASM_S390_ZCRYPT_H
+#define __ASM_S390_ZCRYPT_H
+
+#define ZCRYPT_VERSION 2
+#define ZCRYPT_RELEASE 1
+#define ZCRYPT_VARIANT 1
+
+#include <linux/ioctl.h>
+#include <linux/compiler.h>
+
+/**
+ * struct ica_rsa_modexpo
+ *
+ * Requirements:
+ * - outputdatalength is at least as large as inputdatalength.
+ * - All key parts are right justified in their fields, padded on
+ *   the left with zeroes.
+ * - length(b_key) = inputdatalength
+ * - length(n_modulus) = inputdatalength
+ */
+struct ica_rsa_modexpo {
+	char __user *	inputdata;
+	unsigned int	inputdatalength;
+	char __user *	outputdata;
+	unsigned int	outputdatalength;
+	char __user *	b_key;
+	char __user *	n_modulus;
+};
+
+/**
+ * struct ica_rsa_modexpo_crt
+ *
+ * Requirements:
+ * - inputdatalength is even.
+ * - outputdatalength is at least as large as inputdatalength.
+ * - All key parts are right justified in their fields, padded on
+ *   the left with zeroes.
+ * - length(bp_key)	= inputdatalength/2 + 8
+ * - length(bq_key)	= inputdatalength/2
+ * - length(np_prime)	= inputdatalength/2 + 8
+ * - length(nq_prime)	= inputdatalength/2
+ * - length(u_mult_inv) = inputdatalength/2 + 8
+ */
+struct ica_rsa_modexpo_crt {
+	char __user *	inputdata;
+	unsigned int	inputdatalength;
+	char __user *	outputdata;
+	unsigned int	outputdatalength;
+	char __user *	bp_key;
+	char __user *	bq_key;
+	char __user *	np_prime;
+	char __user *	nq_prime;
+	char __user *	u_mult_inv;
+};
+
+/**
+ * CPRBX
+ *	  Note that all shorts and ints are big-endian.
+ *	  All pointer fields are 16 bytes long, and mean nothing.
+ *
+ *	  A request CPRB is followed by a request_parameter_block.
+ *
+ *	  The request (or reply) parameter block is organized thus:
+ *	    function code
+ *	    VUD block
+ *	    key block
+ */
+struct CPRBX {
+	unsigned short	cprb_len;	/* CPRB length	      220	 */
+	unsigned char	cprb_ver_id;	/* CPRB version id.   0x02	 */
+	unsigned char	pad_000[3];	/* Alignment pad bytes		 */
+	unsigned char	func_id[2];	/* function id	      0x5432	 */
+	unsigned char	cprb_flags[4];	/* Flags			 */
+	unsigned int	req_parml;	/* request parameter buffer len	 */
+	unsigned int	req_datal;	/* request data buffer len	 */
+	unsigned int	rpl_msgbl;	/* reply  message block length	 */
+	unsigned int	rpld_parml;	/* replied parameter block len	 */
+	unsigned int	rpl_datal;	/* reply data block len		 */
+	unsigned int	rpld_datal;	/* replied data block len	 */
+	unsigned int	req_extbl;	/* request extension block len	 */
+	unsigned char	pad_001[4];	/* reserved			 */
+	unsigned int	rpld_extbl;	/* replied extension block len	 */
+	unsigned char	padx000[16 - sizeof (char *)];
+	unsigned char *	req_parmb;	/* request parm block 'address'	 */
+	unsigned char	padx001[16 - sizeof (char *)];
+	unsigned char *	req_datab;	/* request data block 'address'	 */
+	unsigned char	padx002[16 - sizeof (char *)];
+	unsigned char *	rpl_parmb;	/* reply parm block 'address'	 */
+	unsigned char	padx003[16 - sizeof (char *)];
+	unsigned char *	rpl_datab;	/* reply data block 'address'	 */
+	unsigned char	padx004[16 - sizeof (char *)];
+	unsigned char *	req_extb;	/* request extension block 'addr'*/
+	unsigned char	padx005[16 - sizeof (char *)];
+	unsigned char *	rpl_extb;	/* reply extension block 'address'*/
+	unsigned short	ccp_rtcode;	/* server return code		 */
+	unsigned short	ccp_rscode;	/* server reason code		 */
+	unsigned int	mac_data_len;	/* Mac Data Length		 */
+	unsigned char	logon_id[8];	/* Logon Identifier		 */
+	unsigned char	mac_value[8];	/* Mac Value			 */
+	unsigned char	mac_content_flgs;/* Mac content flag byte	 */
+	unsigned char	pad_002;	/* Alignment			 */
+	unsigned short	domain;		/* Domain			 */
+	unsigned char	usage_domain[4];/* Usage domain			 */
+	unsigned char	cntrl_domain[4];/* Control domain		 */
+	unsigned char	S390enf_mask[4];/* S/390 enforcement mask	 */
+	unsigned char	pad_004[36];	/* reserved			 */
+} __attribute__((packed));
+
+/**
+ * xcRB
+ */
+struct ica_xcRB {
+	unsigned short	agent_ID;
+	unsigned int	user_defined;
+	unsigned short	request_ID;
+	unsigned int	request_control_blk_length;
+	unsigned char	padding1[16 - sizeof (char *)];
+	char __user *	request_control_blk_addr;
+	unsigned int	request_data_length;
+	char		padding2[16 - sizeof (char *)];
+	char __user *	request_data_address;
+	unsigned int	reply_control_blk_length;
+	char		padding3[16 - sizeof (char *)];
+	char __user *	reply_control_blk_addr;
+	unsigned int	reply_data_length;
+	char		padding4[16 - sizeof (char *)];
+	char __user *	reply_data_addr;
+	unsigned short	priority_window;
+	unsigned int	status;
+} __attribute__((packed));
+
+/**
+ * struct ep11_cprb - EP11 connectivity programming request block
+ * @cprb_len:		CPRB header length [0x0020]
+ * @cprb_ver_id:	CPRB version id.   [0x04]
+ * @pad_000:		Alignment pad bytes
+ * @flags:		Admin cmd [0x80] or functional cmd [0x00]
+ * @func_id:		Function id / subtype [0x5434]
+ * @source_id:		Source id [originator id]
+ * @target_id:		Target id [usage/ctrl domain id]
+ * @ret_code:		Return code
+ * @reserved1:		Reserved
+ * @reserved2:		Reserved
+ * @payload_len:	Payload length
+ */
+struct ep11_cprb {
+	uint16_t	cprb_len;
+	unsigned char	cprb_ver_id;
+	unsigned char	pad_000[2];
+	unsigned char	flags;
+	unsigned char	func_id[2];
+	uint32_t	source_id;
+	uint32_t	target_id;
+	uint32_t	ret_code;
+	uint32_t	reserved1;
+	uint32_t	reserved2;
+	uint32_t	payload_len;
+} __attribute__((packed));
+
+/**
+ * struct ep11_target_dev - EP11 target device list
+ * @ap_id:	AP device id
+ * @dom_id:	Usage domain id
+ */
+struct ep11_target_dev {
+	uint16_t ap_id;
+	uint16_t dom_id;
+};
+
+/**
+ * struct ep11_urb - EP11 user request block
+ * @targets_num:	Number of target adapters
+ * @targets:		Addr to target adapter list
+ * @weight:		Level of request priority
+ * @req_no:		Request id/number
+ * @req_len:		Request length
+ * @req:		Addr to request block
+ * @resp_len:		Response length
+ * @resp:		Addr to response block
+ */
+struct ep11_urb {
+	uint16_t		targets_num;
+	uint64_t		targets;
+	uint64_t		weight;
+	uint64_t		req_no;
+	uint64_t		req_len;
+	uint64_t		req;
+	uint64_t		resp_len;
+	uint64_t		resp;
+} __attribute__((packed));
+
+#define AUTOSELECT ((unsigned int)0xFFFFFFFF)
+
+#define ZCRYPT_IOCTL_MAGIC 'z'
+
+/**
+ * Interface notes:
+ *
+ * The ioctl()s which are implemented (along with relevant details)
+ * are:
+ *
+ *   ICARSAMODEXPO
+ *     Perform an RSA operation using a Modulus-Exponent pair
+ *     This takes an ica_rsa_modexpo struct as its arg.
+ *
+ *     NOTE: please refer to the comments preceding this structure
+ *	     for the implementation details for the contents of the
+ *	     block
+ *
+ *   ICARSACRT
+ *     Perform an RSA operation using a Chinese-Remainder Theorem key
+ *     This takes an ica_rsa_modexpo_crt struct as its arg.
+ *
+ *     NOTE: please refer to the comments preceding this structure
+ *	     for the implementation details for the contents of the
+ *	     block
+ *
+ *   ZSECSENDCPRB
+ *     Send an arbitrary CPRB to a crypto card.
+ *
+ *   ZSENDEP11CPRB
+ *     Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card.
+ *
+ *   Z90STAT_STATUS_MASK
+ *     Return a 64-element array of unsigned chars for the status of
+ *     all devices.
+ *	 0x01: PCICA
+ *	 0x02: PCICC
+ *	 0x03: PCIXCC_MCL2
+ *	 0x04: PCIXCC_MCL3
+ *	 0x05: CEX2C
+ *	 0x06: CEX2A
+ *	 0x0d: device is disabled via the proc filesystem
+ *
+ *   Z90STAT_QDEPTH_MASK
+ *     Return a 64-element array of unsigned chars for the queue
+ *     depth of all devices.
+ *
+ *   Z90STAT_PERDEV_REQCNT
+ *     Return a 64-element array of unsigned integers for the number
+ *     of successfully completed requests per device since the device
+ *     was detected and made available.
+ *
+ *   Z90STAT_REQUESTQ_COUNT
+ *     Return an integer count of the number of entries waiting to be
+ *     sent to a device.
+ *
+ *   Z90STAT_PENDINGQ_COUNT
+ *     Return an integer count of the number of entries sent to all
+ *     devices awaiting the reply.
+ *
+ *   Z90STAT_TOTALOPEN_COUNT
+ *     Return an integer count of the number of open file handles.
+ *
+ *   Z90STAT_DOMAIN_INDEX
+ *     Return the integer value of the Cryptographic Domain.
+ *
+ *   The following ioctls are deprecated and should no longer be used:
+ *
+ *   Z90STAT_TOTALCOUNT
+ *     Return an integer count of all device types together.
+ *
+ *   Z90STAT_PCICACOUNT
+ *     Return an integer count of all PCICAs.
+ *
+ *   Z90STAT_PCICCCOUNT
+ *     Return an integer count of all PCICCs.
+ *
+ *   Z90STAT_PCIXCCMCL2COUNT
+ *     Return an integer count of all MCL2 PCIXCCs.
+ *
+ *   Z90STAT_PCIXCCMCL3COUNT
+ *     Return an integer count of all MCL3 PCIXCCs.
+ *
+ *   Z90STAT_CEX2CCOUNT
+ *     Return an integer count of all CEX2Cs.
+ *
+ *   Z90STAT_CEX2ACOUNT
+ *     Return an integer count of all CEX2As.
+ *
+ *   ICAZ90STATUS
+ *     Return some device driver status in a ica_z90_status struct
+ *     This takes an ica_z90_status struct as its arg.
+ *
+ *   Z90STAT_PCIXCCCOUNT
+ *     Return an integer count of all PCIXCCs (MCL2 + MCL3).
+ *     This is DEPRECATED now that MCL3 PCIXCCs are treated differently from
+ *     MCL2 PCIXCCs.
+ */
+
+/**
+ * Supported ioctl calls
+ */
+#define ICARSAMODEXPO	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0)
+#define ICARSACRT	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
+#define ZSECSENDCPRB	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
+#define ZSENDEP11CPRB	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
+
+/* New status calls */
+#define Z90STAT_TOTALCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int)
+#define Z90STAT_PCICACOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x41, int)
+#define Z90STAT_PCICCCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x42, int)
+#define Z90STAT_PCIXCCMCL2COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4b, int)
+#define Z90STAT_PCIXCCMCL3COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4c, int)
+#define Z90STAT_CEX2CCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4d, int)
+#define Z90STAT_CEX2ACOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4e, int)
+#define Z90STAT_REQUESTQ_COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x44, int)
+#define Z90STAT_PENDINGQ_COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x45, int)
+#define Z90STAT_TOTALOPEN_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x46, int)
+#define Z90STAT_DOMAIN_INDEX	_IOR(ZCRYPT_IOCTL_MAGIC, 0x47, int)
+#define Z90STAT_STATUS_MASK	_IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64])
+#define Z90STAT_QDEPTH_MASK	_IOR(ZCRYPT_IOCTL_MAGIC, 0x49, char[64])
+#define Z90STAT_PERDEV_REQCNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4a, int[64])
+
+#endif /* __ASM_S390_ZCRYPT_H */
diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore
new file mode 100644
index 000000000..c5f676c3c
--- /dev/null
+++ b/arch/s390/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
new file mode 100644
index 000000000..ffb87617a
--- /dev/null
+++ b/arch/s390/kernel/Makefile
@@ -0,0 +1,60 @@
+#
+# Makefile for the linux kernel.
+#
+
+ifdef CONFIG_FUNCTION_TRACER
+# Don't trace early setup code and tracing code
+CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+endif
+
+#
+# Passing null pointers is ok for smp code, since we access the lowcore here.
+#
+CFLAGS_smp.o	:= -Wno-nonnull
+
+#
+# Disable tailcall optimizations for stack / callchain walking functions
+# since this might generate broken code when accessing register 15 and
+# passing its content to other functions.
+#
+CFLAGS_stacktrace.o	+= -fno-optimize-sibling-calls
+CFLAGS_dumpstack.o	+= -fno-optimize-sibling-calls
+
+#
+# Pass UTS_MACHINE for user_regset definition
+#
+CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
+CFLAGS_sysinfo.o += -w
+
+obj-y	:= traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
+obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
+obj-y	+= debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
+obj-y	+= sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
+obj-y	+= runtime_instr.o cache.o dumpstack.o
+obj-y	+= entry.o reipl.o relocate_kernel.o
+
+extra-y				+= head.o head64.o vmlinux.lds
+
+obj-$(CONFIG_MODULES)		+= s390_ksyms.o module.o
+obj-$(CONFIG_SMP)		+= smp.o
+obj-$(CONFIG_SCHED_BOOK)	+= topology.o
+obj-$(CONFIG_HIBERNATION)	+= suspend.o swsusp.o
+obj-$(CONFIG_AUDIT)		+= audit.o
+compat-obj-$(CONFIG_AUDIT)	+= compat_audit.o
+obj-$(CONFIG_COMPAT)		+= compat_linux.o compat_signal.o
+obj-$(CONFIG_COMPAT)		+= compat_wrapper.o $(compat-obj-y)
+
+obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
+obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o ftrace.o
+obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_UPROBES)		+= uprobes.o
+
+obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf.o perf_cpum_sf.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_cpum_cf_events.o
+
+# vdso
+obj-y				+= vdso64/
+obj-$(CONFIG_COMPAT)		+= vdso32/
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
new file mode 100644
index 000000000..c7d1b9d09
--- /dev/null
+++ b/arch/s390/kernel/asm-offsets.c
@@ -0,0 +1,184 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
+
+#define ASM_OFFSETS_C
+
+#include <linux/kbuild.h>
+#include <linux/kvm_host.h>
+#include <linux/sched.h>
+#include <asm/idle.h>
+#include <asm/vdso.h>
+#include <asm/pgtable.h>
+
+/*
+ * Make sure that the compiler is new enough. We want a compiler that
+ * is known to work with the "Q" assembler constraint.
+ */
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3)
+#error Your compiler is too old; please use version 4.3 or newer
+#endif
+
+int main(void)
+{
+	DEFINE(__THREAD_info, offsetof(struct task_struct, stack));
+	DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp));
+	DEFINE(__THREAD_mm_segment, offsetof(struct task_struct, thread.mm_segment));
+	BLANK();
+	DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
+	BLANK();
+	DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause));
+	DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address));
+	DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid));
+	BLANK();
+	DEFINE(__TI_task, offsetof(struct thread_info, task));
+	DEFINE(__TI_flags, offsetof(struct thread_info, flags));
+	DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table));
+	DEFINE(__TI_cpu, offsetof(struct thread_info, cpu));
+	DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count));
+	DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer));
+	DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer));
+	DEFINE(__TI_last_break, offsetof(struct thread_info, last_break));
+	BLANK();
+	DEFINE(__PT_ARGS, offsetof(struct pt_regs, args));
+	DEFINE(__PT_PSW, offsetof(struct pt_regs, psw));
+	DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs));
+	DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2));
+	DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code));
+	DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm));
+	DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long));
+	DEFINE(__PT_FLAGS, offsetof(struct pt_regs, flags));
+	DEFINE(__PT_SIZE, sizeof(struct pt_regs));
+	BLANK();
+	DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain));
+	DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs));
+	DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1));
+	BLANK();
+	/* struct vdso_data / vdso_per_cpu_data offsets for use by the vdso */
+	DEFINE(__VDSO_UPD_COUNT, offsetof(struct vdso_data, tb_update_count));
+	DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp));
+	DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec));
+	DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
+	DEFINE(__VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
+	DEFINE(__VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
+	DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
+	DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
+	DEFINE(__VDSO_WTOM_CRS_SEC, offsetof(struct vdso_data, wtom_coarse_sec));
+	DEFINE(__VDSO_WTOM_CRS_NSEC, offsetof(struct vdso_data, wtom_coarse_nsec));
+	DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
+	DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
+	DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult));
+	DEFINE(__VDSO_TK_SHIFT, offsetof(struct vdso_data, tk_shift));
+	DEFINE(__VDSO_ECTG_BASE, offsetof(struct vdso_per_cpu_data, ectg_timer_base));
+	DEFINE(__VDSO_ECTG_USER, offsetof(struct vdso_per_cpu_data, ectg_user_time));
+	/* constants used by the vdso */
+	DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
+	DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+	DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
+	DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
+	DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID);
+	DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
+	DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC);
+	BLANK();
+	/* idle data offsets */
+	DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter));
+	DEFINE(__CLOCK_IDLE_EXIT, offsetof(struct s390_idle_data, clock_idle_exit));
+	DEFINE(__TIMER_IDLE_ENTER, offsetof(struct s390_idle_data, timer_idle_enter));
+	DEFINE(__TIMER_IDLE_EXIT, offsetof(struct s390_idle_data, timer_idle_exit));
+	/* lowcore offsets */
+	DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params));
+	DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr));
+	DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code));
+	DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc));
+	DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code));
+	DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
+	DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
+	DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
+	DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num));
+	DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code));
+	DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid));
+	DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
+	DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id));
+	DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
+	DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id));
+	DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
+	DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code));
+	DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
+	DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
+	DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));
+	DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
+	DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
+	DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
+	DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code));
+	DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
+	DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
+	DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
+	DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw));
+	DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw));
+	DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw));
+	DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw));
+	DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw));
+	DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw));
+	DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw));
+	DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw));
+	DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw));
+	BLANK();
+	DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync));
+	DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async));
+	DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart));
+	DEFINE(__LC_CPU_FLAGS, offsetof(struct _lowcore, cpu_flags));
+	DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw));
+	DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw));
+	DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer));
+	DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer));
+	DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer));
+	DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer));
+	DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer));
+	DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer));
+	DEFINE(__LC_STEAL_TIMER, offsetof(struct _lowcore, steal_timer));
+	DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer));
+	DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock));
+	DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task));
+	DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid));
+	DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info));
+	DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack));
+	DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack));
+	DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack));
+	DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack));
+	DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn));
+	DEFINE(__LC_RESTART_DATA, offsetof(struct _lowcore, restart_data));
+	DEFINE(__LC_RESTART_SOURCE, offsetof(struct _lowcore, restart_source));
+	DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce));
+	DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce));
+	DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
+	DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
+	DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
+	DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib));
+	BLANK();
+	DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));
+	DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area));
+	DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area));
+	DEFINE(__LC_PREFIX_SAVE_AREA, offsetof(struct _lowcore, prefixreg_save_area));
+	DEFINE(__LC_AREGS_SAVE_AREA, offsetof(struct _lowcore, access_regs_save_area));
+	DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area));
+	DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area));
+	DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area));
+	DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
+	DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
+	DEFINE(__LC_VX_SAVE_AREA_ADDR, offsetof(struct _lowcore, vector_save_area_addr));
+	DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
+	DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
+	DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
+	DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area));
+	DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr));
+	DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data));
+	DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap));
+	DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
+	DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb));
+	DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
+	DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
+	DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
+	return 0;
+}
diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c
new file mode 100644
index 000000000..f4932c22e
--- /dev/null
+++ b/arch/s390/kernel/audit.c
@@ -0,0 +1,78 @@
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/audit.h>
+#include <asm/unistd.h>
+#include "audit.h"
+
+static unsigned dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+static unsigned read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+static unsigned write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+static unsigned chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+static unsigned signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_arch(int arch)
+{
+#ifdef CONFIG_COMPAT
+	if (arch == AUDIT_ARCH_S390)
+		return 1;
+#endif
+	return 0;
+}
+
+int audit_classify_syscall(int abi, unsigned syscall)
+{
+#ifdef CONFIG_COMPAT
+	if (abi == AUDIT_ARCH_S390)	/* delegate to the compat classifier */
+		return s390_classify_syscall(syscall);
+#endif
+	switch(syscall) {
+	case __NR_open:
+		return 2;
+	case __NR_openat:
+		return 3;
+	case __NR_socketcall:
+		return 4;
+	case __NR_execve:
+		return 5;
+	default:
+		return 0;	/* any other syscall is class 0 */
+	}
+}
+
+static int __init audit_classes_init(void)
+{
+#ifdef CONFIG_COMPAT
+	audit_register_class(AUDIT_CLASS_WRITE_32, s390_write_class);
+	audit_register_class(AUDIT_CLASS_READ_32, s390_read_class);
+	audit_register_class(AUDIT_CLASS_DIR_WRITE_32, s390_dir_class);
+	audit_register_class(AUDIT_CLASS_CHATTR_32, s390_chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL_32, s390_signal_class);
+#endif
+	audit_register_class(AUDIT_CLASS_WRITE, write_class);
+	audit_register_class(AUDIT_CLASS_READ, read_class);
+	audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
+	audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
+	return 0;
+}
+
+__initcall(audit_classes_init);
diff --git a/arch/s390/kernel/audit.h b/arch/s390/kernel/audit.h
new file mode 100644
index 000000000..12b56f4b5
--- /dev/null
+++ b/arch/s390/kernel/audit.h
@@ -0,0 +1,15 @@
+#ifndef __ARCH_S390_KERNEL_AUDIT_H
+#define __ARCH_S390_KERNEL_AUDIT_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_COMPAT
+extern int s390_classify_syscall(unsigned);
+extern __u32 s390_dir_class[];
+extern __u32 s390_write_class[];
+extern __u32 s390_read_class[];
+extern __u32 s390_chattr_class[];
+extern __u32 s390_signal_class[];
+#endif /* CONFIG_COMPAT */
+
+#endif /* __ARCH_S390_KERNEL_AUDIT_H */
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
new file mode 100644
index 000000000..daed3fde4
--- /dev/null
+++ b/arch/s390/kernel/base.S
@@ -0,0 +1,131 @@
+/*
+ *  arch/s390/kernel/base.S
+ *
+ *    Copyright IBM Corp. 2006, 2007
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ *		 Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/sigp.h>
+
+ENTRY(s390_base_mcck_handler)
+	basr	%r13,0
+0:	lg	%r15,__LC_PANIC_STACK	# load panic stack
+	aghi	%r15,-STACK_FRAME_OVERHEAD	# reserve a stack frame
+	larl	%r1,s390_base_mcck_handler_fn
+	lg	%r1,0(%r1)		# fetch the handler pointer
+	ltgr	%r1,%r1
+	jz	1f			# no handler set: skip the call
+	basr	%r14,%r1		# call the handler
+1:	la	%r1,4095		# %r1 = 4095, base for the lmg below
+	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)	# reload gprs
+	lpswe	__LC_MCK_OLD_PSW	# resume with the mcck old PSW
+
+	.section .bss
+	.align 8
+	.globl	s390_base_mcck_handler_fn
+s390_base_mcck_handler_fn:
+	.quad	0
+	.previous
+
+ENTRY(s390_base_ext_handler)
+	stmg	%r0,%r15,__LC_SAVE_AREA_ASYNC	# save all gprs
+	basr	%r13,0
+0:	aghi	%r15,-STACK_FRAME_OVERHEAD	# reserve a stack frame
+	larl	%r1,s390_base_ext_handler_fn
+	lg	%r1,0(%r1)		# fetch the handler pointer
+	ltgr	%r1,%r1
+	jz	1f			# no handler set: skip the call
+	basr	%r14,%r1		# call the handler
+1:	lmg	%r0,%r15,__LC_SAVE_AREA_ASYNC	# restore all gprs
+	ni	__LC_EXT_OLD_PSW+1,0xfd	# clear wait state bit
+	lpswe	__LC_EXT_OLD_PSW	# resume with the ext old PSW
+
+	.section .bss
+	.align 8
+	.globl s390_base_ext_handler_fn
+s390_base_ext_handler_fn:
+	.quad	0
+	.previous
+
+ENTRY(s390_base_pgm_handler)
+	stmg	%r0,%r15,__LC_SAVE_AREA_SYNC	# save all gprs
+	basr	%r13,0			# %r13 = address of label 0
+0:	aghi	%r15,-STACK_FRAME_OVERHEAD	# reserve a stack frame
+	larl	%r1,s390_base_pgm_handler_fn
+	lg	%r1,0(%r1)		# fetch the handler pointer
+	ltgr	%r1,%r1
+	jz	1f			# no handler set: stop at 1 below
+	basr	%r14,%r1		# call the handler
+	lmg	%r0,%r15,__LC_SAVE_AREA_SYNC	# restore all gprs
+	lpswe	__LC_PGM_OLD_PSW	# resume with the pgm old PSW
+1:	lpswe	disabled_wait_psw-0b(%r13)	# load disabled_wait_psw
+
+	.align	8
+disabled_wait_psw:
+	.quad	0x0002000180000000,0x0000000000000000 + s390_base_pgm_handler
+
+	.section .bss
+	.align 8
+	.globl s390_base_pgm_handler_fn
+s390_base_pgm_handler_fn:
+	.quad	0
+	.previous
+
+#
+# Calls diag 308 subcode 1 and continues execution
+#
+# The following conditions must be ensured before calling this function:
+# * Prefix register = 0
+# * Lowcore protection is disabled
+#
+ENTRY(diag308_reset)
+	larl	%r4,.Lctlregs		# Save control registers
+	stctg	%c0,%c15,0(%r4)
+	larl	%r4,.Lfpctl		# Floating point control register
+	stfpc	0(%r4)
+	larl	%r4,.Lcontinue_psw	# Save PSW flags
+	epsw	%r2,%r3
+	stm	%r2,%r3,0(%r4)
+	larl	%r4,.Lrestart_psw	# Setup restart PSW at absolute 0
+	lghi	%r3,0
+	lg	%r4,0(%r4)		# Save PSW
+	sturg	%r4,%r3			# Use sturg, because of large pages
+	lghi	%r1,1
+	lghi	%r0,0
+	diag	%r0,%r1,0x308
+.Lrestart_part2:
+	lhi	%r0,0			# Load r0 with zero
+	lhi	%r1,2			# Use mode 2 = ESAME (dump)
+	sigp	%r1,%r0,SIGP_SET_ARCHITECTURE	# Switch to ESAME mode
+	sam64				# Switch to 64 bit addressing mode
+	larl	%r4,.Lctlregs		# Restore control registers
+	lctlg	%c0,%c15,0(%r4)
+	larl	%r4,.Lfpctl		# Restore floating point ctl register
+	lfpc	0(%r4)
+	larl	%r4,.Lcontinue_psw	# Restore PSW flags
+	lpswe	0(%r4)
+.Lcontinue:
+	br	%r14
+.align 16
+.Lrestart_psw:
+	.long	0x00080000,0x80000000 + .Lrestart_part2
+
+	.section .data..nosave,"aw",@progbits
+.align 8
+.Lcontinue_psw:
+	.quad	0,.Lcontinue
+	.previous
+
+	.section .bss
+.align 8
+.Lctlregs:
+	.rept	16
+	.quad	0
+	.endr
+.Lfpctl:
+	.long	0
+	.previous
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
new file mode 100644
index 000000000..bff5e3b6d
--- /dev/null
+++ b/arch/s390/kernel/cache.c
@@ -0,0 +1,218 @@
+/*
+ * Extract CPU cache information and expose it via sysfs.
+ *
+ *    Copyright IBM Corp. 2012
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/seq_file.h>
+#include <linux/cpu.h>
+#include <linux/cacheinfo.h>
+#include <asm/facility.h>
+
+enum {
+	CACHE_SCOPE_NOTEXISTS,
+	CACHE_SCOPE_PRIVATE,
+	CACHE_SCOPE_SHARED,
+	CACHE_SCOPE_RESERVED,
+};
+
+enum {
+	CTYPE_SEPARATE,
+	CTYPE_DATA,
+	CTYPE_INSTRUCTION,
+	CTYPE_UNIFIED,
+};
+
+enum {
+	EXTRACT_TOPOLOGY,
+	EXTRACT_LINE_SIZE,
+	EXTRACT_SIZE,
+	EXTRACT_ASSOCIATIVITY,
+};
+
+enum {
+	CACHE_TI_UNIFIED = 0,
+	CACHE_TI_DATA = 0,
+	CACHE_TI_INSTRUCTION,
+};
+
+struct cache_info {
+	unsigned char	    : 4;
+	unsigned char scope : 2;
+	unsigned char type  : 2;
+};
+
+#define CACHE_MAX_LEVEL 8
+union cache_topology {
+	struct cache_info ci[CACHE_MAX_LEVEL];
+	unsigned long long raw;
+};
+
+/* Printable names, indexed by enum cache_type (see show_cacheinfo()). */
+static const char * const cache_type_string[] = {
+	"",
+	"Instruction",
+	"Data",
+	"",
+	"Unified",
+};
+
+/* Maps the CTYPE_* type field of a topology entry to enum cache_type. */
+static const enum cache_type cache_type_map[] = {
+	[CTYPE_SEPARATE] = CACHE_TYPE_SEPARATE,
+	[CTYPE_DATA] = CACHE_TYPE_DATA,
+	[CTYPE_INSTRUCTION] = CACHE_TYPE_INST,
+	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
+};
+
+/*
+ * Print one line per cache leaf of an arbitrary online CPU to @m.
+ * Prints nothing when facility 34 is not installed.
+ */
+void show_cacheinfo(struct seq_file *m)
+{
+	struct cpu_cacheinfo *this_cpu_ci;
+	struct cacheinfo *cache;
+	int idx;
+
+	if (!test_facility(34))
+		return;
+	get_online_cpus();
+	this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask));
+	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
+		cache = this_cpu_ci->info_list + idx;
+		seq_printf(m, "cache%-11d: ", idx);
+		seq_printf(m, "level=%d ", cache->level);
+		seq_printf(m, "type=%s ", cache_type_string[cache->type]);
+		seq_printf(m, "scope=%s ",
+			   cache->disable_sysfs ? "Shared" : "Private");
+		seq_printf(m, "size=%dK ", cache->size >> 10);
+		seq_printf(m, "line_size=%u ", cache->coherency_line_size);
+		seq_printf(m, "associativity=%d", cache->ways_of_associativity);
+		seq_puts(m, "\n");
+	}
+	put_online_cpus();
+}
+
+/*
+ * Translate the topology entry for @level into an enum cache_type.
+ * Returns CACHE_TYPE_NOCACHE for levels >= CACHE_MAX_LEVEL and for entries
+ * whose scope is neither private nor shared.
+ */
+static inline enum cache_type get_cache_type(struct cache_info *ci, int level)
+{
+	if (level >= CACHE_MAX_LEVEL)
+		return CACHE_TYPE_NOCACHE;
+	ci += level;
+	if (ci->scope != CACHE_SCOPE_SHARED && ci->scope != CACHE_SCOPE_PRIVATE)
+		return CACHE_TYPE_NOCACHE;
+	return cache_type_map[ci->type];
+}
+
+/*
+ * Issue the ecag instruction for attribute @ai, cache level @li and type
+ * indication @ti and return its result.
+ */
+static inline unsigned long ecag(int ai, int li, int ti)
+{
+	unsigned long cmd, val;
+
+	cmd = ai << 4 | li << 1 | ti;
+	asm volatile(".insn	rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */
+		     : "=d" (val) : "a" (cmd));
+	return val;
+}
+
+/*
+ * Fill in @this_leaf for the cache of the given @type at zero-based @level,
+ * querying line size, associativity and size via ecag. Leaves that are not
+ * @private get disable_sysfs set.
+ */
+static void ci_leaf_init(struct cacheinfo *this_leaf, int private,
+			 enum cache_type type, unsigned int level, int cpu)
+{
+	int ti, num_sets;
+
+	if (type == CACHE_TYPE_INST)
+		ti = CACHE_TI_INSTRUCTION;
+	else
+		ti = CACHE_TI_UNIFIED;
+	this_leaf->level = level + 1;
+	this_leaf->type = type;
+	this_leaf->coherency_line_size = ecag(EXTRACT_LINE_SIZE, level, ti);
+	this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti);
+	this_leaf->size = ecag(EXTRACT_SIZE, level, ti);
+	num_sets = this_leaf->size / this_leaf->coherency_line_size;
+	num_sets /= this_leaf->ways_of_associativity;
+	this_leaf->number_of_sets = num_sets;
+	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
+	if (!private)
+		this_leaf->disable_sysfs = true;
+}
+
+/*
+ * Count the cache levels and leaves described by the ecag topology summary
+ * and store them in the cpu_cacheinfo of @cpu.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if facility 34 is not installed and
+ * -EINVAL if there is no cpu_cacheinfo for @cpu.
+ */
+int init_cache_level(unsigned int cpu)
+{
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	unsigned int level = 0, leaves = 0;
+	union cache_topology ct;
+	enum cache_type ctype;
+
+	/* ecag is only issued when facility 34 is installed */
+	if (!test_facility(34))
+		return -EOPNOTSUPP;
+	if (!this_cpu_ci)
+		return -EINVAL;
+	ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
+	do {
+		ctype = get_cache_type(&ct.ci[0], level);
+		if (ctype == CACHE_TYPE_NOCACHE)
+			break;
+		/* Separate instruction and data caches */
+		leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
+	} while (++level < CACHE_MAX_LEVEL);
+	this_cpu_ci->num_levels = level;
+	this_cpu_ci->num_leaves = leaves;
+	return 0;
+}
+
+/*
+ * Initialize the cacheinfo leaves of @cpu: one leaf per level, or two (data
+ * first, then instruction) for a level with separate caches.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if facility 34 is not installed and
+ * -EINVAL if info_list is NULL while leaves remain to be filled.
+ */
+int populate_cache_leaves(unsigned int cpu)
+{
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+	unsigned int level, idx, pvt;
+	union cache_topology ct;
+	enum cache_type ctype;
+
+	if (!test_facility(34))
+		return -EOPNOTSUPP;
+	ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
+	for (idx = 0, level = 0; level < this_cpu_ci->num_levels &&
+	     idx < this_cpu_ci->num_leaves; idx++, level++) {
+		if (!this_leaf)
+			return -EINVAL;
+		pvt = (ct.ci[level].scope == CACHE_SCOPE_PRIVATE) ? 1 : 0;
+		ctype = get_cache_type(&ct.ci[0], level);
+		if (ctype == CACHE_TYPE_SEPARATE) {
+			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level, cpu);
+			ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level, cpu);
+		} else {
+			ci_leaf_init(this_leaf++, pvt, ctype, level, cpu);
+		}
+	}
+	return 0;
+}
diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c
new file mode 100644
index 000000000..d6487bf87
--- /dev/null
+++ b/arch/s390/kernel/compat_audit.c
@@ -0,0 +1,44 @@
+#undef __s390x__
+#include <asm/unistd.h>
+#include "audit.h"
+
+unsigned s390_dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+unsigned s390_chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+unsigned s390_write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+unsigned s390_read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+unsigned s390_signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int s390_classify_syscall(unsigned syscall)
+{
+	switch(syscall) {
+	case __NR_open:
+		return 2;
+	case __NR_openat:
+		return 3;
+	case __NR_socketcall:
+		return 4;
+	case __NR_execve:
+		return 5;
+	default:
+		return 1;	/* any other compat syscall is class 1 */
+	}
+}
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
new file mode 100644
index 000000000..437e61159
--- /dev/null
+++ b/arch/s390/kernel/compat_linux.c
@@ -0,0 +1,520 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 2000
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Gerhard Tonn (ton@de.ibm.com)   
+ *               Thomas Spatzier (tspat@de.ibm.com)
+ *
+ *  Conversion between 31bit and 64bit native syscalls.
+ *
+ * Heavily inspired by the 32-bit Sparc compat code which is 
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ *
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h> 
+#include <linux/mm.h> 
+#include <linux/file.h> 
+#include <linux/signal.h>
+#include <linux/resource.h>
+#include <linux/times.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/uio.h>
+#include <linux/quota.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/personality.h>
+#include <linux/stat.h>
+#include <linux/filter.h>
+#include <linux/highmem.h>
+#include <linux/highuid.h>
+#include <linux/mman.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/icmpv6.h>
+#include <linux/syscalls.h>
+#include <linux/sysctl.h>
+#include <linux/binfmts.h>
+#include <linux/capability.h>
+#include <linux/compat.h>
+#include <linux/vfs.h>
+#include <linux/ptrace.h>
+#include <linux/fadvise.h>
+#include <linux/ipc.h>
+#include <linux/slab.h>
+
+#include <asm/types.h>
+#include <asm/uaccess.h>
+
+#include <net/scm.h>
+#include <net/sock.h>
+
+#include "compat_linux.h"
+
+/* For this source file, we want overflow handling. Expansions are fully
+ * parenthesized; note that each macro may evaluate its argument twice. */
+#undef high2lowuid
+#undef high2lowgid
+#undef low2highuid
+#undef low2highgid
+#undef SET_UID16
+#undef SET_GID16
+#undef NEW_TO_OLD_UID
+#undef NEW_TO_OLD_GID
+#undef SET_OLDSTAT_UID
+#undef SET_OLDSTAT_GID
+#undef SET_STAT_UID
+#undef SET_STAT_GID
+
+#define high2lowuid(uid) (((uid) > 65535) ? (u16)overflowuid : (u16)(uid))
+#define high2lowgid(gid) (((gid) > 65535) ? (u16)overflowgid : (u16)(gid))
+#define low2highuid(uid) (((uid) == (u16)-1) ? (uid_t)-1 : (uid_t)(uid))
+#define low2highgid(gid) (((gid) == (u16)-1) ? (gid_t)-1 : (gid_t)(gid))
+#define SET_UID16(var, uid)	((var) = high2lowuid(uid))
+#define SET_GID16(var, gid)	((var) = high2lowgid(gid))
+#define NEW_TO_OLD_UID(uid)	high2lowuid(uid)
+#define NEW_TO_OLD_GID(gid)	high2lowgid(gid)
+#define SET_OLDSTAT_UID(stat, uid)	((stat).st_uid = high2lowuid(uid))
+#define SET_OLDSTAT_GID(stat, gid)	((stat).st_gid = high2lowgid(gid))
+#define SET_STAT_UID(stat, uid)		((stat).st_uid = high2lowuid(uid))
+#define SET_STAT_GID(stat, gid)		((stat).st_gid = high2lowgid(gid))
+
+COMPAT_SYSCALL_DEFINE3(s390_chown16, const char __user *, filename,
+		       u16, user, u16, group)
+{
+	return sys_chown(filename, low2highuid(user), low2highgid(group));
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_lchown16, const char __user *,
+		       filename, u16, user, u16, group)
+{
+	return sys_lchown(filename, low2highuid(user), low2highgid(group));
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_fchown16, unsigned int, fd, u16, user, u16, group)
+{
+	return sys_fchown(fd, low2highuid(user), low2highgid(group));
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid)
+{
+	return sys_setregid(low2highgid(rgid), low2highgid(egid));
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid)
+{
+	return sys_setgid((gid_t)gid);
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid)
+{
+	return sys_setreuid(low2highuid(ruid), low2highuid(euid));
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid)
+{
+	return sys_setuid((uid_t)uid);
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid)
+{
+	return sys_setresuid(low2highuid(ruid), low2highuid(euid),
+			     low2highuid(suid));
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_getresuid16, u16 __user *, ruidp,
+		       u16 __user *, euidp, u16 __user *, suidp)
+{
+	const struct cred *cred = current_cred();
+	int retval;
+	u16 ruid, euid, suid;
+
+	ruid = high2lowuid(from_kuid_munged(cred->user_ns, cred->uid));
+	euid = high2lowuid(from_kuid_munged(cred->user_ns, cred->euid));
+	suid = high2lowuid(from_kuid_munged(cred->user_ns, cred->suid));
+
+	if (!(retval   = put_user(ruid, ruidp)) &&
+	    !(retval   = put_user(euid, euidp)))
+		retval = put_user(suid, suidp);
+
+	return retval;
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_setresgid16, u16, rgid, u16, egid, u16, sgid)
+{
+	return sys_setresgid(low2highgid(rgid), low2highgid(egid),
+			     low2highgid(sgid));
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_getresgid16, u16 __user *, rgidp,
+		       u16 __user *, egidp, u16 __user *, sgidp)
+{
+	const struct cred *cred = current_cred();
+	int retval;
+	u16 rgid, egid, sgid;
+
+	rgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->gid));
+	egid = high2lowgid(from_kgid_munged(cred->user_ns, cred->egid));
+	sgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->sgid));
+
+	if (!(retval   = put_user(rgid, rgidp)) &&
+	    !(retval   = put_user(egid, egidp)))
+		retval = put_user(sgid, sgidp);
+
+	return retval;
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid)
+{
+	return sys_setfsuid((uid_t)uid);
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid)
+{
+	return sys_setfsgid((gid_t)gid);
+}
+
+static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info)
+{
+	struct user_namespace *user_ns = current_user_ns();
+	int i;
+	u16 group;
+	kgid_t kgid;
+
+	for (i = 0; i < group_info->ngroups; i++) {
+		kgid = GROUP_AT(group_info, i);
+		group = (u16)from_kgid_munged(user_ns, kgid);
+		if (put_user(group, grouplist+i))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int groups16_from_user(struct group_info *group_info, u16 __user *grouplist)
+{
+	struct user_namespace *user_ns = current_user_ns();
+	int i;
+	u16 group;
+	kgid_t kgid;
+
+	for (i = 0; i < group_info->ngroups; i++) {
+		if (get_user(group, grouplist+i))
+			return  -EFAULT;
+
+		kgid = make_kgid(user_ns, (gid_t)group);
+		if (!gid_valid(kgid))
+			return -EINVAL;
+
+		GROUP_AT(group_info, i) = kgid;
+	}
+
+	return 0;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_getgroups16, int, gidsetsize, u16 __user *, grouplist)
+{
+	const struct cred *cred = current_cred();
+	int i;
+
+	if (gidsetsize < 0)
+		return -EINVAL;
+
+	get_group_info(cred->group_info);
+	i = cred->group_info->ngroups;
+	if (gidsetsize) {
+		if (i > gidsetsize) {
+			i = -EINVAL;
+			goto out;
+		}
+		if (groups16_to_user(grouplist, cred->group_info)) {
+			i = -EFAULT;
+			goto out;
+		}
+	}
+out:
+	put_group_info(cred->group_info);
+	return i;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplist)
+{
+	struct group_info *group_info;
+	int retval;
+
+	if (!may_setgroups())
+		return -EPERM;
+	if ((unsigned)gidsetsize > NGROUPS_MAX)
+		return -EINVAL;
+
+	group_info = groups_alloc(gidsetsize);
+	if (!group_info)
+		return -ENOMEM;
+	retval = groups16_from_user(group_info, grouplist);
+	if (retval) {
+		put_group_info(group_info);
+		return retval;
+	}
+
+	retval = set_current_groups(group_info);
+	put_group_info(group_info);
+
+	return retval;
+}
+
+COMPAT_SYSCALL_DEFINE0(s390_getuid16)
+{
+	return high2lowuid(from_kuid_munged(current_user_ns(), current_uid()));
+}
+
+COMPAT_SYSCALL_DEFINE0(s390_geteuid16)
+{
+	return high2lowuid(from_kuid_munged(current_user_ns(), current_euid()));
+}
+
+COMPAT_SYSCALL_DEFINE0(s390_getgid16)
+{
+	return high2lowgid(from_kgid_munged(current_user_ns(), current_gid()));
+}
+
+COMPAT_SYSCALL_DEFINE0(s390_getegid16)
+{
+	return high2lowgid(from_kgid_munged(current_user_ns(), current_egid()));
+}
+
+#ifdef CONFIG_SYSVIPC
+COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second,
+		compat_ulong_t, third, compat_uptr_t, ptr)
+{
+	if (call >> 16)		/* hack for backward compatibility */
+		return -EINVAL;
+	return compat_sys_ipc(call, first, second, third, ptr, third);
+}
+#endif
+
+COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low)
+{
+	return sys_truncate(path, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low)
+{
+	return sys_ftruncate(fd, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf,
+		       compat_size_t, count, u32, high, u32, low)
+{
+	if ((compat_ssize_t) count < 0)
+		return -EINVAL;
+	return sys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf,
+		       compat_size_t, count, u32, high, u32, low)
+{
+	if ((compat_ssize_t) count < 0)
+		return -EINVAL;
+	return sys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count)
+{
+	return sys_readahead(fd, (unsigned long)high << 32 | low, count);
+}
+
+struct stat64_emu31 {
+	unsigned long long  st_dev;
+	unsigned int    __pad1;
+#define STAT64_HAS_BROKEN_ST_INO        1
+	u32             __st_ino;
+	unsigned int    st_mode;
+	unsigned int    st_nlink;
+	u32             st_uid;
+	u32             st_gid;
+	unsigned long long  st_rdev;
+	unsigned int    __pad3;
+	long            st_size;
+	u32             st_blksize;
+	unsigned char   __pad4[4];
+	u32             __pad5;     /* future possible st_blocks high bits */
+	u32             st_blocks;  /* Number 512-byte blocks allocated. */
+	u32             st_atime;
+	u32             __pad6;
+	u32             st_mtime;
+	u32             __pad7;
+	u32             st_ctime;
+	u32             __pad8;     /* will be high 32 bits of ctime someday */
+	unsigned long   st_ino;
+};	
+
+static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat)
+{
+	struct stat64_emu31 tmp;
+
+	memset(&tmp, 0, sizeof(tmp));
+
+	tmp.st_dev = huge_encode_dev(stat->dev);
+	tmp.st_ino = stat->ino;
+	tmp.__st_ino = (u32)stat->ino;
+	tmp.st_mode = stat->mode;
+	tmp.st_nlink = (unsigned int)stat->nlink;
+	tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
+	tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	tmp.st_rdev = huge_encode_dev(stat->rdev);
+	tmp.st_size = stat->size;
+	tmp.st_blksize = (u32)stat->blksize;
+	tmp.st_blocks = (u32)stat->blocks;
+	tmp.st_atime = (u32)stat->atime.tv_sec;
+	tmp.st_mtime = (u32)stat->mtime.tv_sec;
+	tmp.st_ctime = (u32)stat->ctime.tv_sec;
+
+	return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; 
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf)
+{
+	struct kstat stat;
+	int ret = vfs_stat(filename, &stat);
+	if (!ret)
+		ret = cp_stat64(statbuf, &stat);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf)
+{
+	struct kstat stat;
+	int ret = vfs_lstat(filename, &stat);
+	if (!ret)
+		ret = cp_stat64(statbuf, &stat);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd, struct stat64_emu31 __user *, statbuf)
+{
+	struct kstat stat;
+	int ret = vfs_fstat(fd, &stat);
+	if (!ret)
+		ret = cp_stat64(statbuf, &stat);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename,
+		       struct stat64_emu31 __user *, statbuf, int, flag)
+{
+	struct kstat stat;
+	int error;
+
+	error = vfs_fstatat(dfd, filename, &stat, flag);
+	if (error)
+		return error;
+	return cp_stat64(statbuf, &stat);
+}
+
+/*
+ * Linux/i386 used to be unable to handle more than
+ * 4 system call parameters, so these system calls used a memory
+ * block for parameter passing.
+ */
+
+struct mmap_arg_struct_emu31 {
+	compat_ulong_t addr;
+	compat_ulong_t len;
+	compat_ulong_t prot;
+	compat_ulong_t flags;
+	compat_ulong_t fd;
+	compat_ulong_t offset;
+};
+
+COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg)
+{
+	struct mmap_arg_struct_emu31 a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		return -EFAULT;
+	if (a.offset & ~PAGE_MASK)
+		return -EINVAL;
+	return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+			      a.offset >> PAGE_SHIFT);
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg)
+{
+	struct mmap_arg_struct_emu31 a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		return -EFAULT;
+	return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count)
+{
+	if ((compat_ssize_t) count < 0)
+		return -EINVAL; 
+
+	return sys_read(fd, buf, count);
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count)
+{
+	if ((compat_ssize_t) count < 0)
+		return -EINVAL; 
+
+	return sys_write(fd, buf, count);
+}
+
+/*
+ * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64.
+ * These need to rewrite the advice values for POSIX_FADV_{DONTNEED,NOREUSE}
+ * because the 31 bit values differ from the 64 bit values.
+ */
+
+COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise)
+{
+	if (advise == 4)
+		advise = POSIX_FADV_DONTNEED;
+	else if (advise == 5)
+		advise = POSIX_FADV_NOREUSE;
+	return sys_fadvise64(fd, (unsigned long)high << 32 | low, len, advise);
+}
+
+struct fadvise64_64_args {
+	int fd;
+	long long offset;
+	long long len;
+	int advice;
+};
+
+COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args)
+{
+	struct fadvise64_64_args a;
+
+	if ( copy_from_user(&a, args, sizeof(a)) )
+		return -EFAULT;
+	if (a.advice == 4)
+		a.advice = POSIX_FADV_DONTNEED;
+	else if (a.advice == 5)
+		a.advice = POSIX_FADV_NOREUSE;
+	return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
+}
+
+COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow,
+		       u32, nhigh, u32, nlow, unsigned int, flags)
+{
+	return sys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow,
+				   ((u64)nhigh << 32) + nlow, flags);
+}
+
+COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow,
+		       u32, lenhigh, u32, lenlow)
+{
+	return sys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow,
+			     ((u64)lenhigh << 32) + lenlow);
+}
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
new file mode 100644
index 000000000..a0a886c04
--- /dev/null
+++ b/arch/s390/kernel/compat_linux.h
@@ -0,0 +1,129 @@
+#ifndef _ASM_S390X_S390_H
+#define _ASM_S390X_S390_H
+
+#include <linux/compat.h>
+#include <linux/socket.h>
+#include <linux/syscalls.h>
+
+/* Macro that masks the high order bit of a 32 bit pointer and converts it */
+/*       to a 64 bit pointer */
+#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL))
+#define AA(__x)				\
+	((unsigned long)(__x))
+
+/* Now 32bit compatibility types */
+struct ipc_kludge_32 {
+        __u32   msgp;                           /* pointer              */
+        __s32   msgtyp;
+};
+
+/* asm/sigcontext.h */
+typedef union
+{
+	__u64   d;
+	__u32   f; 
+} freg_t32;
+
+typedef struct
+{
+	unsigned int	fpc;
+	unsigned int	pad;
+	freg_t32	fprs[__NUM_FPRS];              
+} _s390_fp_regs32;
+
+typedef struct 
+{
+        __u32   mask;
+        __u32	addr;
+} _psw_t32 __attribute__ ((aligned(8)));
+
+typedef struct
+{
+	_psw_t32	psw;
+	__u32		gprs[__NUM_GPRS];
+	__u32		acrs[__NUM_ACRS];
+} _s390_regs_common32;
+
+typedef struct
+{
+	_s390_regs_common32 regs;
+	_s390_fp_regs32     fpregs;
+} _sigregs32;
+
+typedef struct
+{
+	__u32 gprs_high[__NUM_GPRS];
+	__u64 vxrs_low[__NUM_VXRS_LOW];
+	__vector128 vxrs_high[__NUM_VXRS_HIGH];
+	__u8 __reserved[128];
+} _sigregs_ext32;
+
+#define _SIGCONTEXT_NSIG32	64
+#define _SIGCONTEXT_NSIG_BPW32	32
+#define __SIGNAL_FRAMESIZE32	96
+#define _SIGMASK_COPY_SIZE32	(sizeof(u32)*2)
+
+struct sigcontext32
+{
+	__u32	oldmask[_COMPAT_NSIG_WORDS];
+	__u32	sregs;				/* pointer */
+};
+
+/* asm/signal.h */
+
+/* asm/ucontext.h */
+struct ucontext32 {
+	__u32			uc_flags;
+	__u32			uc_link;	/* pointer */	
+	compat_stack_t		uc_stack;
+	_sigregs32		uc_mcontext;
+	compat_sigset_t		uc_sigmask;
+	/* Allow for uc_sigmask growth.  Glibc uses a 1024-bit sigset_t.  */
+	unsigned char		__unused[128 - sizeof(compat_sigset_t)];
+	_sigregs_ext32		uc_mcontext_ext;
+};
+
+struct stat64_emu31;
+struct mmap_arg_struct_emu31;
+struct fadvise64_64_args;
+
+long compat_sys_s390_chown16(const char __user *filename, u16 user, u16 group);
+long compat_sys_s390_lchown16(const char __user *filename, u16 user, u16 group);
+long compat_sys_s390_fchown16(unsigned int fd, u16 user, u16 group);
+long compat_sys_s390_setregid16(u16 rgid, u16 egid);
+long compat_sys_s390_setgid16(u16 gid);
+long compat_sys_s390_setreuid16(u16 ruid, u16 euid);
+long compat_sys_s390_setuid16(u16 uid);
+long compat_sys_s390_setresuid16(u16 ruid, u16 euid, u16 suid);
+long compat_sys_s390_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
+long compat_sys_s390_setresgid16(u16 rgid, u16 egid, u16 sgid);
+long compat_sys_s390_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
+long compat_sys_s390_setfsuid16(u16 uid);
+long compat_sys_s390_setfsgid16(u16 gid);
+long compat_sys_s390_getgroups16(int gidsetsize, u16 __user *grouplist);
+long compat_sys_s390_setgroups16(int gidsetsize, u16 __user *grouplist);
+long compat_sys_s390_getuid16(void);
+long compat_sys_s390_geteuid16(void);
+long compat_sys_s390_getgid16(void);
+long compat_sys_s390_getegid16(void);
+long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low);
+long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low);
+long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low);
+long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low);
+long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count);
+long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_lstat64(const char __user *filename, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag);
+long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg);
+long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg);
+long compat_sys_s390_read(unsigned int fd, char __user * buf, compat_size_t count);
+long compat_sys_s390_write(unsigned int fd, const char __user * buf, compat_size_t count);
+long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise);
+long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
+long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags);
+long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow);
+long compat_sys_sigreturn(void);
+long compat_sys_rt_sigreturn(void);
+
+#endif /* _ASM_S390X_S390_H */
diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h
new file mode 100644
index 000000000..12b823833
--- /dev/null
+++ b/arch/s390/kernel/compat_ptrace.h
@@ -0,0 +1,63 @@
+#ifndef _PTRACE32_H
+#define _PTRACE32_H
+
+#include <asm/ptrace.h>    /* needed for NUM_CR_WORDS */
+#include "compat_linux.h"  /* needed for psw_compat_t */
+
+struct compat_per_struct_kernel {
+	__u32 cr9;		/* PER control bits */
+	__u32 cr10;		/* PER starting address */
+	__u32 cr11;		/* PER ending address */
+	__u32 bits;		/* Obsolete software bits */
+	__u32 starting_addr;	/* User specified start address */
+	__u32 ending_addr;	/* User specified end address */
+	__u16 perc_atmid;	/* PER trap ATMID */
+	__u32 address;		/* PER trap instruction address */
+	__u8  access_id;	/* PER trap access identification */
+};
+
+struct compat_user_regs_struct
+{
+	psw_compat_t psw;
+	u32 gprs[NUM_GPRS];
+	u32 acrs[NUM_ACRS];
+	u32 orig_gpr2;
+	/* nb: there's a 4-byte hole here */
+	s390_fp_regs fp_regs;
+	/*
+	 * These per registers are in here so that gdb can modify them
+	 * itself as there is no "official" ptrace interface for hardware
+	 * watchpoints. This is the way intel does it.
+	 */
+	struct compat_per_struct_kernel per_info;
+	u32  ieee_instruction_pointer;	/* obsolete, always 0 */
+};
+
+struct compat_user {
+	/* We start with the registers, to mimic the way that "memory"
+	   is returned from the ptrace(3,...) function.  */
+	struct compat_user_regs_struct regs;
+	/* The remaining fields help gdb figure out what goes where */
+	u32 u_tsize;		/* Text segment size (pages). */
+	u32 u_dsize;	        /* Data segment size (pages). */
+	u32 u_ssize;	        /* Stack segment size (pages). */
+	u32 start_code;         /* Starting virtual address of text. */
+	u32 start_stack;	/* Starting virtual address of stack area.
+				   This is actually the bottom of the stack,
+				   the top of the stack is always found in the
+				   stack pointer (gpr 15 on s390, not "esp").  */
+	s32 signal;     	 /* Signal that caused the core dump. */
+	u32 u_ar0;               /* Used by gdb to help find the values for */
+	                         /* the registers. */
+	u32 magic;		 /* To uniquely identify a core file */
+	char u_comm[32];	 /* User command that was responsible */
+};
+
+typedef struct
+{
+	__u32   len;
+	__u32   kernel_addr;
+	__u32   process_addr;
+} compat_ptrace_area;
+
+#endif /* _PTRACE32_H */
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
new file mode 100644
index 000000000..fe8d6924e
--- /dev/null
+++ b/arch/s390/kernel/compat_signal.c
@@ -0,0 +1,549 @@
+/*
+ *    Copyright IBM Corp. 2000, 2006
+ *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ *               Gerhard Tonn (ton@de.ibm.com)                  
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
+ */
+
+#include <linux/compat.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/lowcore.h>
+#include <asm/switch_to.h>
+#include "compat_linux.h"
+#include "compat_ptrace.h"
+#include "entry.h"
+
+typedef struct 
+{
+	__u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
+	struct sigcontext32 sc;
+	_sigregs32 sregs;
+	int signo;
+	_sigregs_ext32 sregs_ext;
+	__u16 svc_insn;		/* Offset of svc_insn is NOT fixed! */
+} sigframe32;
+
+typedef struct 
+{
+	__u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
+	__u16 svc_insn;
+	compat_siginfo_t info;
+	struct ucontext32 uc;
+} rt_sigframe32;
+
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
+{
+	int err;
+
+	/* If you change siginfo_t structure, please be sure
+	   this code is fixed accordingly.
+	   It should never copy any pad contained in the structure
+	   to avoid security leaks, but must copy the generic
+	   3 ints plus the relevant union member.  
+	   This routine must convert siginfo from 64bit to 32bit as well
+	   at the same time.  */
+	err = __put_user(from->si_signo, &to->si_signo);
+	err |= __put_user(from->si_errno, &to->si_errno);
+	err |= __put_user((short)from->si_code, &to->si_code);
+	if (from->si_code < 0)
+		err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE);
+	else {
+		switch (from->si_code >> 16) {
+		case __SI_RT >> 16: /* This is not generated by the kernel as of now.  */
+		case __SI_MESGQ >> 16:
+			err |= __put_user(from->si_int, &to->si_int);
+			/* fallthrough */
+		case __SI_KILL >> 16:
+			err |= __put_user(from->si_pid, &to->si_pid);
+			err |= __put_user(from->si_uid, &to->si_uid);
+			break;
+		case __SI_CHLD >> 16:
+			err |= __put_user(from->si_pid, &to->si_pid);
+			err |= __put_user(from->si_uid, &to->si_uid);
+			err |= __put_user(from->si_utime, &to->si_utime);
+			err |= __put_user(from->si_stime, &to->si_stime);
+			err |= __put_user(from->si_status, &to->si_status);
+			break;
+		case __SI_FAULT >> 16:
+			err |= __put_user((unsigned long) from->si_addr,
+					  &to->si_addr);
+			break;
+		case __SI_POLL >> 16:
+			err |= __put_user(from->si_band, &to->si_band);
+			err |= __put_user(from->si_fd, &to->si_fd);
+			break;
+		case __SI_TIMER >> 16:
+			err |= __put_user(from->si_tid, &to->si_tid);
+			err |= __put_user(from->si_overrun, &to->si_overrun);
+			err |= __put_user(from->si_int, &to->si_int);
+			break;
+		default:
+			break;
+		}
+	}
+	return err ? -EFAULT : 0;
+}
+
+int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
+{
+	int err;
+	u32 tmp;
+
+	err = __get_user(to->si_signo, &from->si_signo);
+	err |= __get_user(to->si_errno, &from->si_errno);
+	err |= __get_user(to->si_code, &from->si_code);
+
+	if (to->si_code < 0)
+		err |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE);
+	else {
+		switch (to->si_code >> 16) {
+		case __SI_RT >> 16: /* This is not generated by the kernel as of now.  */
+		case __SI_MESGQ >> 16:
+			err |= __get_user(to->si_int, &from->si_int);
+			/* fallthrough */
+		case __SI_KILL >> 16:
+			err |= __get_user(to->si_pid, &from->si_pid);
+			err |= __get_user(to->si_uid, &from->si_uid);
+			break;
+		case __SI_CHLD >> 16:
+			err |= __get_user(to->si_pid, &from->si_pid);
+			err |= __get_user(to->si_uid, &from->si_uid);
+			err |= __get_user(to->si_utime, &from->si_utime);
+			err |= __get_user(to->si_stime, &from->si_stime);
+			err |= __get_user(to->si_status, &from->si_status);
+			break;
+		case __SI_FAULT >> 16:
+			err |= __get_user(tmp, &from->si_addr);
+			to->si_addr = (void __force __user *)
+				(u64) (tmp & PSW32_ADDR_INSN);
+			break;
+		case __SI_POLL >> 16:
+			err |= __get_user(to->si_band, &from->si_band);
+			err |= __get_user(to->si_fd, &from->si_fd);
+			break;
+		case __SI_TIMER >> 16:
+			err |= __get_user(to->si_tid, &from->si_tid);
+			err |= __get_user(to->si_overrun, &from->si_overrun);
+			err |= __get_user(to->si_int, &from->si_int);
+			break;
+		default:
+			break;
+		}
+	}
+	return err ? -EFAULT : 0;
+}
+
+/* Save live access, FP control and FP/vector registers to current->thread */
+static void store_sigregs(void)
+{
+	struct thread_struct *thr = &current->thread;
+	int reg;
+
+	save_access_regs(thr->acrs);
+	save_fp_ctl(&thr->fp_regs.fpc);
+	if (thr->vxrs) {
+		save_vx_regs(thr->vxrs);
+		for (reg = 0; reg < __NUM_FPRS; reg++)
+			thr->fp_regs.fprs[reg] = *(freg_t *)(thr->vxrs + reg);
+	} else
+		save_fp_regs(thr->fp_regs.fprs);
+}
+
+/* Reload access and FP/vector registers from current->thread */
+static void load_sigregs(void)
+{
+	struct thread_struct *thr = &current->thread;
+	int reg;
+
+	restore_access_regs(thr->acrs);
+	/* The FP control word is loaded in restore_sigregs32(), not here */
+	if (thr->vxrs) {
+		for (reg = 0; reg < __NUM_FPRS; reg++)
+			*(freg_t *)(thr->vxrs + reg) = thr->fp_regs.fprs[reg];
+		restore_vx_regs(thr->vxrs);
+	} else
+		restore_fp_regs(thr->fp_regs.fprs);
+}
+
+static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
+{
+	_sigregs32 kbuf;
+	int n;
+
+	/* 31-bit PSW: upper mask word, filtered to the user-settable bits */
+	kbuf.regs.psw.mask = (__u32)(regs->psw.mask >> 32);
+	kbuf.regs.psw.mask &= PSW32_MASK_USER | PSW32_MASK_RI;
+	kbuf.regs.psw.mask |= PSW32_USER_BITS;
+	kbuf.regs.psw.addr = (__u32) regs->psw.addr |
+		(__u32)(regs->psw.mask & PSW_MASK_BA);
+	/* Only the low 32 bits of each gpr go into the basic register set */
+	for (n = 0; n < NUM_GPRS; n++)
+		kbuf.regs.gprs[n] = (__u32) regs->gprs[n];
+	memcpy(&kbuf.regs.acrs, current->thread.acrs,
+	       sizeof(kbuf.regs.acrs));
+	memcpy(&kbuf.fpregs, &current->thread.fp_regs,
+	       sizeof(kbuf.fpregs));
+	return __copy_to_user(sregs, &kbuf, sizeof(_sigregs32)) ? -EFAULT : 0;
+}
+
+static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
+{
+	_sigregs32 user_sregs;
+	int i;
+
+	/* Always make any pending restarted system call return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs)))
+		return -EFAULT;
+
+	if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
+		return -EINVAL;
+
+	/* Loading the floating-point-control word can fail. Do that first. */
+	if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+		return -EINVAL;
+
+	/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
+	regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
+		(__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 |
+		(__u64)(user_sregs.regs.psw.mask & PSW32_MASK_RI) << 32 |
+		(__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_AMODE);
+	/* Home-space address space control is rejected: use primary instead. */
+	if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME)
+		regs->psw.mask = PSW_ASC_PRIMARY |
+			(regs->psw.mask & ~PSW_MASK_ASC);
+	regs->psw.addr = (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_INSN);
+	for (i = 0; i < NUM_GPRS; i++)
+		regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
+	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
+	       sizeof(current->thread.acrs));
+
+	memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
+	       sizeof(current->thread.fp_regs));
+
+	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
+	return 0;
+}
+
+static int save_sigregs_ext32(struct pt_regs *regs,
+			      _sigregs_ext32 __user *sregs_ext)
+{
+	__u32 gprs_high[NUM_GPRS];
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i;
+
+	/* Save the upper 32 bits of every gpr to the signal stack */
+	for (i = 0; i < NUM_GPRS; i++)
+		gprs_high[i] = regs->gprs[i] >> 32;
+	if (__copy_to_user(&sregs_ext->gprs_high, &gprs_high,
+			   sizeof(sregs_ext->gprs_high)))
+		return -EFAULT;
+
+	/* Vector registers are saved only if the task has a vxrs save area */
+	if (current->thread.vxrs) {
+		for (i = 0; i < __NUM_VXRS_LOW; i++)	/* second 64-bit word of each */
+			vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
+				   sizeof(sregs_ext->vxrs_low)) ||
+		    __copy_to_user(&sregs_ext->vxrs_high,
+				   current->thread.vxrs + __NUM_VXRS_LOW,
+				   sizeof(sregs_ext->vxrs_high)))
+			return -EFAULT;
+	}
+	return 0;
+}
+
+static int restore_sigregs_ext32(struct pt_regs *regs,
+				 _sigregs_ext32 __user *sregs_ext)
+{
+	__u32 gprs_high[NUM_GPRS];
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i;
+
+	/* Restore the upper halves of all gprs: size of the array, not a pointer */
+	if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high,
+			     sizeof(sregs_ext->gprs_high)))
+		return -EFAULT;
+	for (i = 0; i < NUM_GPRS; i++)	/* first 32-bit word of each gpr slot */
+		*(__u32 *)&regs->gprs[i] = gprs_high[i];
+
+	/* Vector registers are restored only if the task has a vxrs save area */
+	if (current->thread.vxrs) {
+		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
+				     sizeof(sregs_ext->vxrs_low)) ||
+		    __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+				     &sregs_ext->vxrs_high,
+				     sizeof(sregs_ext->vxrs_high)))
+			return -EFAULT;
+		for (i = 0; i < __NUM_VXRS_LOW; i++)	/* second 64-bit word of each */
+			*((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+	}
+	return 0;	/* -EFAULT is returned above if any user copy fails */
+}
+
+COMPAT_SYSCALL_DEFINE0(sigreturn)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15];
+	sigset_t blocked;
+
+	/* Reinstate the signal mask stored in the frame's sigcontext */
+	if (__copy_from_user(&blocked.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
+		goto badframe;
+	set_current_blocked(&blocked);
+	if (restore_sigregs32(regs, &frame->sregs) ||
+	    restore_sigregs_ext32(regs, &frame->sregs_ext))
+		goto badframe;
+	load_sigregs();
+	return regs->gprs[2];
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15];
+	sigset_t blocked;
+
+	/* Reinstate the signal mask stored in the frame's ucontext */
+	if (__copy_from_user(&blocked, &frame->uc.uc_sigmask, sizeof(blocked)))
+		goto badframe;
+	set_current_blocked(&blocked);
+	/* Alternate stack first, then the basic and extended register sets */
+	if (compat_restore_altstack(&frame->uc.uc_stack) ||
+	    restore_sigregs32(regs, &frame->uc.uc_mcontext) ||
+	    restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
+		goto badframe;
+	load_sigregs();
+	return regs->gprs[2];
+badframe:
+	force_sig(SIGSEGV, current);
+	return 0;
+}
+
+/*
+ * Set up a signal frame.
+ */
+
+
+/*
+ * Determine which stack to use..
+ */
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+	unsigned long stack_top;
+
+	/* Start from the interrupted context's stack pointer (gpr 15). */
+	stack_top = (unsigned long) A(regs->gprs[15]);
+
+	/* On the alternate stack but the frame does not fit: return -1. */
+	if (on_sig_stack(stack_top) &&
+	    !on_sig_stack((stack_top - frame_size) & -8UL))
+		return (void __user *) -1UL;
+
+	/* SA_ONSTACK: use the end of the alternate stack if sas_ss_flags() is 0. */
+	if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(stack_top))
+		stack_top = current->sas_ss_sp + current->sas_ss_size;
+
+	/* The frame lies below the chosen top, aligned down to 8 bytes. */
+	return (void __user *)((stack_top - frame_size) & -8ul);
+}
+
+static int setup_frame32(struct ksignal *ksig, sigset_t *set,
+			 struct pt_regs *regs)
+{
+	int sig = ksig->sig;
+	sigframe32 __user *frame;
+	struct sigcontext32 sc;
+	unsigned long restorer;
+	size_t frame_size;
+
+	/*
+	 * gprs_high are always present for 31-bit compat tasks.
+	 * The space for vector registers is only allocated if
+	 * the machine supports it (MACHINE_HAS_VX).
+	 */
+	frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved);
+	if (!MACHINE_HAS_VX)
+		frame_size -= sizeof(frame->sregs_ext.vxrs_low) +
+			      sizeof(frame->sregs_ext.vxrs_high);
+	frame = get_sigframe(&ksig->ka, regs, frame_size);
+	if (frame == (void __user *) -1UL)
+		return -EFAULT;
+
+	/* Set up backchain: the old stack pointer goes to the frame start. */
+	if (__put_user(regs->gprs[15], (unsigned int __user *) frame))
+		return -EFAULT;
+
+	/* Create struct sigcontext32 on the signal stack */
+	memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32);
+	sc.sregs = (__u32)(unsigned long __force) &frame->sregs;
+	if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc)))
+		return -EFAULT;
+
+	/* Store registers needed to create the signal frame */
+	store_sigregs();
+
+	/* Create _sigregs32 on the signal stack */
+	if (save_sigregs32(regs, &frame->sregs))
+		return -EFAULT;
+
+	/* signo slot for backtraces. NOTE(review): gprs[2] is set to sig only below - confirm */
+	if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo))
+		return -EFAULT;
+
+	/* Create _sigregs_ext32 on the signal stack */
+	if (save_sigregs_ext32(regs, &frame->sregs_ext))
+		return -EFAULT;
+
+	/* Set up to return from userspace.  If provided, use a stub
+	   already in userspace.  */
+	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+		restorer = (unsigned long __force)
+			ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
+	} else {
+		/* Frames without vector registers are shorter: svc is the last 2 bytes */
+		__u16 __user *svc = (void __user *) frame + frame_size - 2;
+		if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
+			return -EFAULT;
+		restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
+        }
+
+	/* Set up registers for signal handler */
+	regs->gprs[14] = restorer;
+	regs->gprs[15] = (__force __u64) frame;
+	/* Force 31 bit amode and default user address space control. */
+	regs->psw.mask = PSW_MASK_BA |
+		(PSW_USER_BITS & PSW_MASK_ASC) |
+		(regs->psw.mask & ~PSW_MASK_ASC);
+	regs->psw.addr = (__force __u64) ksig->ka.sa.sa_handler;
+
+	regs->gprs[2] = sig;
+	regs->gprs[3] = (__force __u64) &frame->sc;
+
+	/* We forgot to include these in the sigcontext.
+	   To avoid breaking binary compatibility, they are passed as args. */
+	if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL ||
+	    sig == SIGTRAP || sig == SIGFPE) {
+		/* set extra registers only for synchronous signals */
+		regs->gprs[4] = regs->int_code & 127;
+		regs->gprs[5] = regs->int_parm_long;
+		regs->gprs[6] = task_thread_info(current)->last_break;
+	}
+
+	return 0;
+}
+
+static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
+			    struct pt_regs *regs)
+{
+	rt_sigframe32 __user *frame;
+	unsigned long restorer;
+	size_t frame_size;
+	u32 uc_flags;
+
+	frame_size = sizeof(*frame) -
+		     sizeof(frame->uc.uc_mcontext_ext.__reserved);
+	/*
+	 * gprs_high are always present for 31-bit compat tasks.
+	 * The space for vector registers is only allocated if
+	 * the machine supports it
+	 */
+	uc_flags = UC_GPRS_HIGH;
+	if (MACHINE_HAS_VX) {
+		if (current->thread.vxrs)
+			uc_flags |= UC_VXRS;
+	} else
+		frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
+			      sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
+	frame = get_sigframe(&ksig->ka, regs, frame_size);
+	if (frame == (void __user *) -1UL)
+		return -EFAULT;
+
+	/* Set up backchain. */
+	if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame))
+		return -EFAULT;
+
+	/* Set up to return from userspace.  If provided, use a stub
+	   already in userspace.  */
+	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+		restorer = (unsigned long __force)
+			ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
+	} else {
+		__u16 __user *svc = &frame->svc_insn;
+		if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
+			return -EFAULT;
+		restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
+	}
+
+	/* Create siginfo on the signal stack */
+	if (copy_siginfo_to_user32(&frame->info, &ksig->info))
+		return -EFAULT;
+
+	/* Store registers needed to create the signal frame */
+	store_sigregs();
+
+	/* Create ucontext on the signal stack. */
+	if (__put_user(uc_flags, &frame->uc.uc_flags) ||
+	    __put_user(0, &frame->uc.uc_link) ||
+	    __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
+	    save_sigregs32(regs, &frame->uc.uc_mcontext) ||
+	    __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) ||
+	    save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
+		return -EFAULT;
+
+	/* Set up registers for signal handler */
+	regs->gprs[14] = restorer;
+	regs->gprs[15] = (__force __u64) frame;
+	/* Force 31 bit amode and default user address space control. */
+	regs->psw.mask = PSW_MASK_BA |
+		(PSW_USER_BITS & PSW_MASK_ASC) |
+		(regs->psw.mask & ~PSW_MASK_ASC);
+	regs->psw.addr = (__u64 __force) ksig->ka.sa.sa_handler;
+
+	regs->gprs[2] = ksig->sig;
+	regs->gprs[3] = (__force __u64) &frame->info;
+	regs->gprs[4] = (__force __u64) &frame->uc;
+	regs->gprs[5] = task_thread_info(current)->last_break;
+	return 0;
+}
+
+/*
+ * OK, we're invoking a handler
+ */	
+
+void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+		     struct pt_regs *regs)
+{
+	int err;
+
+	/* SA_SIGINFO handlers get the rt frame, all others the plain frame */
+	err = (ksig->ka.sa.sa_flags & SA_SIGINFO) ?
+		setup_rt_frame32(ksig, oldset, regs) :
+		setup_frame32(ksig, oldset, regs);
+
+	/* Pass the frame setup result on through signal_setup_done() */
+	signal_setup_done(err, ksig, test_thread_flag(TIF_SINGLE_STEP));
+}
+
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
new file mode 100644
index 000000000..d7fa2f0f1
--- /dev/null
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -0,0 +1,222 @@
+/*
+ *  Compat system call wrappers.
+ *
+ *    Copyright IBM Corp. 2014
+ */
+
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include "entry.h"
+
+#define COMPAT_SYSCALL_WRAP1(name, ...) \
+	COMPAT_SYSCALL_WRAPx(1, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP2(name, ...) \
+	COMPAT_SYSCALL_WRAPx(2, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP3(name, ...) \
+	COMPAT_SYSCALL_WRAPx(3, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP4(name, ...) \
+	COMPAT_SYSCALL_WRAPx(4, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP5(name, ...) \
+	COMPAT_SYSCALL_WRAPx(5, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP6(name, ...) \
+	COMPAT_SYSCALL_WRAPx(6, _##name, __VA_ARGS__)
+
+#define __SC_COMPAT_TYPE(t, a) \
+	__typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a
+
+#define __SC_COMPAT_CAST(t, a)						\
+({									\
+	long __ReS = a;							\
+									\
+	BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) &&		\
+		     !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t));		\
+	if (__TYPE_IS_L(t))						\
+		__ReS = (s32)a;						\
+	if (__TYPE_IS_UL(t))						\
+		__ReS = (u32)a;						\
+	if (__TYPE_IS_PTR(t))						\
+		__ReS = a & 0x7fffffff;					\
+	(t)__ReS;							\
+})
+
+/*
+ * The COMPAT_SYSCALL_WRAP macro generates system call wrappers to be used by
+ * compat tasks. These wrappers will only be used for system calls where only
+ * the system call arguments need sign or zero extension or zeroing of the upper
+ * 33 bits of pointers.
+ * Note: since the wrapper function will afterwards call a system call which
+ * again performs zero and sign extension for all system call arguments with
+ * a size of less than eight bytes, these compat wrappers only touch those
+ * system call arguments with a size of eight bytes ((unsigned) long and
+ * pointers). Zero and sign extension for e.g. int parameters will be done by
+ * the regular system call wrappers.
+ */
+#define COMPAT_SYSCALL_WRAPx(x, name, ...)					\
+	asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));		\
+	asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\
+	asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__))	\
+	{									\
+		return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__));	\
+	}
+
+COMPAT_SYSCALL_WRAP1(exit, int, error_code);
+COMPAT_SYSCALL_WRAP1(close, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP2(link, const char __user *, oldname, const char __user *, newname);
+COMPAT_SYSCALL_WRAP1(unlink, const char __user *, pathname);
+COMPAT_SYSCALL_WRAP1(chdir, const char __user *, filename);
+COMPAT_SYSCALL_WRAP3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev);
+COMPAT_SYSCALL_WRAP2(chmod, const char __user *, filename, umode_t, mode);
+COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name);
+COMPAT_SYSCALL_WRAP1(alarm, unsigned int, seconds);
+COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode);
+COMPAT_SYSCALL_WRAP1(nice, int, increment);
+COMPAT_SYSCALL_WRAP2(kill, int, pid, int, sig);
+COMPAT_SYSCALL_WRAP2(rename, const char __user *, oldname, const char __user *, newname);
+COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname);
+COMPAT_SYSCALL_WRAP1(dup, unsigned int, fildes);
+COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes);
+COMPAT_SYSCALL_WRAP1(brk, unsigned long, brk);
+COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler);
+COMPAT_SYSCALL_WRAP1(acct, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags);
+COMPAT_SYSCALL_WRAP2(setpgid, pid_t, pid, pid_t, pgid);
+COMPAT_SYSCALL_WRAP1(umask, int, mask);
+COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename);
+COMPAT_SYSCALL_WRAP2(dup2, unsigned int, oldfd, unsigned int, newfd);
+COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask);
+COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len);
+COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new);
+COMPAT_SYSCALL_WRAP3(readlink, const char __user *, path, char __user *, buf, int, bufsiz);
+COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library);
+COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags);
+COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg);
+COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len);
+COMPAT_SYSCALL_WRAP2(fchmod, unsigned int, fd, umode_t, mode);
+COMPAT_SYSCALL_WRAP2(getpriority, int, which, int, who);
+COMPAT_SYSCALL_WRAP3(setpriority, int, which, int, who, int, niceval);
+COMPAT_SYSCALL_WRAP3(syslog, int, type, char __user *, buf, int, len);
+COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile);
+COMPAT_SYSCALL_WRAP1(fsync, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len);
+COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name);
+COMPAT_SYSCALL_WRAP3(mprotect, unsigned long, start, size_t, len, unsigned long, prot);
+COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs);
+COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr);
+COMPAT_SYSCALL_WRAP1(getpgid, pid_t, pid);
+COMPAT_SYSCALL_WRAP1(fchdir, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(bdflush, int, func, long, data);
+COMPAT_SYSCALL_WRAP3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2);
+COMPAT_SYSCALL_WRAP1(s390_personality, unsigned int, personality);
+COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, unsigned long, high, unsigned long, low, loff_t __user *, result, unsigned int, whence);
+COMPAT_SYSCALL_WRAP2(flock, unsigned int, fd, unsigned int, cmd);
+COMPAT_SYSCALL_WRAP3(msync, unsigned long, start, size_t, len, int, flags);
+COMPAT_SYSCALL_WRAP1(getsid, pid_t, pid);
+COMPAT_SYSCALL_WRAP1(fdatasync, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(mlock, unsigned long, start, size_t, len);
+COMPAT_SYSCALL_WRAP2(munlock, unsigned long, start, size_t, len);
+COMPAT_SYSCALL_WRAP1(mlockall, int, flags);
+COMPAT_SYSCALL_WRAP2(sched_setparam, pid_t, pid, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP2(sched_getparam, pid_t, pid, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP1(sched_getscheduler, pid_t, pid);
+COMPAT_SYSCALL_WRAP1(sched_get_priority_max, int, policy);
+COMPAT_SYSCALL_WRAP1(sched_get_priority_min, int, policy);
+COMPAT_SYSCALL_WRAP5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr);
+COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout);
+COMPAT_SYSCALL_WRAP5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5);
+COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, unsigned long, size);
+COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr);
+COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data);
+COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, uid_t, user, gid_t, group);
+COMPAT_SYSCALL_WRAP2(setreuid, uid_t, ruid, uid_t, euid);
+COMPAT_SYSCALL_WRAP2(setregid, gid_t, rgid, gid_t, egid);
+COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, gid_t __user *, grouplist);
+COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, gid_t __user *, grouplist);
+COMPAT_SYSCALL_WRAP3(fchown, unsigned int, fd, uid_t, user, gid_t, group);
+COMPAT_SYSCALL_WRAP3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid);
+COMPAT_SYSCALL_WRAP3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid);
+COMPAT_SYSCALL_WRAP3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid);
+COMPAT_SYSCALL_WRAP3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid);
+COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, uid_t, user, gid_t, group);
+COMPAT_SYSCALL_WRAP1(setuid, uid_t, uid);
+COMPAT_SYSCALL_WRAP1(setgid, gid_t, gid);
+COMPAT_SYSCALL_WRAP1(setfsuid, uid_t, uid);
+COMPAT_SYSCALL_WRAP1(setfsgid, gid_t, gid);
+COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old);
+COMPAT_SYSCALL_WRAP3(mincore, unsigned long, start, size_t, len, unsigned char __user *, vec);
+COMPAT_SYSCALL_WRAP3(madvise, unsigned long, start, size_t, len, int, behavior);
+COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count);
+COMPAT_SYSCALL_WRAP4(getxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP4(lgetxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP3(listxattr, const char __user *, path, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP3(llistxattr, const char __user *, path, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name);
+COMPAT_SYSCALL_WRAP1(exit_group, int, error_code);
+COMPAT_SYSCALL_WRAP1(set_tid_address, int __user *, tidptr);
+COMPAT_SYSCALL_WRAP1(epoll_create, int, size);
+COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event);
+COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout);
+COMPAT_SYSCALL_WRAP1(timer_getoverrun, timer_t, timer_id);
+COMPAT_SYSCALL_WRAP1(timer_delete, compat_timer_t, compat_timer_id);
+COMPAT_SYSCALL_WRAP1(io_destroy, aio_context_t, ctx);
+COMPAT_SYSCALL_WRAP3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result);
+COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name);
+COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, size_t, len, key_serial_t, id);
+COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id);
+COMPAT_SYSCALL_WRAP5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags);
+COMPAT_SYSCALL_WRAP3(ioprio_set, int, which, int, who, int, ioprio);
+COMPAT_SYSCALL_WRAP2(ioprio_get, int, which, int, who);
+COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask);
+COMPAT_SYSCALL_WRAP2(inotify_rm_watch, int, fd, __s32, wd);
+COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev);
+COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag);
+COMPAT_SYSCALL_WRAP3(unlinkat, int, dfd, const char __user *, pathname, int, flag);
+COMPAT_SYSCALL_WRAP4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname);
+COMPAT_SYSCALL_WRAP5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags);
+COMPAT_SYSCALL_WRAP3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname);
+COMPAT_SYSCALL_WRAP4(readlinkat, int, dfd, const char __user *, path, char __user *, buf, int, bufsiz);
+COMPAT_SYSCALL_WRAP3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode);
+COMPAT_SYSCALL_WRAP3(faccessat, int, dfd, const char __user *, filename, int, mode);
+COMPAT_SYSCALL_WRAP1(unshare, unsigned long, unshare_flags);
+COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache);
+COMPAT_SYSCALL_WRAP1(eventfd, unsigned int, count);
+COMPAT_SYSCALL_WRAP2(timerfd_create, int, clockid, int, flags);
+COMPAT_SYSCALL_WRAP2(eventfd2, unsigned int, count, int, flags);
+COMPAT_SYSCALL_WRAP1(inotify_init1, int, flags);
+COMPAT_SYSCALL_WRAP2(pipe2, int __user *, fildes, int, flags);
+COMPAT_SYSCALL_WRAP3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags);
+COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags);
+COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig);
+COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig);
+COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags);
+COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val);
+COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags);
+COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim);
+COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag);
+COMPAT_SYSCALL_WRAP1(syncfs, int, fd);
+COMPAT_SYSCALL_WRAP2(setns, int, fd, int, nstype);
+COMPAT_SYSCALL_WRAP2(s390_runtime_instr, int, command, int, signum);
+COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, idx1, unsigned long, idx2);
+COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags);
+COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags);
+COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags);
+COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, const char __user *, uargs);
+COMPAT_SYSCALL_WRAP3(getrandom, char __user *, buf, size_t, count, unsigned int, flags);
+COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, flags);
+COMPAT_SYSCALL_WRAP3(bpf, int, cmd, union bpf_attr *, attr, unsigned int, size);
+COMPAT_SYSCALL_WRAP3(s390_pci_mmio_write, const unsigned long, mmio_addr, const void __user *, user_buffer, const size_t, length);
+COMPAT_SYSCALL_WRAP3(s390_pci_mmio_read, const unsigned long, mmio_addr, void __user *, user_buffer, const size_t, length);
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
new file mode 100644
index 000000000..199ec92ef
--- /dev/null
+++ b/arch/s390/kernel/cpcmd.c
@@ -0,0 +1,113 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2007
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Christian Borntraeger (cborntra@de.ibm.com),
+ */
+
+#define KMSG_COMPONENT "cpcmd"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <asm/ebcdic.h>
+#include <asm/cpcmd.h>
+#include <asm/io.h>
+
+static DEFINE_SPINLOCK(cpcmd_lock);	/* held by cpcmd() around __cpcmd() */
+static char cpcmd_buf[241];	/* command buffer; __cpcmd() accepts <= 240 chars */
+
+static int diag8_noresponse(int cmdlen)	/* diag 0x8 without response buffer */
+{
+	register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; /* command */
+	register unsigned long reg3 asm ("3") = cmdlen;	/* in: length, out: result */
+
+	asm volatile(
+		"	sam31\n"	/* addressing mode is switched just for the diag */
+		"	diag	%1,%0,0x8\n"
+		"	sam64\n"
+		: "+d" (reg3) : "d" (reg2) : "cc");
+	return reg3;	/* value left in register 3 by the diagnose */
+}
+
+static int diag8_response(int cmdlen, char *response, int *rlen)
+{
+	register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; /* command */
+	register unsigned long reg3 asm ("3") = (addr_t) response; /* output buffer */
+	register unsigned long reg4 asm ("4") = cmdlen | 0x40000000L; /* length + flag */
+	register unsigned long reg5 asm ("5") = *rlen;	/* in: buffer size */
+
+	asm volatile(
+		"	sam31\n"
+		"	diag	%2,%0,0x8\n"
+		"	sam64\n"
+		"	brc	8,1f\n"		/* condition code 0: keep reg5 as is */
+		"	agr	%1,%4\n"	/* otherwise add the original *rlen */
+		"1:\n"
+		: "+d" (reg4), "+d" (reg5)
+		: "d" (reg2), "d" (reg3), "d" (*rlen) : "cc");
+	*rlen = reg5;	/* NOTE(review): presumably the full response length - confirm */
+	return reg4;	/* value left in register 4 by the diagnose */
+}
+
+/*
+ * __cpcmd issues a command of at most 240 characters; compared to cpcmd:
+ *  - the response buffer must reside below 2GB (if any)
+ *  - __cpcmd is unlocked and therefore not SMP-safe
+ */
+int  __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
+{
+	int len = strlen(cmd);
+	int bufsize = rlen;	/* diag8_response() overwrites rlen */
+	int diag_rc;
+
+	BUG_ON(len > 240);
+	/* stage the command in cpcmd_buf and run ASCEBC over it */
+	memcpy(cpcmd_buf, cmd, len);
+	ASCEBC(cpcmd_buf, len);
+
+	if (!response) {
+		diag_rc = diag8_noresponse(len);
+	} else {
+		memset(response, 0, bufsize);
+		diag_rc = diag8_response(len, response, &rlen);
+		/* EBCASC covers the whole buffer, not only the updated rlen */
+		EBCASC(response, bufsize);
+	}
+	if (response_code)
+		*response_code = diag_rc;
+	return rlen;
+}
+EXPORT_SYMBOL(__cpcmd);
+
+int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
+{
+	unsigned long irq_flags;
+	char *bounce = NULL, *target = response;
+	int ret;
+
+	/* use a GFP_DMA bounce buffer if response fails the address checks */
+	if ((virt_to_phys(response) != (unsigned long) response) ||
+	    (((unsigned long)response + rlen) >> 31)) {
+		bounce = kmalloc(rlen, GFP_KERNEL | GFP_DMA);
+		if (!bounce) {
+			pr_warning("The cpcmd kernel function failed to "
+				   "allocate a response buffer\n");
+			return -ENOMEM;
+		}
+		target = bounce;
+	}
+	spin_lock_irqsave(&cpcmd_lock, irq_flags);
+	ret = __cpcmd(cmd, target, rlen, response_code);
+	spin_unlock_irqrestore(&cpcmd_lock, irq_flags);
+	if (bounce) {
+		memcpy(response, bounce, rlen);
+		kfree(bounce);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(cpcmd);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
new file mode 100644
index 000000000..49b74454d
--- /dev/null
+++ b/arch/s390/kernel/crash_dump.c
@@ -0,0 +1,687 @@
+/*
+ * S390 kdump implementation
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/crash_dump.h>
+#include <asm/lowcore.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/bootmem.h>
+#include <linux/elf.h>
+#include <linux/memblock.h>
+#include <asm/os_info.h>
+#include <asm/elf.h>
+#include <asm/ipl.h>
+#include <asm/sclp.h>
+
+#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) /* x + y bytes */
+#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) /* x - y bytes */
+#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) /* x - y as integer */
+
+static struct memblock_region oldmem_region;	/* filled in elfcorehdr_alloc() */
+
+static struct memblock_type oldmem_type = {
+	.cnt = 1,
+	.max = 1,
+	.total_size = 0,	/* becomes OLDMEM_SIZE for kdump */
+	.regions = &oldmem_region,	/* the single region above */
+};
+
+#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid)		\
+	for (i = 0, __next_mem_range(&i, nid, &memblock.physmem,	\
+				     &oldmem_type, p_start,		\
+				     p_end, p_nid);			\
+	     i != (u64)ULLONG_MAX; /* loop ends at this index value */	\
+	     __next_mem_range(&i, nid, &memblock.physmem,		\
+			      &oldmem_type, /* set up for kdump only */	\
+			      p_start, p_end, p_nid))
+
+struct dump_save_areas dump_save_areas;	/* filled by dump_save_area_create() */
+
+/*
+ * Allocate a save area for a CPU and record it; NULL if an allocation fails
+ */
+struct save_area_ext *dump_save_area_create(int cpu)
+{
+	struct save_area_ext **save_areas, *save_area;
+
+	save_area = kmalloc(sizeof(*save_area), GFP_KERNEL);
+	if (!save_area)
+		return NULL;
+	if (cpu + 1 > dump_save_areas.count) {
+		save_areas = krealloc(dump_save_areas.areas,
+				      (cpu + 1) * sizeof(void *),
+				      GFP_KERNEL | __GFP_ZERO);
+		if (!save_areas) {
+			kfree(save_area);
+			return NULL;
+		}
+		dump_save_areas.areas = save_areas;
+		dump_save_areas.count = cpu + 1; /* only once the array grew */
+	}
+	dump_save_areas.areas[cpu] = save_area;
+	return save_area;
+}
+
+/*
+ * Return the address lra yields for addr, or NULL if lra sets a nonzero cc
+ */
+static inline void *load_real_addr(void *addr)
+{
+	unsigned long real_addr;
+
+	asm volatile(
+		   "	lra     %0,0(%1)\n"
+		   "	jz	0f\n"		/* cc 0: keep the loaded address */
+		   "	la	%0,0\n"		/* otherwise report 0 */
+		   "0:"
+		   : "=a" (real_addr) : "a" (addr) : "cc");
+	return (void *)real_addr;
+}
+
+/*
+ * Copy count bytes of real memory at src to dest; page-wise for vmalloc dest
+ */
+static int copy_from_realmem(void *dest, void *src, size_t count)
+{
+	unsigned long chunk, room;
+
+	/* one direct copy is enough unless dest is a vmalloc/module address */
+	if (count && !is_vmalloc_or_module_addr(dest))
+		return memcpy_real(dest, src, count);
+	while (count) {
+		room = PAGE_SIZE - (__pa(dest) & ~PAGE_MASK);
+		chunk = min(count, room);
+		if (memcpy_real(load_real_addr(dest), src, chunk))
+			return -EFAULT;
+		count -= chunk;
+		dest += chunk;
+		src += chunk;
+	}
+	return 0;
+}
+
+/*
+ * ELF header built by elfcorehdr_alloc() in the new kernel; NULL otherwise
+ */
+static void *elfcorehdr_newmem;
+
+/*
+ * Copy one page from zfcpdump "oldmem"
+ *
+ * Sources below the HSA size are fetched with memcpy_hsa(); anything else is
+ * copied from real memory. Returns csize on success, else the helper's error.
+ */
+static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize,
+					 unsigned long src, int userbuf)
+{
+	int err;
+
+	if (src < sclp_get_hsa_size())
+		err = memcpy_hsa(buf, src, csize, userbuf);
+	else if (userbuf)
+		err = copy_to_user_real((void __force __user *) buf,
+					(void *) src, csize);
+	else
+		err = memcpy_real(buf, (void *) src, csize);
+	if (err)
+		return err;
+	return csize;
+}
+
+/*
+ * Copy one page from kdump "oldmem"
+ *
+ * For the kdump reserved memory this function performs a swap operation:
+ *  - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE].
+ *  - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
+ * Returns csize on success or the error code of the failed copy.
+ */
+static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize,
+				      unsigned long src, int userbuf)
+{
+	int rc;
+
+	if (src < OLDMEM_SIZE)
+		src += OLDMEM_BASE;
+	else if (src >= OLDMEM_BASE &&	/* OLDMEM_BASE itself swaps to 0 */
+		 src < OLDMEM_BASE + OLDMEM_SIZE)
+		src -= OLDMEM_BASE;
+	if (userbuf)
+		rc = copy_to_user_real((void __force __user *) buf,
+				       (void *) src, csize);
+	else
+		rc = copy_from_realmem(buf, (void *) src, csize);
+	return rc ? rc : csize;	/* errors must not be reported as csize */
+}
+
+/*
+ * Copy one page from "oldmem", via the kdump or the zfcpdump helper
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
+			 unsigned long offset, int userbuf)
+{
+	unsigned long addr = (pfn << PAGE_SHIFT) + offset;
+
+	/* nothing to do for an empty request */
+	if (!csize)
+		return 0;
+	/* a non-zero OLDMEM_BASE selects the kdump variant */
+	if (!OLDMEM_BASE)
+		return copy_oldmem_page_zfcpdump(buf, csize, addr, userbuf);
+	return copy_oldmem_page_kdump(buf, csize, addr, userbuf);
+}
+
+/*
+ * Remap "oldmem" for kdump
+ *
+ * Pages below OLDMEM_SIZE are backed by the frames OLDMEM_BASE higher (the
+ * swapped area); whatever lies beyond that is remapped at its own pfn.
+ */
+static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
+					unsigned long from, unsigned long pfn,
+					unsigned long size, pgprot_t prot)
+{
+	unsigned long swapped;	/* bytes that fall into the swapped area */
+	int err;
+
+	if (pfn >= OLDMEM_SIZE >> PAGE_SHIFT)
+		return remap_pfn_range(vma, from, pfn, size, prot);
+
+	swapped = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT));
+	err = remap_pfn_range(vma, from, pfn + (OLDMEM_BASE >> PAGE_SHIFT),
+			      swapped, prot);
+	if (err || size == swapped)
+		return err;
+	/* the remainder lies above the swapped area */
+	return remap_pfn_range(vma, from + swapped,
+			       pfn + (swapped >> PAGE_SHIFT),
+			       size - swapped, prot);
+}
+
+/*
+ * Remap "oldmem" for zfcpdump
+ *
+ * Only the part of the range at or above the HSA size is mapped; the part
+ * below is left unmapped here and is read through copy_oldmem_page() instead.
+ */
+static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
+					   unsigned long from,
+					   unsigned long pfn,
+					   unsigned long size, pgprot_t prot)
+{
+	unsigned long hsa_size = sclp_get_hsa_size();
+	unsigned long skip;	/* leading bytes that lie below the HSA size */
+
+	if (pfn < hsa_size >> PAGE_SHIFT) {
+		skip = min(size, hsa_size - (pfn << PAGE_SHIFT));
+		if (skip == size)
+			return 0;
+		pfn += skip >> PAGE_SHIFT;
+		from += skip;
+		size -= skip;
+	}
+	return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Remap "oldmem" for kdump or zfcpdump; a non-zero OLDMEM_BASE selects the
+ * kdump variant
+ */
+int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
+			   unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+	if (!OLDMEM_BASE)
+		return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size,
+						       prot);
+	return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot);
+}
+
+/*
+ * Copy memory from old kernel; returns 0 or the error of the failing helper
+ */
+int copy_from_oldmem(void *dest, void *src, size_t count)
+{
+	unsigned long from = (unsigned long) src, head = 0, hsa_end;
+	int rc;
+
+	if (OLDMEM_BASE) {
+		if (from < OLDMEM_SIZE) {
+			head = min(count, OLDMEM_SIZE - from);
+			rc = copy_from_realmem(dest, src + OLDMEM_BASE, head);
+			if (rc)
+				return rc;
+		}
+	} else {
+		hsa_end = sclp_get_hsa_size();
+		if (from < hsa_end) {
+			head = min(count, hsa_end - from);
+			rc = memcpy_hsa(dest, from, head, 0);
+			if (rc)
+				return rc;
+		}
+	}
+	return copy_from_realmem(dest + head, src + head, count - head);
+}
+
+/*
+ * Zeroed allocation that panics instead of handing NULL to the caller
+ */
+static void *kzalloc_panic(int len)
+{
+	void *mem = kzalloc(len, GFP_KERNEL);
+
+	/* the message carries the requested length */
+	if (mem == NULL)
+		panic("s390 kdump kzalloc (%d) failed", len);
+	return mem;
+}
+
+/*
+ * Write one ELF note (header, name, descriptor) at buf; returns the next slot
+ */
+static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len,
+		     const char *name)
+{
+	Elf64_Nhdr *nhdr = buf;
+	u64 name_off = sizeof(Elf64_Nhdr);
+	u64 desc_off, end_off;
+
+	nhdr->n_type = type;
+	nhdr->n_descsz = d_len;
+	nhdr->n_namesz = strlen(name) + 1;	/* name plus its NUL */
+
+	/* descriptor and end offsets are rounded up to multiples of 4 */
+	desc_off = roundup(name_off + nhdr->n_namesz, 4);
+	end_off = roundup(desc_off + nhdr->n_descsz, 4);
+
+	memcpy(buf + name_off, name, nhdr->n_namesz);
+	memcpy(buf + desc_off, desc, nhdr->n_descsz);
+
+	return PTR_ADD(buf, end_off);
+}
+
+/*
+ * Initialize prstatus note; pr_pid is a running number starting at 1
+ */
+static void *nt_prstatus(void *ptr, struct save_area *sa)
+{
+	static int next_pid = 1;	/* persists across calls */
+	struct elf_prstatus prs;
+
+	memset(&prs, 0, sizeof(prs));
+	prs.pr_pid = next_pid++;
+	/* gp_regs, psw and acc_regs are taken over from the save area */
+	memcpy(&prs.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs));
+	memcpy(&prs.pr_reg.psw, sa->psw, sizeof(sa->psw));
+	memcpy(&prs.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs));
+
+	return nt_init(ptr, NT_PRSTATUS, &prs, sizeof(prs),
+		       "CORE");
+}
+
+/*
+ * Initialize fpregset (floating point) note from the save area
+ */
+static void *nt_fpregset(void *ptr, struct save_area *sa)
+{
+	elf_fpregset_t fpregs;
+
+	memset(&fpregs, 0, sizeof(fpregs));
+	/* fp_regs and fp_ctrl_reg are taken over from the save area */
+	memcpy(&fpregs.fprs, &sa->fp_regs, sizeof(sa->fp_regs));
+	memcpy(&fpregs.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg));
+
+	return nt_init(ptr, NT_PRFPREG, &fpregs, sizeof(fpregs), "CORE");
+}
+
+/*
+ * Initialize timer note (descriptor: sa->timer); returns the next note slot
+ */
+static void *nt_s390_timer(void *ptr, struct save_area *sa)
+{
+	return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer),
+			 KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize TOD clock comparator note (descriptor: sa->clk_cmp)
+ */
+static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa)
+{
+	return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp,
+		       sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize TOD programmable register note (descriptor: sa->tod_reg)
+ */
+static void *nt_s390_tod_preg(void *ptr, struct save_area *sa)
+{
+	return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg,
+		       sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize control register note (descriptor: sa->ctrl_regs)
+ */
+static void *nt_s390_ctrs(void *ptr, struct save_area *sa)
+{
+	return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs,
+		       sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize prefix register note (descriptor: sa->pref_reg)
+ */
+static void *nt_s390_prefix(void *ptr, struct save_area *sa)
+{
+	return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg,
+			 sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize vxrs high note (full 128 bit VX registers 16-31, vx_regs[16] on)
+ */
+static void *nt_s390_vx_high(void *ptr, __vector128 *vx_regs)
+{
+	return nt_init(ptr, NT_S390_VXRS_HIGH, &vx_regs[16],
+		       16 * sizeof(__vector128), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize vxrs low note (lower halves of VX registers 0-15)
+ *
+ * The descriptor is built in place: 8 bytes from u[2] of each register.
+ */
+static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs)
+{
+	Elf64_Nhdr *nhdr = ptr;
+	void *out;
+	int reg;
+
+	nhdr->n_type = NT_S390_VXRS_LOW;
+	nhdr->n_descsz = 16 * 8;
+	nhdr->n_namesz = strlen(KEXEC_CORE_NOTE_NAME) + 1;
+
+	/* the name follows the header, the descriptor the padded name */
+	out = ptr + sizeof(Elf64_Nhdr);
+	memcpy(out, KEXEC_CORE_NOTE_NAME, nhdr->n_namesz);
+	out = ptr + roundup(sizeof(Elf64_Nhdr) + nhdr->n_namesz, 4);
+
+	/* Copy lower halves of SIMD registers 0-15 */
+	for (reg = 0; reg < 16; reg++, out += 8)
+		memcpy(out, &vx_regs[reg].u[2], 8);
+
+	return out;
+}
+
+/*
+ * Fill ELF notes for one CPU; VX notes only with MACHINE_HAS_VX and vx_regs
+ */
+void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vx_regs)
+{
+	static void *(* const sa_notes[])(void *, struct save_area *) = {
+		nt_prstatus, nt_fpregset, nt_s390_timer, nt_s390_tod_cmp,
+		nt_s390_tod_preg, nt_s390_ctrs, nt_s390_prefix,
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sa_notes); i++)
+		ptr = sa_notes[i](ptr, sa);
+	if (!MACHINE_HAS_VX || !vx_regs)
+		return ptr;
+	ptr = nt_s390_vx_low(ptr, vx_regs);
+	return nt_s390_vx_high(ptr, vx_regs);
+}
+
+/*
+ * Initialize prpsinfo note (new kernel): pr_sname 'R', pr_fname "vmlinux"
+ */
+static void *nt_prpsinfo(void *ptr)
+{
+	struct elf_prpsinfo info;
+
+	memset(&info, 0, sizeof(info));
+	strcpy(info.pr_fname, "vmlinux");
+	info.pr_sname = 'R';
+	return nt_init(ptr, NT_PRPSINFO, &info, sizeof(info),
+		       KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Get vmcoreinfo using lowcore->vmcore_info (new kernel); NULL if unavailable
+ */
+static void *get_vmcoreinfo_old(unsigned long *size)
+{
+	char nt_name[11], *vmcoreinfo;
+	Elf64_Nhdr note;
+	void *addr;
+
+	memset(nt_name, 0, sizeof(nt_name));
+	if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr)) ||
+	    copy_from_oldmem(&note, addr, sizeof(note)))
+		return NULL;
+	if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)
+	    || strcmp(nt_name, "VMCOREINFO") != 0)
+		return NULL;
+	vmcoreinfo = kzalloc_panic(note.n_descsz);
+	if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) {
+		kfree(vmcoreinfo);	/* do not leak it on a failed copy */
+		return NULL;
+	}
+	*size = note.n_descsz;
+	return vmcoreinfo;
+}
+
+/* Initialize vmcoreinfo note (new kernel); ptr comes back as is without data */
+static void *nt_vmcoreinfo(void *ptr)
+{
+	void *vmcoreinfo, *owned = NULL;
+	unsigned long size;
+
+	vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size);
+	if (!vmcoreinfo)
+		vmcoreinfo = owned = get_vmcoreinfo_old(&size);
+	if (vmcoreinfo)
+		ptr = nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO");
+	/* data was copied by nt_init(); free only the kzalloc'ed fallback */
+	kfree(owned);
+	return ptr;
+}
+
+/*
+ * Initialize ELF header (new kernel); returns the address right behind it
+ */
+static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
+{
+	memset(ehdr, 0, sizeof(*ehdr));
+	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+	ehdr->e_ident[EI_CLASS] = ELFCLASS64;
+	ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
+	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+	memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
+	ehdr->e_type = ET_CORE;
+	ehdr->e_machine = EM_S390;
+	ehdr->e_version = EV_CURRENT;
+	ehdr->e_phoff = sizeof(Elf64_Ehdr);	/* phdrs directly follow */
+	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+	ehdr->e_phentsize = sizeof(Elf64_Phdr);
+	ehdr->e_phnum = mem_chunk_cnt + 1;	/* one extra program header */
+	return ehdr + 1;
+}
+
+/*
+ * Return CPU count for ELF header (new kernel); empty (NULL) slots are skipped
+ */
+static int get_cpu_cnt(void)
+{
+	int i, cpus = 0;
+
+	for (i = 0; i < dump_save_areas.count; i++) {
+		if (dump_save_areas.areas[i] &&
+		    dump_save_areas.areas[i]->sa.pref_reg != 0)
+			cpus++;
+	}
+	return cpus;
+}
+
+/*
+ * Count the ranges for_each_dump_mem_range() visits (new kernel)
+ */
+static int get_mem_chunk_cnt(void)
+{
+	u64 pos;
+	int chunks = 0;
+
+	for_each_dump_mem_range(pos, NUMA_NO_NODE, NULL, NULL, NULL)
+		chunks++;
+	return chunks;
+}
+
+/*
+ * Initialize ELF loads (new kernel): one PT_LOAD per range, offset == start
+ */
+static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
+{
+	phys_addr_t start, end;
+	u64 idx;
+
+	for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) {
+		phdr->p_type = PT_LOAD;
+		phdr->p_flags = PF_R | PF_W | PF_X;
+		phdr->p_align = PAGE_SIZE;
+		phdr->p_offset = start;
+		phdr->p_vaddr = start;
+		phdr->p_paddr = start;
+		phdr->p_filesz = end - start;
+		phdr->p_memsz = end - start;
+		phdr++;
+	}
+}
+
+/*
+ * Initialize notes (new kernel) and the PT_NOTE header; returns end of notes
+ */
+static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
+{
+	struct save_area_ext *sa_ext;
+	void *ptr_start = ptr;
+	int i;
+
+	ptr = nt_prpsinfo(ptr);
+	for (i = 0; i < dump_save_areas.count; i++) {
+		sa_ext = dump_save_areas.areas[i];
+		/* skip never-populated slots and a zero prefix register */
+		if (!sa_ext || sa_ext->sa.pref_reg == 0)
+			continue;
+		ptr = fill_cpu_elf_notes(ptr, &sa_ext->sa, sa_ext->vx_regs);
+	}
+	ptr = nt_vmcoreinfo(ptr);
+	memset(phdr, 0, sizeof(*phdr));
+	phdr->p_type = PT_NOTE;
+	phdr->p_offset = notes_offset;
+	phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start);
+	phdr->p_memsz = phdr->p_filesz;
+	return ptr;
+}
+
+/*
+ * Create ELF core header (new kernel): Ehdr, PT_NOTE + PT_LOAD phdrs, notes
+ */
+int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
+{
+	Elf64_Phdr *phdr_notes, *phdr_loads;
+	int mem_chunk_cnt;
+	void *ptr, *hdr;
+	u32 alloc_size;
+	u64 hdr_off;
+
+	/* If we are not in kdump or zfcpdump mode return */
+	if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP)
+		return 0;
+	/* If elfcorehdr= has been passed via cmdline, we use that one */
+	if (elfcorehdr_addr != ELFCORE_ADDR_MAX)
+		return 0;
+	/* If we cannot get HSA size for zfcpdump return error */
+	if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp_get_hsa_size())
+		return -ENODEV;
+
+	/* For kdump, exclude previous crashkernel memory */
+	if (OLDMEM_BASE) {
+		oldmem_region.base = OLDMEM_BASE;
+		oldmem_region.size = OLDMEM_SIZE;
+		oldmem_type.total_size = OLDMEM_SIZE;
+	}
+
+	mem_chunk_cnt = get_mem_chunk_cnt();
+
+	alloc_size = 0x1000 + get_cpu_cnt() * 0x4a0 + /* presumably per-CPU notes */
+		mem_chunk_cnt * sizeof(Elf64_Phdr);
+	hdr = kzalloc_panic(alloc_size);
+	/* Init elf header */
+	ptr = ehdr_init(hdr, mem_chunk_cnt);
+	/* Init program headers */
+	phdr_notes = ptr;	/* the first program header is the PT_NOTE one */
+	ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
+	phdr_loads = ptr;	/* followed by mem_chunk_cnt load headers */
+	ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
+	/* Init notes */
+	hdr_off = PTR_DIFF(ptr, hdr);
+	ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); /* absolute address */
+	/* Init loads */
+	hdr_off = PTR_DIFF(ptr, hdr);	/* now the size of header plus notes */
+	loads_init(phdr_loads, hdr_off);
+	*addr = (unsigned long long) hdr;
+	elfcorehdr_newmem = hdr;
+	*size = (unsigned long long) hdr_off;
+	BUG_ON(elfcorehdr_size > alloc_size); /* NOTE(review): presumably == *size - confirm */
+	return 0;
+}
+
+/*
+ * Free ELF core header (new kernel)
+ */
+void elfcorehdr_free(unsigned long long addr)
+{
+	/* nothing is freed unless elfcorehdr_newmem is set */
+	if (elfcorehdr_newmem)
+		kfree((void *)(unsigned long)addr);
+}
+
+/*
+ * Read from ELF header; *ppos is used as the memory address to read from
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+	unsigned long shift = elfcorehdr_newmem ? 0 : OLDMEM_BASE;
+
+	/* without a header of our own the address is lowered by OLDMEM_BASE */
+	memcpy(buf, (void *)(unsigned long)*ppos - shift, count);
+	*ppos += count;
+	return count;
+}
+
+/*
+ * Read from ELF notes data, either from our own header or from old memory
+ */
+ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
+{
+	void *from = (void *)(unsigned long)*ppos;
+	int err = 0;
+
+	/* with elfcorehdr_newmem set a plain memcpy() is used */
+	if (elfcorehdr_newmem)
+		memcpy(buf, from, count);
+	else
+		err = copy_from_oldmem(buf, from, count);
+	if (err)
+		return err;
+	*ppos += count;
+	return count;
+}
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
new file mode 100644
index 000000000..c1f21aca7
--- /dev/null
+++ b/arch/s390/kernel/debug.c
@@ -0,0 +1,1540 @@
+/*
+ *   S/390 debug facility
+ *
+ *    Copyright IBM Corp. 1999, 2012
+ *
+ *    Author(s): Michael Holzheu (holzheu@de.ibm.com),
+ *               Holger Smolinski (Holger.Smolinski@de.ibm.com)
+ *
+ *    Bugreports to: <Linux390@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "s390dbf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/sysctl.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+
+#include <asm/debug.h>
+
+#define DEBUG_PROLOG_ENTRY -1 /* act_entry value that selects the view prolog */
+
+#define ALL_AREAS 0 /* copy all debug areas */
+#define NO_AREAS  1 /* copy no debug areas */
+
+/* typedefs */
+
+typedef struct file_private_info {
+	loff_t offset;			/* offset of last read in file */
+	int    act_area;                /* number of last formatted area */
+	int    act_page;                /* act page in given area */
+	int    act_entry;               /* last formatted entry (offset */
+                                        /* relative to beginning of last */
+                                        /* formatted page) */
+	size_t act_entry_offset;        /* up to this offset we copied */
+					/* in last read the last formatted */
+					/* entry to userland */
+	char   temp_buf[2048];		/* buffer for output */
+	debug_info_t *debug_info_org;   /* original debug information */
+	debug_info_t *debug_info_snap;	/* snapshot of debug information */
+	struct debug_view *view;	/* used view of debug info */
+} file_private_info_t;
+
+typedef struct
+{
+	char *string;
+	/*
+	 * This assumes that all args are converted into longs;
+	 * on L/390 this is the case for all types of parameters
+	 * except floats and long long (32 bit)
+	 *
+	 */
+	long args[0];
+} debug_sprintf_entry_t;
+
+
+/* internal function prototypes */
+
+static int debug_init(void);
+static ssize_t debug_output(struct file *file, char __user *user_buf,
+			size_t user_len, loff_t * offset);
+static ssize_t debug_input(struct file *file, const char __user *user_buf,
+			size_t user_len, loff_t * offset);
+static int debug_open(struct inode *inode, struct file *file);
+static int debug_close(struct inode *inode, struct file *file);
+static debug_info_t *debug_info_create(const char *name, int pages_per_area,
+			int nr_areas, int buf_size, umode_t mode);
+static void debug_info_get(debug_info_t *);
+static void debug_info_put(debug_info_t *);
+static int debug_prolog_level_fn(debug_info_t * id,
+			struct debug_view *view, char *out_buf);
+static int debug_input_level_fn(debug_info_t * id, struct debug_view *view,
+			struct file *file, const char __user *user_buf,
+			size_t user_buf_size, loff_t * offset);
+static int debug_prolog_pages_fn(debug_info_t * id,
+			struct debug_view *view, char *out_buf);
+static int debug_input_pages_fn(debug_info_t * id, struct debug_view *view,
+			struct file *file, const char __user *user_buf,
+			size_t user_buf_size, loff_t * offset);
+static int debug_input_flush_fn(debug_info_t * id, struct debug_view *view,
+			struct file *file, const char __user *user_buf,
+			size_t user_buf_size, loff_t * offset);
+static int debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view,
+			char *out_buf, const char *in_buf);
+static int debug_raw_format_fn(debug_info_t * id,
+			struct debug_view *view, char *out_buf,
+			const char *in_buf);
+static int debug_raw_header_fn(debug_info_t * id, struct debug_view *view,
+			int area, debug_entry_t * entry, char *out_buf);
+
+static int debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view,
+			char *out_buf, debug_sprintf_entry_t *curr_event);
+
+/* globals */
+
+struct debug_view debug_raw_view = {
+	"raw",				/* view name */
+	NULL,
+	&debug_raw_header_fn,		/* raw header callback */
+	&debug_raw_format_fn,		/* raw format callback */
+	NULL,
+	NULL
+};
+EXPORT_SYMBOL(debug_raw_view);
+
+struct debug_view debug_hex_ascii_view = {
+	"hex_ascii",			/* view name */
+	NULL,
+	&debug_dflt_header_fn,		/* default header callback */
+	&debug_hex_ascii_format_fn,	/* hex/ascii format callback */
+	NULL,
+	NULL
+};
+EXPORT_SYMBOL(debug_hex_ascii_view);
+
+static struct debug_view debug_level_view = {
+	"level",			/* view name */
+	&debug_prolog_level_fn,		/* prolog callback */
+	NULL,
+	NULL,
+	&debug_input_level_fn,		/* input callback */
+	NULL
+};
+
+static struct debug_view debug_pages_view = {
+	"pages",			/* view name */
+	&debug_prolog_pages_fn,		/* prolog callback */
+	NULL,
+	NULL,
+	&debug_input_pages_fn,		/* input callback */
+	NULL
+};
+
+static struct debug_view debug_flush_view = {
+        "flush",			/* view name */
+        NULL,
+        NULL,
+        NULL,
+        &debug_input_flush_fn,		/* input callback, the only one set */
+        NULL
+};
+
+struct debug_view debug_sprintf_view = {
+	"sprintf",			/* view name */
+	NULL,
+	&debug_dflt_header_fn,		/* default header callback */
+	(debug_format_proc_t*)&debug_sprintf_format_fn, /* cast: entry-typed arg */
+	NULL,
+	NULL
+};
+EXPORT_SYMBOL(debug_sprintf_view);
+
+/* used by dump analysis tools to determine version of debug feature */
+static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION;
+
+/* static globals */
+
+static debug_info_t *debug_area_first = NULL;	/* head of the debug-info list */
+static debug_info_t *debug_area_last = NULL;	/* tail of the debug-info list */
+static DEFINE_MUTEX(debug_mutex);
+
+static int initialized;
+static int debug_critical;
+
+static const struct file_operations debug_file_ops = {
+	.owner   = THIS_MODULE,
+	.read    = debug_output,
+	.write   = debug_input,
+	.open    = debug_open,
+	.release = debug_close,
+	.llseek  = no_llseek,
+};
+
+static struct dentry *debug_debugfs_root_entry;	/* parent of the per-info dirs */
+
+/* functions */
+
+/*
+ * debug_areas_alloc
+ * - Debug areas are implemented as a three-dimensional array:
+ *   areas[areanumber][pagenumber][pageoffset]
+ * - Returns NULL if any allocation fails; nothing stays allocated then.
+ */
+
+static debug_entry_t***
+debug_areas_alloc(int pages_per_area, int nr_areas)
+{
+	debug_entry_t*** areas;
+	int i,j;
+
+	/* kmalloc_array() fails cleanly if the size computation overflows */
+	areas = kmalloc_array(nr_areas, sizeof(debug_entry_t**), GFP_KERNEL);
+	if (!areas)
+		goto fail_malloc_areas;
+	for (i = 0; i < nr_areas; i++) {
+		areas[i] = kmalloc_array(pages_per_area,
+					 sizeof(debug_entry_t*), GFP_KERNEL);
+		if (!areas[i]) {
+			goto fail_malloc_areas2;
+		}
+		for(j = 0; j < pages_per_area; j++) {
+			areas[i][j] = kzalloc(PAGE_SIZE, GFP_KERNEL);
+			if(!areas[i][j]) {
+				for(j--; j >=0 ; j--) {
+					kfree(areas[i][j]);
+				}
+				kfree(areas[i]);
+				goto fail_malloc_areas2;
+			}
+		}
+	}
+	return areas;
+
+fail_malloc_areas2:
+	/* areas 0 .. i-1 are complete and are released in full */
+	for(i--; i >= 0; i--){
+		for(j=0; j < pages_per_area;j++){
+			kfree(areas[i][j]);
+		}
+		kfree(areas[i]);
+	}
+	kfree(areas);
+fail_malloc_areas:
+	return NULL;
+}
+
+
+/*
+ * debug_info_alloc
+ * - alloc new debug-info with reference count 0; returns NULL on failure
+ */
+
+static debug_info_t*
+debug_info_alloc(const char *name, int pages_per_area, int nr_areas,
+		 int buf_size, int level, int mode)
+{
+	debug_info_t* rc;
+
+	/* alloc everything */
+
+	rc = kmalloc(sizeof(debug_info_t), GFP_KERNEL);
+	if(!rc)
+		goto fail_malloc_rc;
+	rc->active_entries = kcalloc(nr_areas, sizeof(int), GFP_KERNEL);
+	if(!rc->active_entries)
+		goto fail_malloc_active_entries;
+	rc->active_pages = kcalloc(nr_areas, sizeof(int), GFP_KERNEL);
+	if(!rc->active_pages)
+		goto fail_malloc_active_pages;
+	if((mode == ALL_AREAS) && (pages_per_area != 0)){
+		rc->areas = debug_areas_alloc(pages_per_area, nr_areas);
+		if(!rc->areas)
+			goto fail_malloc_areas;
+	} else {
+		rc->areas = NULL;	/* NO_AREAS mode or no pages requested */
+	}
+
+	/* initialize members */
+
+	spin_lock_init(&rc->lock);
+	rc->pages_per_area = pages_per_area;
+	rc->nr_areas       = nr_areas;
+	rc->active_area    = 0;
+	rc->level          = level;
+	rc->buf_size       = buf_size;
+	rc->entry_size     = sizeof(debug_entry_t) + buf_size; /* header + data */
+	strlcpy(rc->name, name, sizeof(rc->name));
+	memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
+	memset(rc->debugfs_entries, 0 ,DEBUG_MAX_VIEWS *
+		sizeof(struct dentry*));
+	atomic_set(&(rc->ref_count), 0);	/* no reference is taken here */
+
+	return rc;
+
+fail_malloc_areas:	/* unwind in reverse order of allocation */
+	kfree(rc->active_pages);
+fail_malloc_active_pages:
+	kfree(rc->active_entries);
+fail_malloc_active_entries:
+	kfree(rc);
+fail_malloc_rc:
+	return NULL;
+}
+
+/*
+ * debug_areas_free
+ * - free all debug areas and reset db_info->areas to NULL
+ */
+
+static void
+debug_areas_free(debug_info_t* db_info)
+{
+	debug_entry_t ***areas = db_info->areas;
+	int area, page;
+
+	if (areas == NULL)
+		return;
+	for (area = 0; area < db_info->nr_areas; area++) {
+		for (page = 0; page < db_info->pages_per_area; page++)
+			kfree(areas[area][page]);
+		kfree(areas[area]);
+	}
+	kfree(areas);
+	db_info->areas = NULL;
+}
+
+/*
+ * debug_info_free
+ * - free the debug areas, the per-area index arrays and db_info itself
+ */
+
+static void
+debug_info_free(debug_info_t* db_info){
+	kfree(db_info->active_pages);
+	kfree(db_info->active_entries);
+	debug_areas_free(db_info);
+	kfree(db_info);
+}
+
+/*
+ * debug_info_create
+ * - create new debug-info, give it a debugfs directory and append it to the
+ *   list of debug areas; returns NULL if the allocation fails
+ */
+
+static debug_info_t*
+debug_info_create(const char *name, int pages_per_area, int nr_areas,
+		  int buf_size, umode_t mode)
+{
+	debug_info_t *info;
+
+	info = debug_info_alloc(name, pages_per_area, nr_areas, buf_size,
+				DEBUG_DEFAULT_LEVEL, ALL_AREAS);
+	if (!info)
+		return NULL;
+
+	info->mode = mode & ~S_IFMT;	/* strip the file type bits */
+
+	/* create root directory */
+	info->debugfs_root_entry = debugfs_create_dir(info->name,
+					debug_debugfs_root_entry);
+
+	/* append new element to linked list */
+	info->next = NULL;
+	if (debug_area_first) {
+		/* append element to end of list */
+		info->prev = debug_area_last;
+		debug_area_last->next = info;
+	} else {
+		/* first element in list */
+		info->prev = NULL;
+		debug_area_first = info;
+	}
+	debug_area_last = info;
+
+	debug_info_get(info);
+	return info;
+}
+
+/*
+ * debug_info_copy
+ * - copy debug-info (areas too unless mode is NO_AREAS); NULL if alloc fails
+ */
+
+static debug_info_t*
+debug_info_copy(debug_info_t* in, int mode)
+{
+        int i,j;
+        debug_info_t* rc;
+        unsigned long flags;
+
+	/* get a consistent copy of the debug areas */
+	do {
+		rc = debug_info_alloc(in->name, in->pages_per_area,
+			in->nr_areas, in->buf_size, in->level, mode);
+		spin_lock_irqsave(&in->lock, flags); /* taken even if rc is NULL */
+		if(!rc)
+			goto out;	/* 'out' drops the lock again */
+		/* has something changed in the meantime ? */
+		if((rc->pages_per_area == in->pages_per_area) &&
+		   (rc->nr_areas == in->nr_areas)) {
+			break;		/* leave the loop with the lock held */
+		}
+		spin_unlock_irqrestore(&in->lock, flags);
+		debug_info_free(rc);	/* sizes changed: retry with new ones */
+	} while (1);
+
+	if (mode == NO_AREAS)
+                goto out;
+
+        for(i = 0; i < in->nr_areas; i++){
+		for(j = 0; j < in->pages_per_area; j++) {
+			memcpy(rc->areas[i][j], in->areas[i][j],PAGE_SIZE);
+		}
+        }
+out:
+        spin_unlock_irqrestore(&in->lock, flags);
+        return rc;
+}
+
+/*
+ * debug_info_get
+ * - increments reference count for debug-info; a NULL db_info is ignored
+ */
+
+static void
+debug_info_get(debug_info_t * db_info)
+{
+	if (db_info)
+		atomic_inc(&db_info->ref_count);
+}
+
+/*
+ * debug_info_put:
+ * - drops a reference; the last one removes debugfs entries, unlinks and frees
+ */
+
+static void
+debug_info_put(debug_info_t *db_info)
+{
+	int view;
+
+	if (!db_info || !atomic_dec_and_test(&db_info->ref_count))
+		return;
+	for (view = 0; view < DEBUG_MAX_VIEWS; view++) {
+		if (db_info->views[view])
+			debugfs_remove(db_info->debugfs_entries[view]);
+	}
+	debugfs_remove(db_info->debugfs_root_entry);
+	/* fix up list head and tail, then the neighbours */
+	if (debug_area_first == db_info)
+		debug_area_first = db_info->next;
+	if (debug_area_last == db_info)
+		debug_area_last = db_info->prev;
+	if (db_info->prev)
+		db_info->prev->next = db_info->next;
+	if (db_info->next)
+		db_info->next->prev = db_info->prev;
+	debug_info_free(db_info);
+}
+
+/*
+ * debug_format_entry:
+ * - format one debug entry into temp_buf and return size of formatted data
+ */
+
+static int
+debug_format_entry(file_private_info_t *p_info)
+{
+	debug_info_t *snap = p_info->debug_info_snap;
+	struct debug_view *view = p_info->view;
+	debug_entry_t *entry;
+	size_t len = 0;
+
+	if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
+		/* print prolog */
+		if (view->prolog_proc)
+			len += view->prolog_proc(snap, view, p_info->temp_buf);
+		return len;
+	}
+	if (!snap->areas)	/* prolog only view, or 'pages_per_area' is 0 */
+		return len;
+	entry = (debug_entry_t *) ((char *)
+		snap->areas[p_info->act_area][p_info->act_page] +
+		p_info->act_entry);
+	if (entry->id.stck == 0LL)
+		return len;	/* empty entry */
+	if (view->header_proc)
+		len += view->header_proc(snap, view, p_info->act_area,
+					 entry, p_info->temp_buf + len);
+	if (view->format_proc)
+		len += view->format_proc(snap, view, p_info->temp_buf + len,
+					 DEBUG_DATA(entry));
+	return len;
+}
+
+/*
+ * debug_next_entry:
+ * - advance the read position in p_info by one entry
+ * - after the prolog the position is set to entry 0 of page 0
+ * - returns 1 if there is nothing more to read (snapshot without areas
+ *   or last area passed), otherwise 0
+ */
+
+static inline int
+debug_next_entry(file_private_info_t *p_info)
+{
+	debug_info_t *snap = p_info->debug_info_snap;
+
+	if (p_info->act_entry == DEBUG_PROLOG_ENTRY) {
+		p_info->act_entry = 0;
+		p_info->act_page = 0;
+		return 0;
+	}
+	if (!snap->areas)
+		return 1;
+
+	p_info->act_entry += snap->entry_size;
+	/* still room for a complete entry on the current page ? */
+	if (p_info->act_entry <= (PAGE_SIZE - snap->entry_size))
+		return 0;
+
+	/* end of page reached: continue with the next page */
+	p_info->act_entry = 0;
+	p_info->act_page += 1;
+	if ((p_info->act_page % snap->pages_per_area) == 0) {
+		/* end of area reached: continue with the next area */
+		p_info->act_area++;
+		p_info->act_page = 0;
+	}
+	return (p_info->act_area >= snap->nr_areas) ? 1 : 0;
+}
+
+/*
+ * debug_output:
+ * - called for user read()
+ * - copies formatted debug entries to the user buffer
+ * - entries are formatted one at a time into p_info->temp_buf; an
+ *   entry that does not fit completely into the user buffer is
+ *   continued by the next read() via act_entry_offset
+ * - returns the number of bytes copied, 0 if all areas have been read,
+ *   -EPIPE if *offset does not match the position remembered in
+ *   p_info, -EFAULT if copy_to_user() fails
+ */
+
+static ssize_t
+debug_output(struct file *file,		/* file descriptor */
+	    char __user *user_buf,	/* user buffer */
+	    size_t  len,		/* length of buffer */
+	    loff_t *offset)		/* offset in the file */
+{
+	size_t count = 0;
+	size_t entry_offset;
+	file_private_info_t *p_info;
+
+	p_info = ((file_private_info_t *) file->private_data);
+	if (*offset != p_info->offset) 
+		return -EPIPE;
+	if(p_info->act_area >= p_info->debug_info_snap->nr_areas)
+		return 0;
+	entry_offset = p_info->act_entry_offset; /* bytes of the current entry already delivered */
+	while(count < len){
+		int formatted_line_size;
+		int formatted_line_residue;
+		int user_buf_residue;
+		size_t copy_size;
+
+		formatted_line_size = debug_format_entry(p_info);
+		formatted_line_residue = formatted_line_size - entry_offset;
+		user_buf_residue = len-count;
+		copy_size = min(user_buf_residue, formatted_line_residue);
+		if(copy_size){
+			if (copy_to_user(user_buf + count, p_info->temp_buf
+					+ entry_offset, copy_size))
+				return -EFAULT;
+			count += copy_size;
+			entry_offset += copy_size;
+		}
+		if(copy_size == formatted_line_residue){ /* entry delivered completely */
+			entry_offset = 0;
+			if(debug_next_entry(p_info))
+				goto out;
+		}
+	}
+out:
+	p_info->offset           = *offset + count;
+	p_info->act_entry_offset = entry_offset;
+	*offset = p_info->offset;
+	return count;
+}
+
+/*
+ * debug_input:
+ * - called for user write()
+ * - hands the data to the input function of the view while holding
+ *   debug_mutex
+ * - returns the result of the input function, or -EPERM if the view
+ *   has no input function
+ */
+
+static ssize_t
+debug_input(struct file *file, const char __user *user_buf, size_t length,
+		loff_t *offset)
+{
+	file_private_info_t *p_info;
+	int rc;
+
+	mutex_lock(&debug_mutex);
+	p_info = file->private_data;
+	if (!p_info->view->input_proc) {
+		rc = -EPERM;
+	} else {
+		rc = p_info->view->input_proc(p_info->debug_info_org,
+					      p_info->view, file, user_buf,
+					      length, offset);
+	}
+	mutex_unlock(&debug_mutex);
+	return rc;		/* number of input characters */
+}
+
+/*
+ * debug_open:
+ * - called for user open()
+ * - looks up the view that belongs to the opened debugfs file
+ * - takes a snapshot of the debug-info and stores it, together with
+ *   the read position, in the private_data area of the file handle
+ * - takes a reference on the original debug-info
+ * - returns 0, -EINVAL if no view matches the file, -ENOMEM if the
+ *   snapshot or the private data cannot be allocated
+ */
+
+static int
+debug_open(struct inode *inode, struct file *file)
+{
+	int i, rc = 0;
+	file_private_info_t *p_info;
+	debug_info_t *debug_info, *debug_info_snapshot;
+
+	mutex_lock(&debug_mutex);
+	debug_info = file_inode(file)->i_private;
+	/* find debug view */
+	for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+		if (!debug_info->views[i])
+			continue;
+		else if (debug_info->debugfs_entries[i] ==
+			 file->f_path.dentry) {
+			goto found;	/* found view ! */
+		}
+	}
+	/* no entry found */
+	rc = -EINVAL;
+	goto out;
+
+found:
+
+	/* Make snapshot of current debug areas to get it consistent.     */
+	/* To copy all the areas is only needed, if we have a view which  */
+	/* formats the debug areas. */
+
+	if(!debug_info->views[i]->format_proc &&
+		!debug_info->views[i]->header_proc){
+		debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
+	} else {
+		debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
+	}
+
+	if(!debug_info_snapshot){
+		rc = -ENOMEM;
+		goto out;
+	}
+	p_info = kmalloc(sizeof(file_private_info_t),
+						GFP_KERNEL);
+	if(!p_info){
+		debug_info_free(debug_info_snapshot);
+		rc = -ENOMEM;
+		goto out;
+	}
+	p_info->offset = 0;
+	p_info->debug_info_snap = debug_info_snapshot;
+	p_info->debug_info_org  = debug_info;
+	p_info->view = debug_info->views[i];
+	p_info->act_area = 0;
+	p_info->act_page = 0;
+	p_info->act_entry = DEBUG_PROLOG_ENTRY; /* reading starts with the prolog */
+	p_info->act_entry_offset = 0;
+	file->private_data = p_info;
+	debug_info_get(debug_info); /* dropped again in debug_close() */
+	nonseekable_open(inode, file);
+out:
+	mutex_unlock(&debug_mutex);
+	return rc;
+}
+
+/*
+ * debug_close:
+ * - called for user close()
+ * - frees the snapshot, drops the reference on the original debug-info
+ *   and deletes the private_data area of the file handle
+ */
+
+static int
+debug_close(struct inode *inode, struct file *file)
+{
+	file_private_info_t *p_info = file->private_data;
+
+	if (p_info->debug_info_snap)
+		debug_info_free(p_info->debug_info_snap);
+	debug_info_put(p_info->debug_info_org);
+	kfree(p_info);
+	return 0;		/* success */
+}
+
+/*
+ * debug_register_mode:
+ * - Creates and initializes debug area for the caller
+ *   The mode parameter allows to specify access rights for the s390dbf files
+ * - uid and gid are not applied: values other than 0 only trigger a
+ *   warning
+ * - registers the level, flush and pages views for the new debug area
+ * - must not be called before debug_init() has run (BUG_ON)
+ * - Returns handle for debug area, or NULL if it could not be created
+ */
+
+debug_info_t *debug_register_mode(const char *name, int pages_per_area,
+				  int nr_areas, int buf_size, umode_t mode,
+				  uid_t uid, gid_t gid)
+{
+	debug_info_t *rc = NULL;
+
+	/* Since debugfs currently does not support uid/gid other than root, */
+	/* we do not allow gid/uid != 0 until we get support for that. */
+	if ((uid != 0) || (gid != 0))
+		pr_warning("Root becomes the owner of all s390dbf files "
+			   "in sysfs\n");
+	BUG_ON(!initialized);
+	mutex_lock(&debug_mutex);
+
+        /* create new debug_info */
+
+	rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode);
+	if(!rc) 
+		goto out;
+	debug_register_view(rc, &debug_level_view); /* return values of the three calls are not checked */
+        debug_register_view(rc, &debug_flush_view);
+	debug_register_view(rc, &debug_pages_view);
+out:
+        if (!rc){
+		pr_err("Registering debug feature %s failed\n", name);
+        }
+	mutex_unlock(&debug_mutex);
+	return rc;
+}
+EXPORT_SYMBOL(debug_register_mode);
+
+/*
+ * debug_register:
+ * - creates and initializes debug area for the caller, with files that
+ *   are readable and writable by the owner only and uid/gid 0
+ * - returns handle for debug area
+ */
+
+debug_info_t *debug_register(const char *name, int pages_per_area,
+			     int nr_areas, int buf_size)
+{
+	const umode_t owner_rw = S_IRUSR | S_IWUSR;
+
+	return debug_register_mode(name, pages_per_area, nr_areas, buf_size,
+				   owner_rw, 0, 0);
+}
+EXPORT_SYMBOL(debug_register);
+
+/*
+ * debug_unregister:
+ * - give back debug area: drops one reference under debug_mutex
+ * - a NULL handle is ignored
+ */
+
+void
+debug_unregister(debug_info_t *id)
+{
+	if (id) {
+		mutex_lock(&debug_mutex);
+		debug_info_put(id);
+		mutex_unlock(&debug_mutex);
+	}
+}
+EXPORT_SYMBOL(debug_unregister);
+
+/*
+ * debug_set_size:
+ * - set area size (number of pages) and number of areas
+ * - the new areas are allocated before id->lock is taken; under the
+ *   lock the old areas are freed and the write position is reset
+ * - pages_per_area == 0 leaves the debug-info without areas
+ * - returns 0, -EINVAL for a NULL id, nr_areas <= 0 or a negative
+ *   pages_per_area, -ENOMEM if the new areas cannot be allocated
+ * - NOTE(review): active_entries/active_pages are cleared for the new
+ *   nr_areas but are not reallocated here; the visible caller passes
+ *   id->nr_areas - confirm before calling with a different value
+ */
+static int
+debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area)
+{
+	unsigned long flags;
+	debug_entry_t *** new_areas;
+	int rc=0;
+
+	if(!id || (nr_areas <= 0) || (pages_per_area < 0))
+		return -EINVAL;
+	if(pages_per_area > 0){
+		new_areas = debug_areas_alloc(pages_per_area, nr_areas);
+		if(!new_areas) {
+			pr_info("Allocating memory for %i pages failed\n",
+				pages_per_area);
+			rc = -ENOMEM;
+			goto out;
+		}
+	} else {
+		new_areas = NULL;
+	}
+	spin_lock_irqsave(&id->lock,flags);
+	debug_areas_free(id);
+	id->areas = new_areas;
+	id->nr_areas = nr_areas;
+	id->pages_per_area = pages_per_area;
+	id->active_area = 0;
+	memset(id->active_entries,0,sizeof(int)*id->nr_areas);
+	memset(id->active_pages, 0, sizeof(int)*id->nr_areas);
+	spin_unlock_irqrestore(&id->lock,flags);
+	pr_info("%s: set new size (%i pages)\n" ,id->name, pages_per_area);
+out:
+	return rc;
+}
+
+/*
+ * debug_set_level:
+ * - set actual debug level under id->lock
+ * - DEBUG_OFF_LEVEL switches the debug area off
+ * - any other value outside 0..DEBUG_MAX_LEVEL is reported and ignored
+ * - a NULL handle is ignored
+ */
+
+void
+debug_set_level(debug_info_t *id, int new_level)
+{
+	unsigned long irq_flags;
+
+	if (!id)
+		return;
+	spin_lock_irqsave(&id->lock, irq_flags);
+	if (new_level == DEBUG_OFF_LEVEL) {
+		id->level = DEBUG_OFF_LEVEL;
+		pr_info("%s: switched off\n",id->name);
+	} else if ((new_level >= 0) && (new_level <= DEBUG_MAX_LEVEL)) {
+		id->level = new_level;
+	} else {
+		pr_info("%s: level %i is out of range (%i - %i)\n",
+			id->name, new_level, 0, DEBUG_MAX_LEVEL);
+	}
+	spin_unlock_irqrestore(&id->lock, irq_flags);
+}
+EXPORT_SYMBOL(debug_set_level);
+
+/*
+ * proceed_active_entry:
+ * - set active entry to next in the ring buffer
+ * - if no complete entry fits on the current page any more, writing
+ *   continues at the start of the next page of the active area,
+ *   wrapping around after the last page
+ */
+
+static inline void
+proceed_active_entry(debug_info_t *id)
+{
+	int *entry_off = &id->active_entries[id->active_area];
+	int *page_idx = &id->active_pages[id->active_area];
+
+	*entry_off += id->entry_size;
+	if (*entry_off <= (PAGE_SIZE - id->entry_size))
+		return;
+	*entry_off = 0;
+	*page_idx = (*page_idx + 1) % id->pages_per_area;
+}
+
+/*
+ * proceed_active_area:
+ * - set active area to next in the ring buffer, wrapping around after
+ *   the last area
+ */
+
+static inline void
+proceed_active_area(debug_info_t *id)
+{
+	id->active_area = (id->active_area + 1) % id->nr_areas;
+}
+
+/*
+ * get_active_entry:
+ * - returns the entry at the current write position: the active page
+ *   of the active area plus the byte offset of the active entry
+ */
+
+static inline debug_entry_t*
+get_active_entry(debug_info_t *id)
+{
+	char *page;
+
+	page = (char *) id->areas[id->active_area]
+			[id->active_pages[id->active_area]];
+	return (debug_entry_t *) (page + id->active_entries[id->active_area]);
+}
+
+/*
+ * debug_finish_entry:
+ * - set timestamp, caller address, cpu number etc.
+ * - advances the write position to the next entry; for an exception
+ *   the active area is switched as well
+ * - NOTE(review): the caller address comes from
+ *   __builtin_return_address(0), so what is recorded presumably depends
+ *   on this function being inlined into the exported writers - confirm
+ */
+
+static inline void
+debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level,
+			int exception)
+{
+	active->id.stck = get_tod_clock_fast();
+	active->id.fields.cpuid = smp_processor_id();
+	active->caller = __builtin_return_address(0);
+	active->id.fields.exception = exception;
+	active->id.fields.level     = level;
+	proceed_active_entry(id);
+	if(exception)
+		proceed_active_area(id);
+}
+
+static int debug_stoppable=1; /* if set, debug_stop_all() may clear debug_active */
+static int debug_active=1; /* if 0, the event/exception writers return without logging */
+
+#define CTL_S390DBF_STOPPABLE 5678 /* not referenced in the visible part of this file */
+#define CTL_S390DBF_ACTIVE 5679 /* not referenced in the visible part of this file */
+
+/*
+ * proc handler for the debug_active sysctl
+ * reads are always passed on to proc_dointvec; a write is passed on
+ * only if debug_stoppable is set or debug_active is already off,
+ * otherwise it is ignored and 0 is returned
+ */
+static int
+s390dbf_procactive(struct ctl_table *table, int write,
+                     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	if (write && !debug_stoppable && debug_active)
+		return 0;
+	return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+
+
+static struct ctl_table s390dbf_table[] = { /* entries below the "s390dbf" sysctl directory */
+	{
+		.procname       = "debug_stoppable",
+		.data		= &debug_stoppable,
+		.maxlen		= sizeof(int),
+		.mode           = S_IRUGO | S_IWUSR,
+		.proc_handler   = proc_dointvec,
+	},
+	 {
+		.procname       = "debug_active",
+		.data		= &debug_active,
+		.maxlen		= sizeof(int),
+		.mode           = S_IRUGO | S_IWUSR,
+		.proc_handler   = s390dbf_procactive, /* filters writes, see s390dbf_procactive() */
+	},
+	{ }
+};
+
+static struct ctl_table s390dbf_dir_table[] = { /* registered by debug_init() */
+	{
+		.procname       = "s390dbf",
+		.maxlen         = 0,
+		.mode           = S_IRUGO | S_IXUGO,
+		.child          = s390dbf_table,
+	},
+	{ }
+};
+
+static struct ctl_table_header *s390dbf_sysctl_header; /* result of register_sysctl_table() in debug_init() */
+
+/*
+ * debug_stop_all:
+ * - switches off logging for all debug areas by clearing debug_active,
+ *   but only if debug_stoppable is set
+ */
+void
+debug_stop_all(void)
+{
+	if (!debug_stoppable)
+		return;
+	debug_active = 0;
+}
+EXPORT_SYMBOL(debug_stop_all);
+
+void debug_set_critical(void)
+{
+	debug_critical = 1; /* from now on the writers use spin_trylock and drop the entry if id->lock is busy */
+}
+
+/*
+ * debug_event_common:
+ * - write debug entry with given size
+ * - at most id->buf_size bytes of buf are stored, the rest of the
+ *   entry data is zeroed
+ * - returns the written entry, or NULL if logging is inactive, the
+ *   debug area has no pages, or (in critical mode) id->lock is busy
+ */
+
+debug_entry_t*
+debug_event_common(debug_info_t *id, int level, const void *buf, int len)
+{
+	debug_entry_t *entry;
+	unsigned long irq_flags;
+
+	if (!debug_active || !id->areas)
+		return NULL;
+	if (!debug_critical) {
+		spin_lock_irqsave(&id->lock, irq_flags);
+	} else {
+		/* never spin in critical mode, drop the entry instead */
+		if (!spin_trylock_irqsave(&id->lock, irq_flags))
+			return NULL;
+	}
+	entry = get_active_entry(id);
+	memset(DEBUG_DATA(entry), 0, id->buf_size);
+	memcpy(DEBUG_DATA(entry), buf, min(len, id->buf_size));
+	debug_finish_entry(id, entry, level, 0);
+	spin_unlock_irqrestore(&id->lock, irq_flags);
+
+	return entry;
+}
+EXPORT_SYMBOL(debug_event_common);
+
+/*
+ * debug_exception_common:
+ * - write debug entry with given size and switch to next debug area
+ * - at most id->buf_size bytes of buf are stored, the rest of the
+ *   entry data is zeroed
+ * - returns the written entry, or NULL if logging is inactive, the
+ *   debug area has no pages, or (in critical mode) id->lock is busy
+ */
+
+debug_entry_t*
+debug_exception_common(debug_info_t *id, int level, const void *buf, int len)
+{
+	debug_entry_t *entry;
+	unsigned long irq_flags;
+
+	if (!debug_active || !id->areas)
+		return NULL;
+	if (!debug_critical) {
+		spin_lock_irqsave(&id->lock, irq_flags);
+	} else {
+		/* never spin in critical mode, drop the entry instead */
+		if (!spin_trylock_irqsave(&id->lock, irq_flags))
+			return NULL;
+	}
+	entry = get_active_entry(id);
+	memset(DEBUG_DATA(entry), 0, id->buf_size);
+	memcpy(DEBUG_DATA(entry), buf, min(len, id->buf_size));
+	debug_finish_entry(id, entry, level, 1);
+	spin_unlock_irqrestore(&id->lock, irq_flags);
+
+	return entry;
+}
+EXPORT_SYMBOL(debug_exception_common);
+
+/*
+ * counts arguments in format string for sprintf view
+ * - every '%' starts a conversion that takes one argument, except the
+ *   sequence "%%", which is a literal percent sign and takes none
+ * - counting "%%" as an argument would make the callers fetch a
+ *   va_arg() that was never passed
+ * - a single '%' at the very end of the string is still counted
+ */
+
+static inline int
+debug_count_numargs(char *string)
+{
+	int numargs = 0;
+
+	while (*string) {
+		if (*string++ != '%')
+			continue;
+		if (*string == '%') {
+			/* "%%": skip the second '%' as well */
+			string++;
+			continue;
+		}
+		numargs++;
+	}
+	return numargs;
+}
+
+/*
+ * debug_sprintf_event:
+ * - stores the pointer to the format string (not a copy) and the
+ *   arguments, each fetched as a long, in the active entry
+ * - the number of stored arguments is limited to what fits into
+ *   id->buf_size next to the string pointer
+ * - returns the written entry, or NULL if logging is inactive, the
+ *   debug area has no pages, or (in critical mode) id->lock is busy
+ */
+
+debug_entry_t*
+__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
+{
+	va_list   ap;
+	int numargs,idx;
+	unsigned long flags;
+	debug_sprintf_entry_t *curr_event;
+	debug_entry_t *active;
+
+	if (!debug_active || !id->areas)
+		return NULL;
+	numargs=debug_count_numargs(string);
+
+	if (debug_critical) {
+		if (!spin_trylock_irqsave(&id->lock, flags))
+			return NULL;
+	} else
+		spin_lock_irqsave(&id->lock, flags);
+	active = get_active_entry(id);
+	curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active);
+	va_start(ap,string);
+	curr_event->string=string;
+	for(idx=0;idx<min(numargs,(int)(id->buf_size / sizeof(long))-1);idx++)
+		curr_event->args[idx]=va_arg(ap,long);
+	va_end(ap);
+	debug_finish_entry(id, active, level, 0);
+	spin_unlock_irqrestore(&id->lock, flags);
+
+	return active;
+}
+EXPORT_SYMBOL(__debug_sprintf_event);
+
+/*
+ * debug_sprintf_exception:
+ * - stores the pointer to the format string (not a copy) and the
+ *   arguments, each fetched as a long, in the active entry and
+ *   switches to the next debug area
+ * - the number of stored arguments is limited to what fits into
+ *   id->buf_size next to the string pointer
+ * - returns the written entry, or NULL if logging is inactive, the
+ *   debug area has no pages, or (in critical mode) id->lock is busy
+ */
+
+debug_entry_t*
+__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
+{
+	va_list   ap;
+	int numargs,idx;
+	unsigned long flags;
+	debug_sprintf_entry_t *curr_event;
+	debug_entry_t *active;
+
+	if (!debug_active || !id->areas)
+		return NULL;
+
+	numargs=debug_count_numargs(string);
+
+	if (debug_critical) {
+		if (!spin_trylock_irqsave(&id->lock, flags))
+			return NULL;
+	} else
+		spin_lock_irqsave(&id->lock, flags);
+	active = get_active_entry(id);
+	curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active);
+	va_start(ap,string);
+	curr_event->string=string;
+	for(idx=0;idx<min(numargs,(int)(id->buf_size / sizeof(long))-1);idx++)
+		curr_event->args[idx]=va_arg(ap,long);
+	va_end(ap);
+	debug_finish_entry(id, active, level, 1);
+	spin_unlock_irqrestore(&id->lock, flags);
+
+	return active;
+}
+EXPORT_SYMBOL(__debug_sprintf_exception);
+
+/*
+ * debug_register_view:
+ * - creates the debugfs file for the view and enters the view into the
+ *   first free slot of id->views
+ * - the file mode is derived from id->mode: execute bits are always
+ *   removed, read bits if the view has no prolog/format/header
+ *   function, write bits if it has no input function
+ * - returns 0 on success and also for a NULL id, -1 if the file cannot
+ *   be created or all DEBUG_MAX_VIEWS slots are in use
+ */
+
+int
+debug_register_view(debug_info_t * id, struct debug_view *view)
+{
+	int rc = 0;
+	int i;
+	unsigned long flags;
+	umode_t mode;
+	struct dentry *pde;
+
+	if (!id)
+		goto out;
+	mode = (id->mode | S_IFREG) & ~S_IXUGO;
+	if (!(view->prolog_proc || view->format_proc || view->header_proc))
+		mode &= ~(S_IRUSR | S_IRGRP | S_IROTH);
+	if (!view->input_proc)
+		mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
+	pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry,
+				id , &debug_file_ops);
+	if (!pde){
+		pr_err("Registering view %s/%s failed due to out of "
+		       "memory\n", id->name,view->name);
+		rc = -1;
+		goto out;
+	}
+	spin_lock_irqsave(&id->lock, flags);
+	for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+		if (!id->views[i])
+			break; /* first free slot */
+	}
+	if (i == DEBUG_MAX_VIEWS) {
+		pr_err("Registering view %s/%s would exceed the maximum "
+		       "number of views %i\n", id->name, view->name, i);
+		rc = -1;
+	} else {
+		id->views[i] = view;
+		id->debugfs_entries[i] = pde;
+	}
+	spin_unlock_irqrestore(&id->lock, flags);
+	if (rc)
+		debugfs_remove(pde); /* no free slot: remove the file again, outside the lock */
+out:
+	return rc;
+}
+EXPORT_SYMBOL(debug_register_view);
+
+/*
+ * debug_unregister_view:
+ * - removes the view from id->views and deletes its debugfs file
+ * - returns 0 on success and also for a NULL id, -1 if the view is not
+ *   registered for this debug area
+ */
+
+int
+debug_unregister_view(debug_info_t *id, struct debug_view *view)
+{
+	struct dentry *victim = NULL;
+	unsigned long irq_flags;
+	int slot;
+	int rc = 0;
+
+	if (!id)
+		return rc;
+	spin_lock_irqsave(&id->lock, irq_flags);
+	for (slot = 0; slot < DEBUG_MAX_VIEWS; slot++) {
+		if (id->views[slot] == view)
+			break;
+	}
+	if (slot < DEBUG_MAX_VIEWS) {
+		victim = id->debugfs_entries[slot];
+		id->views[slot] = NULL;
+		id->debugfs_entries[slot] = NULL;
+	} else {
+		rc = -1;
+	}
+	spin_unlock_irqrestore(&id->lock, irq_flags);
+	/* the file is removed outside the lock; victim is NULL if not found */
+	debugfs_remove(victim);
+	return rc;
+}
+EXPORT_SYMBOL(debug_unregister_view);
+
+/*
+ * debug_get_user_string:
+ * - copies user_len bytes from user space into a newly allocated,
+ *   0-terminated buffer; one trailing linefeed is stripped
+ * - an empty write (user_len == 0) yields an empty string
+ * - returns the buffer, which the caller has to kfree(), or
+ *   ERR_PTR(-ENOMEM) / ERR_PTR(-EFAULT)
+ */
+static inline char *
+debug_get_user_string(const char __user *user_buf, size_t user_len)
+{
+	char *buffer;
+
+	buffer = kmalloc(user_len + 1, GFP_KERNEL);
+	if (!buffer)
+		return ERR_PTR(-ENOMEM);
+	if (copy_from_user(buffer, user_buf, user_len) != 0) {
+		kfree(buffer);
+		return ERR_PTR(-EFAULT);
+	}
+	/*
+	 * got the string, now strip linefeed.
+	 * The length check keeps an empty write from looking at the byte
+	 * in front of the buffer.
+	 */
+	if (user_len > 0 && buffer[user_len - 1] == '\n')
+		buffer[user_len - 1] = 0;
+	else
+		buffer[user_len] = 0;
+	return buffer;
+}
+
+/*
+ * debug_get_uint:
+ * - parses a decimal number, leading white space is skipped
+ * - returns the number, or -EINVAL if anything follows the digits
+ */
+static inline int
+debug_get_uint(char *buf)
+{
+	char *end;
+	int value;
+
+	value = simple_strtoul(skip_spaces(buf), &end, 10);
+	return *end ? -EINVAL : value;
+}
+
+/*
+ * functions for debug-views
+ ***********************************
+*/
+
+/*
+ * prints out the number of pages per debug area
+ */
+
+static int
+debug_prolog_pages_fn(debug_info_t *id,
+				 struct debug_view *view, char *out_buf)
+{
+	int written;
+
+	written = sprintf(out_buf, "%i\n", id->pages_per_area);
+	return written;
+}
+
+/*
+ * reads new size (number of pages per debug area)
+ * - at most 0x10000 bytes of the input are looked at
+ * - the write has to start at offset 0, otherwise -EPIPE is returned
+ * - every failure of debug_set_size() is reported as -EINVAL
+ * - *offset is advanced by user_len on the error paths as well
+ * - returns the (limited) input length on success
+ */
+
+static int
+debug_input_pages_fn(debug_info_t * id, struct debug_view *view,
+			struct file *file, const char __user *user_buf,
+			size_t user_len, loff_t * offset)
+{
+	char *str;
+	int rc,new_pages;
+
+	if (user_len > 0x10000)
+                user_len = 0x10000;
+	if (*offset != 0){
+		rc = -EPIPE;
+		goto out;
+	}
+	str = debug_get_user_string(user_buf,user_len);
+	if(IS_ERR(str)){
+		rc = PTR_ERR(str);
+		goto out;
+	}
+	new_pages = debug_get_uint(str);
+	if(new_pages < 0){
+		rc = -EINVAL;
+		goto free_str;
+	}
+	rc = debug_set_size(id,id->nr_areas, new_pages); /* number of areas stays as it is */
+	if(rc != 0){
+		rc = -EINVAL;
+		goto free_str;
+	}
+	rc = user_len;
+free_str:
+	kfree(str);
+out:
+	*offset += user_len;
+	return rc;		/* number of input characters */
+}
+
+/*
+ * prints out actual debug level
+ * a debug area that is switched off is shown as "-"
+ */
+
+static int
+debug_prolog_level_fn(debug_info_t *id, struct debug_view *view, char *out_buf)
+{
+	if (id->level == DEBUG_OFF_LEVEL)
+		return sprintf(out_buf,"-\n");
+	return sprintf(out_buf, "%i\n", id->level);
+}
+
+/*
+ * reads new debug level
+ * - at most 0x10000 bytes of the input are looked at
+ * - the write has to start at offset 0, otherwise -EPIPE is returned
+ * - input starting with '-' switches the debug area off, otherwise a
+ *   decimal level is expected
+ * - *offset is advanced by user_len on the error paths as well
+ * - returns the (limited) input length, or -EINVAL if the input is
+ *   not a number
+ */
+
+static int
+debug_input_level_fn(debug_info_t * id, struct debug_view *view,
+			struct file *file, const char __user *user_buf,
+			size_t user_len, loff_t * offset)
+{
+	char *str;
+	int rc,new_level;
+
+	if (user_len > 0x10000)
+                user_len = 0x10000;
+	if (*offset != 0){
+		rc = -EPIPE;
+		goto out;
+	}
+	str = debug_get_user_string(user_buf,user_len);
+	if(IS_ERR(str)){
+		rc = PTR_ERR(str);
+		goto out;
+	}
+	if(str[0] == '-'){
+		debug_set_level(id, DEBUG_OFF_LEVEL);
+		rc = user_len;
+		goto free_str;
+	} else {
+		new_level = debug_get_uint(str);
+	}
+	if(new_level < 0) {
+		pr_warning("%s is not a valid level for a debug "
+			   "feature\n", str);
+		rc = -EINVAL;
+	} else {
+		debug_set_level(id, new_level); /* a level above DEBUG_MAX_LEVEL is rejected there, rc still reports success */
+		rc = user_len;
+	}
+free_str:
+	kfree(str);
+out:
+	*offset += user_len;
+	return rc;		/* number of input characters */
+}
+
+
+/*
+ * resets the write position of one debug area and clears its pages
+ * - the caller has to hold id->lock
+ */
+
+static void debug_clear_area(debug_info_t *id, int area)
+{
+	int page;
+
+	id->active_entries[area] = 0;
+	id->active_pages[area] = 0;
+	for (page = 0; page < id->pages_per_area; page++)
+		memset(id->areas[area][page], 0, PAGE_SIZE);
+}
+
+/*
+ * flushes debug areas
+ * - DEBUG_FLUSH_ALL clears all areas and makes area 0 the active one
+ * - a valid area number clears that area only
+ * - any other value, a NULL id or a debug-info without areas is
+ *   ignored
+ */
+
+static void debug_flush(debug_info_t *id, int area)
+{
+	unsigned long irq_flags;
+	int idx;
+
+	if (!id || !id->areas)
+		return;
+	spin_lock_irqsave(&id->lock, irq_flags);
+	if (area == DEBUG_FLUSH_ALL) {
+		id->active_area = 0;
+		for (idx = 0; idx < id->nr_areas; idx++)
+			debug_clear_area(id, idx);
+	} else if (area >= 0 && area < id->nr_areas) {
+		debug_clear_area(id, area);
+	}
+	spin_unlock_irqrestore(&id->lock, irq_flags);
+}
+
+/*
+ * view function: flushes debug areas
+ * - only the first input character is evaluated: '-' flushes all
+ *   areas, a digit flushes the area with that number
+ * - the write has to start at offset 0, otherwise -EPIPE is returned
+ * - any other character is logged; the write is still reported as
+ *   successful
+ * - NOTE(review): rc is taken from user_len before user_len is limited
+ *   to 0x10000, so the returned length can be larger than the amount
+ *   added to *offset
+ * - NOTE(review): one byte is read from user_buf even if user_len
+ *   is 0
+ */
+
+static int
+debug_input_flush_fn(debug_info_t * id, struct debug_view *view,
+			struct file *file, const char __user *user_buf,
+			size_t user_len, loff_t * offset)
+{
+        char input_buf[1];
+        int rc = user_len;
+
+	if (user_len > 0x10000)
+                user_len = 0x10000;
+        if (*offset != 0){
+		rc = -EPIPE;
+                goto out;
+	}
+        if (copy_from_user(input_buf, user_buf, 1)){
+                rc = -EFAULT;
+                goto out;
+        }
+        if(input_buf[0] == '-') { 
+                debug_flush(id, DEBUG_FLUSH_ALL);
+                goto out;
+        }
+        if (isdigit(input_buf[0])) {
+                int area = ((int) input_buf[0] - (int) '0');
+                debug_flush(id, area); /* an area number >= nr_areas is ignored by debug_flush() */
+                goto out;
+        }
+
+	pr_info("Flushing debug data failed because %c is not a valid "
+		 "area\n", input_buf[0]);
+
+out:
+        *offset += user_len;
+        return rc;              /* number of input characters */
+}
+
+/*
+ * prints debug header in raw format:
+ * the debug_entry_t is copied to the output buffer as it is
+ * returns the number of bytes written
+ */
+
+static int
+debug_raw_header_fn(debug_info_t *id, struct debug_view *view,
+			int area, debug_entry_t *entry, char *out_buf)
+{
+	memcpy(out_buf, entry, sizeof(*entry));
+	return sizeof(*entry);
+}
+
+/*
+ * prints debug data in raw format:
+ * id->buf_size bytes are copied to the output buffer as they are
+ * returns the number of bytes written
+ */
+
+static int
+debug_raw_format_fn(debug_info_t *id, struct debug_view *view,
+			       char *out_buf, const char *in_buf)
+{
+	memcpy(out_buf, in_buf, id->buf_size);
+	return id->buf_size;
+}
+
+/*
+ * prints debug data in hex/ascii format:
+ * all bytes as two hex digits each, then "| ", then the same bytes as
+ * characters with '.' for everything that is not printable ascii
+ * returns the number of characters written
+ */
+
+static int
+debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
+			  char *out_buf, const char *in_buf)
+{
+	const unsigned char *bytes = (const unsigned char *) in_buf;
+	char *pos = out_buf;
+	int i;
+
+	for (i = 0; i < id->buf_size; i++)
+		pos += sprintf(pos, "%02x ", bytes[i]);
+	pos += sprintf(pos, "| ");
+	for (i = 0; i < id->buf_size; i++) {
+		unsigned char c = bytes[i];
+
+		if (isascii(c) && isprint(c))
+			pos += sprintf(pos, "%c", c);
+		else
+			pos += sprintf(pos, ".");
+	}
+	pos += sprintf(pos, "\n");
+	return pos - out_buf;
+}
+
+/*
+ * prints header for debug entry:
+ * area, time stamp (seconds:microseconds), level, exception marker
+ * ('*' for exceptions, '-' for events), cpu number and caller address
+ * returns the number of characters written
+ */
+
+int
+debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
+			 int area, debug_entry_t *entry, char *out_buf)
+{
+	struct timespec ts;
+	unsigned long caller;
+	unsigned int level = entry->id.fields.level;
+	char *marker;
+
+	stck_to_timespec(entry->id.stck, &ts);
+	marker = entry->id.fields.exception ? "*" : "-";
+	caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
+	return sprintf(out_buf, "%02i %011lu:%06lu %1u %1s %02i %p  ",
+		       area, ts.tv_sec, ts.tv_nsec / 1000, level,
+		       marker, entry->id.fields.cpuid, (void *) caller);
+}
+EXPORT_SYMBOL(debug_dflt_header_fn);
+
+/*
+ * prints debug data sprintf-formatted:
+ * debug_sprintf_event/exception calls must be used together with this view
+ * - the entry holds the pointer to the format string followed by the
+ *   arguments as longs
+ * - sprintf() is always called with DEBUG_SPRINTF_MAX_ARGS arguments;
+ *   argument slots that do not fit into the entry are mapped to
+ *   args[0] through the index table
+ * - returns the number of characters written, 0 if the entry is too
+ *   small to hold the string pointer
+ */
+
+#define DEBUG_SPRINTF_MAX_ARGS 10
+
+static int
+debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view,
+                        char *out_buf, debug_sprintf_entry_t *curr_event)
+{
+	int num_longs, num_used_args = 0,i, rc = 0;
+	int index[DEBUG_SPRINTF_MAX_ARGS];
+
+	/* count of longs fit into one entry */
+	num_longs = id->buf_size /  sizeof(long); 
+
+	if(num_longs < 1)
+		goto out; /* bufsize of entry too small */
+	if(num_longs == 1) {
+		/* no args, we use only the string */
+		strcpy(out_buf, curr_event->string);
+		rc = strlen(curr_event->string);
+		goto out;
+	}
+
+	/* number of arguments used for sprintf (without the format string) */
+	num_used_args   = min(DEBUG_SPRINTF_MAX_ARGS, (num_longs - 1));
+
+	memset(index,0, DEBUG_SPRINTF_MAX_ARGS * sizeof(int)); /* unused slots refer to args[0] */
+
+	for(i = 0; i < num_used_args; i++)
+		index[i] = i;
+
+	rc =  sprintf(out_buf, curr_event->string, curr_event->args[index[0]],
+		curr_event->args[index[1]], curr_event->args[index[2]],
+		curr_event->args[index[3]], curr_event->args[index[4]],
+		curr_event->args[index[5]], curr_event->args[index[6]],
+		curr_event->args[index[7]], curr_event->args[index[8]],
+		curr_event->args[index[9]]);
+
+out:
+
+	return rc;
+}
+
+/*
+ * debug_init:
+ * - is called exactly once to initialize the debug feature
+ * - registers the s390dbf sysctl table and creates the debugfs root
+ *   directory
+ * - sets 'initialized', which debug_register_mode() insists on
+ * - the results of register_sysctl_table() and debugfs_create_dir()
+ *   are stored but not checked; 0 is always returned
+ */
+static int __init debug_init(void)
+{
+	s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table);
+	mutex_lock(&debug_mutex);
+	debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT, NULL);
+	initialized = 1;
+	mutex_unlock(&debug_mutex);
+	return 0;
+}
+postcore_initcall(debug_init);
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
new file mode 100644
index 000000000..2f69243bf
--- /dev/null
+++ b/arch/s390/kernel/diag.c
@@ -0,0 +1,66 @@
+/*
+ * Implementation of s390 diagnose codes
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <asm/diag.h>
+
+/*
+ * Diagnose 14: Input spool file manipulation
+ *
+ * rx is passed in a register chosen by the compiler, ry1 and subcode
+ * are bound to registers 2 and 3. The diag instruction is issued
+ * between sam31 and sam64. The value returned is what ipm/srl 28
+ * leave in rc - presumably the condition code of the diagnose,
+ * confirm against the architecture documentation.
+ */
+int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
+{
+	register unsigned long _ry1 asm("2") = ry1;
+	register unsigned long _ry2 asm("3") = subcode;
+	int rc = 0;
+
+	asm volatile(
+		"   sam31\n"
+		"   diag    %2,2,0x14\n"
+		"   sam64\n"
+		"   ipm     %0\n"
+		"   srl     %0,28\n"
+		: "=d" (rc), "+d" (_ry2)
+		: "d" (rx), "d" (_ry1)
+		: "cc");
+
+	return rc;
+}
+EXPORT_SYMBOL(diag14);
+
+/*
+ * Diagnose 210: Get information about a virtual device
+ *
+ * *addr is copied into a static buffer, the diagnose is issued on that
+ * buffer and the buffer is copied back to *addr. A spinlock with
+ * interrupts disabled serializes the use of the static buffer.
+ * ccode is preset to -1; the exception table entry resumes at label 1,
+ * behind ipm/srl, so -1 is presumably what is returned if the diagnose
+ * itself faults - confirm. Otherwise the value left by ipm/srl 28 is
+ * returned.
+ */
+int diag210(struct diag210 *addr)
+{
+	/*
+	 * diag 210 needs its data below the 2GB border, so we
+	 * use a static data area to be sure
+	 */
+	static struct diag210 diag210_tmp;
+	static DEFINE_SPINLOCK(diag210_lock);
+	unsigned long flags;
+	int ccode;
+
+	spin_lock_irqsave(&diag210_lock, flags);
+	diag210_tmp = *addr;
+
+	asm volatile(
+		"	lhi	%0,-1\n"
+		"	sam31\n"
+		"	diag	%1,0,0x210\n"
+		"0:	ipm	%0\n"
+		"	srl	%0,28\n"
+		"1:	sam64\n"
+		EX_TABLE(0b, 1b)
+		: "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory");
+
+	*addr = diag210_tmp;
+	spin_unlock_irqrestore(&diag210_lock, flags);
+
+	return ccode;
+}
+EXPORT_SYMBOL(diag210);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
new file mode 100644
index 000000000..8140d10c6
--- /dev/null
+++ b/arch/s390/kernel/dis.c
@@ -0,0 +1,2049 @@
+/*
+ * Disassemble s390 instructions.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/reboot.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+
+#include <asm/uaccess.h>
+#include <asm/dis.h>
+#include <asm/io.h>
+#include <linux/atomic.h>
+#include <asm/mathemu.h>
+#include <asm/cpcmd.h>
+#include <asm/lowcore.h>
+#include <asm/debug.h>
+#include <asm/irq.h>
+
+enum {
+	UNUSED,	/* Indicates the end of the operand list */
+	R_8,	/* GPR starting at position 8 */
+	R_12,	/* GPR starting at position 12 */
+	R_16,	/* GPR starting at position 16 */
+	R_20,	/* GPR starting at position 20 */
+	R_24,	/* GPR starting at position 24 */
+	R_28,	/* GPR starting at position 28 */
+	R_32,	/* GPR starting at position 32 */
+	F_8,	/* FPR starting at position 8 */
+	F_12,	/* FPR starting at position 12 */
+	F_16,	/* FPR starting at position 16 */
+	F_20,	/* FPR starting at position 20 */
+	F_24,	/* FPR starting at position 24 */
+	F_28,	/* FPR starting at position 28 */
+	F_32,	/* FPR starting at position 32 */
+	A_8,	/* Access reg. starting at position 8 */
+	A_12,	/* Access reg. starting at position 12 */
+	A_24,	/* Access reg. starting at position 24 */
+	A_28,	/* Access reg. starting at position 28 */
+	C_8,	/* Control reg. starting at position 8 */
+	C_12,	/* Control reg. starting at position 12 */
+	V_8,	/* Vector reg. starting at position 8, extension bit at 36 */
+	V_12,	/* Vector reg. starting at position 12, extension bit at 37 */
+	V_16,	/* Vector reg. starting at position 16, extension bit at 38 */
+	V_32,	/* Vector reg. starting at position 32, extension bit at 39 */
+	W_12,	/* Vector reg. at bit 12, extension at bit 37, used as index */
+	B_16,	/* Base register starting at position 16 */
+	B_32,	/* Base register starting at position 32 */
+	X_12,	/* Index register starting at position 12 */
+	D_20,	/* Displacement starting at position 20 */
+	D_36,	/* Displacement starting at position 36 */
+	D20_20,	/* 20 bit displacement starting at 20 */
+	L4_8,	/* 4 bit length starting at position 8 */
+	L4_12,	/* 4 bit length starting at position 12 */
+	L8_8,	/* 8 bit length starting at position 8 */
+	U4_8,	/* 4 bit unsigned value starting at 8 */
+	U4_12,	/* 4 bit unsigned value starting at 12 */
+	U4_16,	/* 4 bit unsigned value starting at 16 */
+	U4_20,	/* 4 bit unsigned value starting at 20 */
+	U4_24,	/* 4 bit unsigned value starting at 24 */
+	U4_28,	/* 4 bit unsigned value starting at 28 */
+	U4_32,	/* 4 bit unsigned value starting at 32 */
+	U4_36,	/* 4 bit unsigned value starting at 36 */
+	U8_8,	/* 8 bit unsigned value starting at 8 */
+	U8_16,	/* 8 bit unsigned value starting at 16 */
+	U8_24,	/* 8 bit unsigned value starting at 24 */
+	U8_32,	/* 8 bit unsigned value starting at 32 */
+	I8_8,	/* 8 bit signed value starting at 8 */
+	I8_16,	/* 8 bit signed value starting at 16 */
+	I8_24,	/* 8 bit signed value starting at 24 */
+	I8_32,	/* 8 bit signed value starting at 32 */
+	J12_12, /* PC relative offset at 12 */
+	I16_16,	/* 16 bit signed value starting at 16 */
+	I16_32,	/* 16 bit signed value starting at 32 */
+	U16_16,	/* 16 bit unsigned value starting at 16 */
+	U16_32,	/* 16 bit unsigned value starting at 32 */
+	J16_16,	/* PC relative jump offset at 16 */
+	J16_32, /* PC relative offset at 32 */
+	I24_24, /* 24 bit signed value starting at 24 */
+	J32_16,	/* PC relative long offset at 16 */
+	I32_16,	/* 32 bit signed value starting at 16 */
+	U32_16,	/* 32 bit unsigned value starting at 16 */
+	M_16,	/* 4 bit optional mask starting at 16 */
+	M_20,	/* 4 bit optional mask starting at 20 */
+	M_24,	/* 4 bit optional mask starting at 24 */
+	M_28,	/* 4 bit optional mask starting at 28 */
+	M_32,	/* 4 bit optional mask starting at 32 */
+	RO_28,	/* optional GPR starting at position 28 */
+};
+
+/*
+ * Enumeration of the different instruction formats.
+ * For details consult the principles of operation.
+ */
+enum {	/* values are used as indices into formats[] */
+	INSTR_INVALID,	/* format of the last entry in each opcode table */
+	INSTR_E,
+	INSTR_IE_UU,
+	INSTR_MII_UPI,
+	INSTR_RIE_R0IU, INSTR_RIE_R0UU, INSTR_RIE_RRP, INSTR_RIE_RRPU,
+	INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, INSTR_RIE_RRI0,
+	INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU, INSTR_RIL_UP,
+	INSTR_RIS_R0RDU, INSTR_RIS_R0UU, INSTR_RIS_RURDI, INSTR_RIS_RURDU,
+	INSTR_RI_RI, INSTR_RI_RP, INSTR_RI_RU, INSTR_RI_UP,
+	INSTR_RRE_00, INSTR_RRE_0R, INSTR_RRE_AA, INSTR_RRE_AR, INSTR_RRE_F0,
+	INSTR_RRE_FF, INSTR_RRE_FR, INSTR_RRE_R0, INSTR_RRE_RA, INSTR_RRE_RF,
+	INSTR_RRE_RR, INSTR_RRE_RR_OPT,
+	INSTR_RRF_0UFF, INSTR_RRF_F0FF, INSTR_RRF_F0FF2, INSTR_RRF_F0FR,
+	INSTR_RRF_FFRU, INSTR_RRF_FUFF, INSTR_RRF_FUFF2, INSTR_RRF_M0RR,
+	INSTR_RRF_R0RR,	INSTR_RRF_R0RR2, INSTR_RRF_RMRR, INSTR_RRF_RURR,
+	INSTR_RRF_U0FF,	INSTR_RRF_U0RF, INSTR_RRF_U0RR, INSTR_RRF_UUFF,
+	INSTR_RRF_UUFR, INSTR_RRF_UURF,
+	INSTR_RRR_F0FF, INSTR_RRS_RRRDU,
+	INSTR_RR_FF, INSTR_RR_R0, INSTR_RR_RR, INSTR_RR_U0, INSTR_RR_UR,
+	INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD,
+	INSTR_RSI_RRP,
+	INSTR_RSL_LRDFU, INSTR_RSL_R0RD,
+	INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD,
+	INSTR_RSY_RDRM, INSTR_RSY_RMRD,
+	INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD,
+	INSTR_RS_RURD,
+	INSTR_RXE_FRRD, INSTR_RXE_RRRD, INSTR_RXE_RRRDM,
+	INSTR_RXF_FRRDF,
+	INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RXY_URRD,
+	INSTR_RX_FRRD, INSTR_RX_RRRD, INSTR_RX_URRD,
+	INSTR_SIL_RDI, INSTR_SIL_RDU,
+	INSTR_SIY_IRD, INSTR_SIY_URD,
+	INSTR_SI_URD,
+	INSTR_SMI_U0RDP,
+	INSTR_SSE_RDRD,
+	INSTR_SSF_RRDRD, INSTR_SSF_RRDRD2,
+	INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD,
+	INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3,
+	INSTR_S_00, INSTR_S_RD,
+	INSTR_VRI_V0IM, INSTR_VRI_V0I0, INSTR_VRI_V0IIM, INSTR_VRI_VVIM,
+	INSTR_VRI_VVV0IM, INSTR_VRI_VVV0I0, INSTR_VRI_VVIMM,
+	INSTR_VRR_VV00MMM, INSTR_VRR_VV000MM, INSTR_VRR_VV0000M,
+	INSTR_VRR_VV00000, INSTR_VRR_VVV0M0M, INSTR_VRR_VV00M0M,
+	INSTR_VRR_VVV000M, INSTR_VRR_VVV000V, INSTR_VRR_VVV0000,
+	INSTR_VRR_VVV0MMM, INSTR_VRR_VVV00MM, INSTR_VRR_VVVMM0V,
+	INSTR_VRR_VVVM0MV, INSTR_VRR_VVVM00V, INSTR_VRR_VRR0000,
+	INSTR_VRS_VVRDM, INSTR_VRS_VVRD0, INSTR_VRS_VRRDM, INSTR_VRS_VRRD0,
+	INSTR_VRS_RVRDM,
+	INSTR_VRV_VVRDM, INSTR_VRV_VWRDM,
+	INSTR_VRX_VRRDM, INSTR_VRX_VRRD0,
+};
+
+static const struct s390_operand operands[] =
+{	/* each entry: { width in bits, start bit position, flags } */
+	[UNUSED]  = { 0, 0, 0 },
+	[R_8]	 = {  4,  8, OPERAND_GPR },
+	[R_12]	 = {  4, 12, OPERAND_GPR },
+	[R_16]	 = {  4, 16, OPERAND_GPR },
+	[R_20]	 = {  4, 20, OPERAND_GPR },
+	[R_24]	 = {  4, 24, OPERAND_GPR },
+	[R_28]	 = {  4, 28, OPERAND_GPR },
+	[R_32]	 = {  4, 32, OPERAND_GPR },
+	[F_8]	 = {  4,  8, OPERAND_FPR },
+	[F_12]	 = {  4, 12, OPERAND_FPR },
+	[F_16]	 = {  4, 16, OPERAND_FPR },
+	[F_20]	 = {  4, 20, OPERAND_FPR },	/* start bit 20, as the name says */
+	[F_24]	 = {  4, 24, OPERAND_FPR },
+	[F_28]	 = {  4, 28, OPERAND_FPR },
+	[F_32]	 = {  4, 32, OPERAND_FPR },
+	[A_8]	 = {  4,  8, OPERAND_AR },
+	[A_12]	 = {  4, 12, OPERAND_AR },
+	[A_24]	 = {  4, 24, OPERAND_AR },
+	[A_28]	 = {  4, 28, OPERAND_AR },
+	[C_8]	 = {  4,  8, OPERAND_CR },
+	[C_12]	 = {  4, 12, OPERAND_CR },
+	[V_8]	 = {  4,  8, OPERAND_VR },
+	[V_12]	 = {  4, 12, OPERAND_VR },
+	[V_16]	 = {  4, 16, OPERAND_VR },
+	[V_32]	 = {  4, 32, OPERAND_VR },
+	[W_12]	 = {  4, 12, OPERAND_INDEX | OPERAND_VR },
+	[B_16]	 = {  4, 16, OPERAND_BASE | OPERAND_GPR },
+	[B_32]	 = {  4, 32, OPERAND_BASE | OPERAND_GPR },
+	[X_12]	 = {  4, 12, OPERAND_INDEX | OPERAND_GPR },
+	[D_20]	 = { 12, 20, OPERAND_DISP },
+	[D_36]	 = { 12, 36, OPERAND_DISP },
+	[D20_20] = { 20, 20, OPERAND_DISP | OPERAND_SIGNED },
+	[L4_8]	 = {  4,  8, OPERAND_LENGTH },
+	[L4_12]  = {  4, 12, OPERAND_LENGTH },
+	[L8_8]	 = {  8,  8, OPERAND_LENGTH },
+	[U4_8]	 = {  4,  8, 0 },
+	[U4_12]  = {  4, 12, 0 },
+	[U4_16]  = {  4, 16, 0 },
+	[U4_20]  = {  4, 20, 0 },
+	[U4_24]  = {  4, 24, 0 },
+	[U4_28]  = {  4, 28, 0 },
+	[U4_32]  = {  4, 32, 0 },
+	[U4_36]  = {  4, 36, 0 },
+	[U8_8]	 = {  8,  8, 0 },
+	[U8_16]  = {  8, 16, 0 },
+	[U8_24]  = {  8, 24, 0 },
+	[U8_32]  = {  8, 32, 0 },
+	[J12_12] = { 12, 12, OPERAND_PCREL },
+	[I8_8]	 = {  8,  8, OPERAND_SIGNED },
+	[I8_16]  = {  8, 16, OPERAND_SIGNED },
+	[I8_24]  = {  8, 24, OPERAND_SIGNED },
+	[I8_32]  = {  8, 32, OPERAND_SIGNED },
+	[I16_32] = { 16, 32, OPERAND_SIGNED },
+	[I16_16] = { 16, 16, OPERAND_SIGNED },
+	[U16_16] = { 16, 16, 0 },
+	[U16_32] = { 16, 32, 0 },
+	[J16_16] = { 16, 16, OPERAND_PCREL },
+	[J16_32] = { 16, 32, OPERAND_PCREL },
+	[I24_24] = { 24, 24, OPERAND_SIGNED },
+	[J32_16] = { 32, 16, OPERAND_PCREL },
+	[I32_16] = { 32, 16, OPERAND_SIGNED },
+	[U32_16] = { 32, 16, 0 },
+	[M_16]	 = {  4, 16, 0 },
+	[M_20]	 = {  4, 20, 0 },
+	[M_24]	 = {  4, 24, 0 },
+	[M_28]	 = {  4, 28, 0 },
+	[M_32]	 = {  4, 32, 0 },
+	[RO_28]  = {  4, 28, OPERAND_GPR }
+};
+
+static const unsigned char formats[][7] = {	/* byte 0: 0xff or 0x0f, presumably an opcode mask; then operand indices, 0 (UNUSED) ends the list */
+	[INSTR_E]	  = { 0xff, 0,0,0,0,0,0 },
+	[INSTR_IE_UU]	  = { 0xff, U4_24,U4_28,0,0,0,0 },
+	[INSTR_MII_UPI]	  = { 0xff, U4_8,J12_12,I24_24 },
+	[INSTR_RIE_R0IU]  = { 0xff, R_8,I16_16,U4_32,0,0,0 },
+	[INSTR_RIE_R0UU]  = { 0xff, R_8,U16_16,U4_32,0,0,0 },
+	[INSTR_RIE_RRI0]  = { 0xff, R_8,R_12,I16_16,0,0,0 },
+	[INSTR_RIE_RRPU]  = { 0xff, R_8,R_12,U4_32,J16_16,0,0 },
+	[INSTR_RIE_RRP]	  = { 0xff, R_8,R_12,J16_16,0,0,0 },
+	[INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 },
+	[INSTR_RIE_RUPI]  = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 },
+	[INSTR_RIE_RUPU]  = { 0xff, R_8,U8_32,U4_12,J16_16,0,0 },
+	[INSTR_RIL_RI]	  = { 0x0f, R_8,I32_16,0,0,0,0 },
+	[INSTR_RIL_RP]	  = { 0x0f, R_8,J32_16,0,0,0,0 },
+	[INSTR_RIL_RU]	  = { 0x0f, R_8,U32_16,0,0,0,0 },
+	[INSTR_RIL_UP]	  = { 0x0f, U4_8,J32_16,0,0,0,0 },
+	[INSTR_RIS_R0RDU] = { 0xff, R_8,U8_32,D_20,B_16,0,0 },
+	[INSTR_RIS_RURDI] = { 0xff, R_8,I8_32,U4_12,D_20,B_16,0 },
+	[INSTR_RIS_RURDU] = { 0xff, R_8,U8_32,U4_12,D_20,B_16,0 },
+	[INSTR_RI_RI]	  = { 0x0f, R_8,I16_16,0,0,0,0 },
+	[INSTR_RI_RP]	  = { 0x0f, R_8,J16_16,0,0,0,0 },
+	[INSTR_RI_RU]	  = { 0x0f, R_8,U16_16,0,0,0,0 },
+	[INSTR_RI_UP]	  = { 0x0f, U4_8,J16_16,0,0,0,0 },
+	[INSTR_RRE_00]	  = { 0xff, 0,0,0,0,0,0 },
+	[INSTR_RRE_0R]	  = { 0xff, R_28,0,0,0,0,0 },
+	[INSTR_RRE_AA]	  = { 0xff, A_24,A_28,0,0,0,0 },
+	[INSTR_RRE_AR]	  = { 0xff, A_24,R_28,0,0,0,0 },
+	[INSTR_RRE_F0]	  = { 0xff, F_24,0,0,0,0,0 },
+	[INSTR_RRE_FF]	  = { 0xff, F_24,F_28,0,0,0,0 },
+	[INSTR_RRE_FR]	  = { 0xff, F_24,R_28,0,0,0,0 },
+	[INSTR_RRE_R0]	  = { 0xff, R_24,0,0,0,0,0 },
+	[INSTR_RRE_RA]	  = { 0xff, R_24,A_28,0,0,0,0 },
+	[INSTR_RRE_RF]	  = { 0xff, R_24,F_28,0,0,0,0 },
+	[INSTR_RRE_RR]	  = { 0xff, R_24,R_28,0,0,0,0 },
+	[INSTR_RRE_RR_OPT]= { 0xff, R_24,RO_28,0,0,0,0 },
+	[INSTR_RRF_0UFF]  = { 0xff, F_24,F_28,U4_20,0,0,0 },
+	[INSTR_RRF_F0FF2] = { 0xff, F_24,F_16,F_28,0,0,0 },
+	[INSTR_RRF_F0FF]  = { 0xff, F_16,F_24,F_28,0,0,0 },
+	[INSTR_RRF_F0FR]  = { 0xff, F_24,F_16,R_28,0,0,0 },
+	[INSTR_RRF_FFRU]  = { 0xff, F_24,F_16,R_28,U4_20,0,0 },
+	[INSTR_RRF_FUFF]  = { 0xff, F_24,F_16,F_28,U4_20,0,0 },
+	[INSTR_RRF_FUFF2] = { 0xff, F_24,F_28,F_16,U4_20,0,0 },
+	[INSTR_RRF_M0RR]  = { 0xff, R_24,R_28,M_16,0,0,0 },
+	[INSTR_RRF_R0RR]  = { 0xff, R_24,R_16,R_28,0,0,0 },
+	[INSTR_RRF_R0RR2] = { 0xff, R_24,R_28,R_16,0,0,0 },
+	[INSTR_RRF_RMRR]  = { 0xff, R_24,R_16,R_28,M_20,0,0 },
+	[INSTR_RRF_RURR]  = { 0xff, R_24,R_28,R_16,U4_20,0,0 },
+	[INSTR_RRF_U0FF]  = { 0xff, F_24,U4_16,F_28,0,0,0 },
+	[INSTR_RRF_U0RF]  = { 0xff, R_24,U4_16,F_28,0,0,0 },
+	[INSTR_RRF_U0RR]  = { 0xff, R_24,R_28,U4_16,0,0,0 },
+	[INSTR_RRF_UUFF]  = { 0xff, F_24,U4_16,F_28,U4_20,0,0 },
+	[INSTR_RRF_UUFR]  = { 0xff, F_24,U4_16,R_28,U4_20,0,0 },
+	[INSTR_RRF_UURF]  = { 0xff, R_24,U4_16,F_28,U4_20,0,0 },
+	[INSTR_RRR_F0FF]  = { 0xff, F_24,F_28,F_16,0,0,0 },
+	[INSTR_RRS_RRRDU] = { 0xff, R_8,R_12,U4_32,D_20,B_16,0 },
+	[INSTR_RR_FF]	  = { 0xff, F_8,F_12,0,0,0,0 },
+	[INSTR_RR_R0]	  = { 0xff, R_8, 0,0,0,0,0 },
+	[INSTR_RR_RR]	  = { 0xff, R_8,R_12,0,0,0,0 },
+	[INSTR_RR_U0]	  = { 0xff, U8_8, 0,0,0,0,0 },
+	[INSTR_RR_UR]	  = { 0xff, U4_8,R_12,0,0,0,0 },
+	[INSTR_RSE_CCRD]  = { 0xff, C_8,C_12,D_20,B_16,0,0 },
+	[INSTR_RSE_RRRD]  = { 0xff, R_8,R_12,D_20,B_16,0,0 },
+	[INSTR_RSE_RURD]  = { 0xff, R_8,U4_12,D_20,B_16,0,0 },
+	[INSTR_RSI_RRP]	  = { 0xff, R_8,R_12,J16_16,0,0,0 },
+	[INSTR_RSL_LRDFU] = { 0xff, F_32,D_20,L8_8,B_16,U4_36,0 },	/* 8 bit length field */
+	[INSTR_RSL_R0RD]  = { 0xff, D_20,L4_8,B_16,0,0,0 },
+	[INSTR_RSY_AARD]  = { 0xff, A_8,A_12,D20_20,B_16,0,0 },
+	[INSTR_RSY_CCRD]  = { 0xff, C_8,C_12,D20_20,B_16,0,0 },
+	[INSTR_RSY_RDRM]  = { 0xff, R_8,D20_20,B_16,U4_12,0,0 },
+	[INSTR_RSY_RMRD]  = { 0xff, R_8,U4_12,D20_20,B_16,0,0 },
+	[INSTR_RSY_RRRD]  = { 0xff, R_8,R_12,D20_20,B_16,0,0 },
+	[INSTR_RSY_RURD]  = { 0xff, R_8,U4_12,D20_20,B_16,0,0 },
+	[INSTR_RS_AARD]	  = { 0xff, A_8,A_12,D_20,B_16,0,0 },
+	[INSTR_RS_CCRD]	  = { 0xff, C_8,C_12,D_20,B_16,0,0 },
+	[INSTR_RS_R0RD]	  = { 0xff, R_8,D_20,B_16,0,0,0 },
+	[INSTR_RS_RRRD]	  = { 0xff, R_8,R_12,D_20,B_16,0,0 },
+	[INSTR_RS_RURD]	  = { 0xff, R_8,U4_12,D_20,B_16,0,0 },
+	[INSTR_RXE_FRRD]  = { 0xff, F_8,D_20,X_12,B_16,0,0 },
+	[INSTR_RXE_RRRD]  = { 0xff, R_8,D_20,X_12,B_16,0,0 },
+	[INSTR_RXE_RRRDM] = { 0xff, R_8,D_20,X_12,B_16,M_32,0 },
+	[INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 },
+	[INSTR_RXY_FRRD]  = { 0xff, F_8,D20_20,X_12,B_16,0,0 },
+	[INSTR_RXY_RRRD]  = { 0xff, R_8,D20_20,X_12,B_16,0,0 },
+	[INSTR_RXY_URRD]  = { 0xff, U4_8,D20_20,X_12,B_16,0,0 },
+	[INSTR_RX_FRRD]	  = { 0xff, F_8,D_20,X_12,B_16,0,0 },
+	[INSTR_RX_RRRD]	  = { 0xff, R_8,D_20,X_12,B_16,0,0 },
+	[INSTR_RX_URRD]	  = { 0xff, U4_8,D_20,X_12,B_16,0,0 },
+	[INSTR_SIL_RDI]   = { 0xff, D_20,B_16,I16_32,0,0,0 },
+	[INSTR_SIL_RDU]   = { 0xff, D_20,B_16,U16_32,0,0,0 },
+	[INSTR_SIY_IRD]   = { 0xff, D20_20,B_16,I8_8,0,0,0 },
+	[INSTR_SIY_URD]	  = { 0xff, D20_20,B_16,U8_8,0,0,0 },
+	[INSTR_SI_URD]	  = { 0xff, D_20,B_16,U8_8,0,0,0 },
+	[INSTR_SMI_U0RDP] = { 0xff, U4_8,J16_32,D_20,B_16,0,0 },
+	[INSTR_SSE_RDRD]  = { 0xff, D_20,B_16,D_36,B_32,0,0 },
+	[INSTR_SSF_RRDRD] = { 0x0f, D_20,B_16,D_36,B_32,R_8,0 },
+	[INSTR_SSF_RRDRD2]= { 0x0f, R_8,D_20,B_16,D_36,B_32,0 },
+	[INSTR_SS_L0RDRD] = { 0xff, D_20,L8_8,B_16,D_36,B_32,0 },
+	[INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 },
+	[INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 },
+	[INSTR_SS_RRRDRD2]= { 0xff, R_8,D_20,B_16,R_12,D_36,B_32 },
+	[INSTR_SS_RRRDRD3]= { 0xff, R_8,R_12,D_20,B_16,D_36,B_32 },
+	[INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 },
+	[INSTR_S_00]	  = { 0xff, 0,0,0,0,0,0 },
+	[INSTR_S_RD]	  = { 0xff, D_20,B_16,0,0,0,0 },
+	[INSTR_VRI_V0IM]  = { 0xff, V_8,I16_16,M_32,0,0,0 },
+	[INSTR_VRI_V0I0]  = { 0xff, V_8,I16_16,0,0,0,0 },
+	[INSTR_VRI_V0IIM] = { 0xff, V_8,I8_16,I8_24,M_32,0,0 },
+	[INSTR_VRI_VVIM]  = { 0xff, V_8,I16_16,V_12,M_32,0,0 },
+	[INSTR_VRI_VVV0IM]= { 0xff, V_8,V_12,V_16,I8_24,M_32,0 },
+	[INSTR_VRI_VVV0I0]= { 0xff, V_8,V_12,V_16,I8_24,0,0 },
+	[INSTR_VRI_VVIMM] = { 0xff, V_8,V_12,I16_16,M_32,M_28,0 },
+	[INSTR_VRR_VV00MMM]={ 0xff, V_8,V_12,M_32,M_28,M_24,0 },
+	[INSTR_VRR_VV000MM]={ 0xff, V_8,V_12,M_32,M_28,0,0 },
+	[INSTR_VRR_VV0000M]={ 0xff, V_8,V_12,M_32,0,0,0 },
+	[INSTR_VRR_VV00000]={ 0xff, V_8,V_12,0,0,0,0 },
+	[INSTR_VRR_VVV0M0M]={ 0xff, V_8,V_12,V_16,M_32,M_24,0 },
+	[INSTR_VRR_VV00M0M]={ 0xff, V_8,V_12,M_32,M_24,0,0 },
+	[INSTR_VRR_VVV000M]={ 0xff, V_8,V_12,V_16,M_32,0,0 },
+	[INSTR_VRR_VVV000V]={ 0xff, V_8,V_12,V_16,V_32,0,0 },
+	[INSTR_VRR_VVV0000]={ 0xff, V_8,V_12,V_16,0,0,0 },
+	[INSTR_VRR_VVV0MMM]={ 0xff, V_8,V_12,V_16,M_32,M_28,M_24 },
+	[INSTR_VRR_VVV00MM]={ 0xff, V_8,V_12,V_16,M_32,M_28,0 },
+	[INSTR_VRR_VVVMM0V]={ 0xff, V_8,V_12,V_16,V_32,M_20,M_24 },
+	[INSTR_VRR_VVVM0MV]={ 0xff, V_8,V_12,V_16,V_32,M_28,M_20 },
+	[INSTR_VRR_VVVM00V]={ 0xff, V_8,V_12,V_16,V_32,M_20,0 },
+	[INSTR_VRR_VRR0000]={ 0xff, V_8,R_12,R_16,0,0,0 },
+	[INSTR_VRS_VVRDM] = { 0xff, V_8,V_12,D_20,B_16,M_32,0 },
+	[INSTR_VRS_VVRD0] = { 0xff, V_8,V_12,D_20,B_16,0,0 },
+	[INSTR_VRS_VRRDM] = { 0xff, V_8,R_12,D_20,B_16,M_32,0 },
+	[INSTR_VRS_VRRD0] = { 0xff, V_8,R_12,D_20,B_16,0,0 },
+	[INSTR_VRS_RVRDM] = { 0xff, R_8,V_12,D_20,B_16,M_32,0 },
+	[INSTR_VRV_VVRDM] = { 0xff, V_8,V_12,D_20,B_16,M_32,0 },
+	[INSTR_VRV_VWRDM] = { 0xff, V_8,D_20,W_12,B_16,M_32,0 },
+	[INSTR_VRX_VRRDM] = { 0xff, V_8,D_20,X_12,B_16,M_32,0 },
+	[INSTR_VRX_VRRD0] = { 0xff, V_8,D_20,X_12,B_16,0,0 },
+};
+
+enum {	/* values are used as indices into long_insn_name[] */
+	LONG_INSN_ALGHSIK,
+	LONG_INSN_ALHHHR,
+	LONG_INSN_ALHHLR,
+	LONG_INSN_ALHSIK,
+	LONG_INSN_ALSIHN,
+	LONG_INSN_CDFBRA,
+	LONG_INSN_CDGBRA,
+	LONG_INSN_CDGTRA,
+	LONG_INSN_CDLFBR,
+	LONG_INSN_CDLFTR,
+	LONG_INSN_CDLGBR,
+	LONG_INSN_CDLGTR,
+	LONG_INSN_CEFBRA,
+	LONG_INSN_CEGBRA,
+	LONG_INSN_CELFBR,
+	LONG_INSN_CELGBR,
+	LONG_INSN_CFDBRA,
+	LONG_INSN_CFEBRA,
+	LONG_INSN_CFXBRA,
+	LONG_INSN_CGDBRA,
+	LONG_INSN_CGDTRA,
+	LONG_INSN_CGEBRA,
+	LONG_INSN_CGXBRA,
+	LONG_INSN_CGXTRA,
+	LONG_INSN_CLFDBR,
+	LONG_INSN_CLFDTR,
+	LONG_INSN_CLFEBR,
+	LONG_INSN_CLFHSI,
+	LONG_INSN_CLFXBR,
+	LONG_INSN_CLFXTR,
+	LONG_INSN_CLGDBR,
+	LONG_INSN_CLGDTR,
+	LONG_INSN_CLGEBR,
+	LONG_INSN_CLGFRL,
+	LONG_INSN_CLGHRL,
+	LONG_INSN_CLGHSI,
+	LONG_INSN_CLGXBR,
+	LONG_INSN_CLGXTR,
+	LONG_INSN_CLHHSI,
+	LONG_INSN_CXFBRA,
+	LONG_INSN_CXGBRA,
+	LONG_INSN_CXGTRA,
+	LONG_INSN_CXLFBR,
+	LONG_INSN_CXLFTR,
+	LONG_INSN_CXLGBR,
+	LONG_INSN_CXLGTR,
+	LONG_INSN_FIDBRA,
+	LONG_INSN_FIEBRA,
+	LONG_INSN_FIXBRA,
+	LONG_INSN_LDXBRA,
+	LONG_INSN_LEDBRA,
+	LONG_INSN_LEXBRA,
+	LONG_INSN_LLGFAT,
+	LONG_INSN_LLGFRL,
+	LONG_INSN_LLGHRL,
+	LONG_INSN_LLGTAT,
+	LONG_INSN_POPCNT,
+	LONG_INSN_RIEMIT,
+	LONG_INSN_RINEXT,
+	LONG_INSN_RISBGN,
+	LONG_INSN_RISBHG,
+	LONG_INSN_RISBLG,
+	LONG_INSN_SLHHHR,
+	LONG_INSN_SLHHLR,
+	LONG_INSN_TABORT,
+	LONG_INSN_TBEGIN,
+	LONG_INSN_TBEGINC,
+	LONG_INSN_PCISTG,
+	LONG_INSN_MPCIFC,
+	LONG_INSN_STPCIFC,
+	LONG_INSN_PCISTB,
+	LONG_INSN_VPOPCT,
+	LONG_INSN_VERLLV,
+	LONG_INSN_VESRAV,
+	LONG_INSN_VESRLV,
+	LONG_INSN_VSBCBI,
+	LONG_INSN_STCCTM
+};
+
+static char *long_insn_name[] = {	/* mnemonic strings, indexed by LONG_INSN_* */
+	[LONG_INSN_ALGHSIK] = "alghsik",
+	[LONG_INSN_ALHHHR] = "alhhhr",
+	[LONG_INSN_ALHHLR] = "alhhlr",
+	[LONG_INSN_ALHSIK] = "alhsik",
+	[LONG_INSN_ALSIHN] = "alsihn",
+	[LONG_INSN_CDFBRA] = "cdfbra",
+	[LONG_INSN_CDGBRA] = "cdgbra",
+	[LONG_INSN_CDGTRA] = "cdgtra",
+	[LONG_INSN_CDLFBR] = "cdlfbr",
+	[LONG_INSN_CDLFTR] = "cdlftr",
+	[LONG_INSN_CDLGBR] = "cdlgbr",
+	[LONG_INSN_CDLGTR] = "cdlgtr",
+	[LONG_INSN_CEFBRA] = "cefbra",
+	[LONG_INSN_CEGBRA] = "cegbra",
+	[LONG_INSN_CELFBR] = "celfbr",
+	[LONG_INSN_CELGBR] = "celgbr",
+	[LONG_INSN_CFDBRA] = "cfdbra",
+	[LONG_INSN_CFEBRA] = "cfebra",
+	[LONG_INSN_CFXBRA] = "cfxbra",
+	[LONG_INSN_CGDBRA] = "cgdbra",
+	[LONG_INSN_CGDTRA] = "cgdtra",
+	[LONG_INSN_CGEBRA] = "cgebra",
+	[LONG_INSN_CGXBRA] = "cgxbra",
+	[LONG_INSN_CGXTRA] = "cgxtra",
+	[LONG_INSN_CLFDBR] = "clfdbr",
+	[LONG_INSN_CLFDTR] = "clfdtr",
+	[LONG_INSN_CLFEBR] = "clfebr",
+	[LONG_INSN_CLFHSI] = "clfhsi",
+	[LONG_INSN_CLFXBR] = "clfxbr",
+	[LONG_INSN_CLFXTR] = "clfxtr",
+	[LONG_INSN_CLGDBR] = "clgdbr",
+	[LONG_INSN_CLGDTR] = "clgdtr",
+	[LONG_INSN_CLGEBR] = "clgebr",
+	[LONG_INSN_CLGFRL] = "clgfrl",
+	[LONG_INSN_CLGHRL] = "clghrl",
+	[LONG_INSN_CLGHSI] = "clghsi",
+	[LONG_INSN_CLGXBR] = "clgxbr",
+	[LONG_INSN_CLGXTR] = "clgxtr",
+	[LONG_INSN_CLHHSI] = "clhhsi",
+	[LONG_INSN_CXFBRA] = "cxfbra",
+	[LONG_INSN_CXGBRA] = "cxgbra",
+	[LONG_INSN_CXGTRA] = "cxgtra",
+	[LONG_INSN_CXLFBR] = "cxlfbr",
+	[LONG_INSN_CXLFTR] = "cxlftr",
+	[LONG_INSN_CXLGBR] = "cxlgbr",
+	[LONG_INSN_CXLGTR] = "cxlgtr",
+	[LONG_INSN_FIDBRA] = "fidbra",
+	[LONG_INSN_FIEBRA] = "fiebra",
+	[LONG_INSN_FIXBRA] = "fixbra",
+	[LONG_INSN_LDXBRA] = "ldxbra",
+	[LONG_INSN_LEDBRA] = "ledbra",
+	[LONG_INSN_LEXBRA] = "lexbra",
+	[LONG_INSN_LLGFAT] = "llgfat",
+	[LONG_INSN_LLGFRL] = "llgfrl",
+	[LONG_INSN_LLGHRL] = "llghrl",
+	[LONG_INSN_LLGTAT] = "llgtat",
+	[LONG_INSN_POPCNT] = "popcnt",
+	[LONG_INSN_RIEMIT] = "riemit",
+	[LONG_INSN_RINEXT] = "rinext",
+	[LONG_INSN_RISBGN] = "risbgn",
+	[LONG_INSN_RISBHG] = "risbhg",
+	[LONG_INSN_RISBLG] = "risblg",
+	[LONG_INSN_SLHHHR] = "slhhhr",
+	[LONG_INSN_SLHHLR] = "slhhlr",
+	[LONG_INSN_TABORT] = "tabort",
+	[LONG_INSN_TBEGIN] = "tbegin",
+	[LONG_INSN_TBEGINC] = "tbeginc",
+	[LONG_INSN_PCISTG] = "pcistg",
+	[LONG_INSN_MPCIFC] = "mpcifc",
+	[LONG_INSN_STPCIFC] = "stpcifc",
+	[LONG_INSN_PCISTB] = "pcistb",
+	[LONG_INSN_VPOPCT] = "vpopct",
+	[LONG_INSN_VERLLV] = "verllv",
+	[LONG_INSN_VESRAV] = "vesrav",
+	[LONG_INSN_VESRLV] = "vesrlv",
+	[LONG_INSN_VSBCBI] = "vsbcbi",
+	[LONG_INSN_STCCTM] = "stcctm",
+};
+
+static struct s390_insn opcode[] = {	/* each entry: { mnemonic, opcode byte, format } */
+	{ "bprp", 0xc5, INSTR_MII_UPI },
+	{ "bpp", 0xc7, INSTR_SMI_U0RDP },
+	{ "trtr", 0xd0, INSTR_SS_L0RDRD },
+	{ "lmd", 0xef, INSTR_SS_RRRDRD3 },
+	{ "spm", 0x04, INSTR_RR_R0 },
+	{ "balr", 0x05, INSTR_RR_RR },
+	{ "bctr", 0x06, INSTR_RR_RR },
+	{ "bcr", 0x07, INSTR_RR_UR },
+	{ "svc", 0x0a, INSTR_RR_U0 },
+	{ "bsm", 0x0b, INSTR_RR_RR },
+	{ "bassm", 0x0c, INSTR_RR_RR },
+	{ "basr", 0x0d, INSTR_RR_RR },
+	{ "mvcl", 0x0e, INSTR_RR_RR },
+	{ "clcl", 0x0f, INSTR_RR_RR },
+	{ "lpr", 0x10, INSTR_RR_RR },
+	{ "lnr", 0x11, INSTR_RR_RR },
+	{ "ltr", 0x12, INSTR_RR_RR },
+	{ "lcr", 0x13, INSTR_RR_RR },
+	{ "nr", 0x14, INSTR_RR_RR },
+	{ "clr", 0x15, INSTR_RR_RR },
+	{ "or", 0x16, INSTR_RR_RR },
+	{ "xr", 0x17, INSTR_RR_RR },
+	{ "lr", 0x18, INSTR_RR_RR },
+	{ "cr", 0x19, INSTR_RR_RR },
+	{ "ar", 0x1a, INSTR_RR_RR },
+	{ "sr", 0x1b, INSTR_RR_RR },
+	{ "mr", 0x1c, INSTR_RR_RR },
+	{ "dr", 0x1d, INSTR_RR_RR },
+	{ "alr", 0x1e, INSTR_RR_RR },
+	{ "slr", 0x1f, INSTR_RR_RR },
+	{ "lpdr", 0x20, INSTR_RR_FF },
+	{ "lndr", 0x21, INSTR_RR_FF },
+	{ "ltdr", 0x22, INSTR_RR_FF },
+	{ "lcdr", 0x23, INSTR_RR_FF },
+	{ "hdr", 0x24, INSTR_RR_FF },
+	{ "ldxr", 0x25, INSTR_RR_FF },
+	{ "mxr", 0x26, INSTR_RR_FF },
+	{ "mxdr", 0x27, INSTR_RR_FF },
+	{ "ldr", 0x28, INSTR_RR_FF },
+	{ "cdr", 0x29, INSTR_RR_FF },
+	{ "adr", 0x2a, INSTR_RR_FF },
+	{ "sdr", 0x2b, INSTR_RR_FF },
+	{ "mdr", 0x2c, INSTR_RR_FF },
+	{ "ddr", 0x2d, INSTR_RR_FF },
+	{ "awr", 0x2e, INSTR_RR_FF },
+	{ "swr", 0x2f, INSTR_RR_FF },
+	{ "lper", 0x30, INSTR_RR_FF },
+	{ "lner", 0x31, INSTR_RR_FF },
+	{ "lter", 0x32, INSTR_RR_FF },
+	{ "lcer", 0x33, INSTR_RR_FF },
+	{ "her", 0x34, INSTR_RR_FF },
+	{ "ledr", 0x35, INSTR_RR_FF },
+	{ "axr", 0x36, INSTR_RR_FF },
+	{ "sxr", 0x37, INSTR_RR_FF },
+	{ "ler", 0x38, INSTR_RR_FF },
+	{ "cer", 0x39, INSTR_RR_FF },
+	{ "aer", 0x3a, INSTR_RR_FF },
+	{ "ser", 0x3b, INSTR_RR_FF },
+	{ "mder", 0x3c, INSTR_RR_FF },
+	{ "der", 0x3d, INSTR_RR_FF },
+	{ "aur", 0x3e, INSTR_RR_FF },
+	{ "sur", 0x3f, INSTR_RR_FF },
+	{ "sth", 0x40, INSTR_RX_RRRD },
+	{ "la", 0x41, INSTR_RX_RRRD },
+	{ "stc", 0x42, INSTR_RX_RRRD },
+	{ "ic", 0x43, INSTR_RX_RRRD },
+	{ "ex", 0x44, INSTR_RX_RRRD },
+	{ "bal", 0x45, INSTR_RX_RRRD },
+	{ "bct", 0x46, INSTR_RX_RRRD },
+	{ "bc", 0x47, INSTR_RX_URRD },
+	{ "lh", 0x48, INSTR_RX_RRRD },
+	{ "ch", 0x49, INSTR_RX_RRRD },
+	{ "ah", 0x4a, INSTR_RX_RRRD },
+	{ "sh", 0x4b, INSTR_RX_RRRD },
+	{ "mh", 0x4c, INSTR_RX_RRRD },
+	{ "bas", 0x4d, INSTR_RX_RRRD },
+	{ "cvd", 0x4e, INSTR_RX_RRRD },
+	{ "cvb", 0x4f, INSTR_RX_RRRD },
+	{ "st", 0x50, INSTR_RX_RRRD },
+	{ "lae", 0x51, INSTR_RX_RRRD },
+	{ "n", 0x54, INSTR_RX_RRRD },
+	{ "cl", 0x55, INSTR_RX_RRRD },
+	{ "o", 0x56, INSTR_RX_RRRD },
+	{ "x", 0x57, INSTR_RX_RRRD },
+	{ "l", 0x58, INSTR_RX_RRRD },
+	{ "c", 0x59, INSTR_RX_RRRD },
+	{ "a", 0x5a, INSTR_RX_RRRD },
+	{ "s", 0x5b, INSTR_RX_RRRD },
+	{ "m", 0x5c, INSTR_RX_RRRD },
+	{ "d", 0x5d, INSTR_RX_RRRD },
+	{ "al", 0x5e, INSTR_RX_RRRD },
+	{ "sl", 0x5f, INSTR_RX_RRRD },
+	{ "std", 0x60, INSTR_RX_FRRD },
+	{ "mxd", 0x67, INSTR_RX_FRRD },
+	{ "ld", 0x68, INSTR_RX_FRRD },
+	{ "cd", 0x69, INSTR_RX_FRRD },
+	{ "ad", 0x6a, INSTR_RX_FRRD },
+	{ "sd", 0x6b, INSTR_RX_FRRD },
+	{ "md", 0x6c, INSTR_RX_FRRD },
+	{ "dd", 0x6d, INSTR_RX_FRRD },
+	{ "aw", 0x6e, INSTR_RX_FRRD },
+	{ "sw", 0x6f, INSTR_RX_FRRD },
+	{ "ste", 0x70, INSTR_RX_FRRD },
+	{ "ms", 0x71, INSTR_RX_RRRD },
+	{ "le", 0x78, INSTR_RX_FRRD },
+	{ "ce", 0x79, INSTR_RX_FRRD },
+	{ "ae", 0x7a, INSTR_RX_FRRD },
+	{ "se", 0x7b, INSTR_RX_FRRD },
+	{ "mde", 0x7c, INSTR_RX_FRRD },
+	{ "de", 0x7d, INSTR_RX_FRRD },
+	{ "au", 0x7e, INSTR_RX_FRRD },
+	{ "su", 0x7f, INSTR_RX_FRRD },
+	{ "ssm", 0x80, INSTR_S_RD },
+	{ "lpsw", 0x82, INSTR_S_RD },
+	{ "diag", 0x83, INSTR_RS_RRRD },
+	{ "brxh", 0x84, INSTR_RSI_RRP },
+	{ "brxle", 0x85, INSTR_RSI_RRP },
+	{ "bxh", 0x86, INSTR_RS_RRRD },
+	{ "bxle", 0x87, INSTR_RS_RRRD },
+	{ "srl", 0x88, INSTR_RS_R0RD },
+	{ "sll", 0x89, INSTR_RS_R0RD },
+	{ "sra", 0x8a, INSTR_RS_R0RD },
+	{ "sla", 0x8b, INSTR_RS_R0RD },
+	{ "srdl", 0x8c, INSTR_RS_R0RD },
+	{ "sldl", 0x8d, INSTR_RS_R0RD },
+	{ "srda", 0x8e, INSTR_RS_R0RD },
+	{ "slda", 0x8f, INSTR_RS_R0RD },
+	{ "stm", 0x90, INSTR_RS_RRRD },
+	{ "tm", 0x91, INSTR_SI_URD },
+	{ "mvi", 0x92, INSTR_SI_URD },
+	{ "ts", 0x93, INSTR_S_RD },
+	{ "ni", 0x94, INSTR_SI_URD },
+	{ "cli", 0x95, INSTR_SI_URD },
+	{ "oi", 0x96, INSTR_SI_URD },
+	{ "xi", 0x97, INSTR_SI_URD },
+	{ "lm", 0x98, INSTR_RS_RRRD },
+	{ "trace", 0x99, INSTR_RS_RRRD },
+	{ "lam", 0x9a, INSTR_RS_AARD },
+	{ "stam", 0x9b, INSTR_RS_AARD },
+	{ "mvcle", 0xa8, INSTR_RS_RRRD },
+	{ "clcle", 0xa9, INSTR_RS_RRRD },
+	{ "stnsm", 0xac, INSTR_SI_URD },
+	{ "stosm", 0xad, INSTR_SI_URD },
+	{ "sigp", 0xae, INSTR_RS_RRRD },
+	{ "mc", 0xaf, INSTR_SI_URD },
+	{ "lra", 0xb1, INSTR_RX_RRRD },
+	{ "stctl", 0xb6, INSTR_RS_CCRD },
+	{ "lctl", 0xb7, INSTR_RS_CCRD },
+	{ "cs", 0xba, INSTR_RS_RRRD },
+	{ "cds", 0xbb, INSTR_RS_RRRD },
+	{ "clm", 0xbd, INSTR_RS_RURD },
+	{ "stcm", 0xbe, INSTR_RS_RURD },
+	{ "icm", 0xbf, INSTR_RS_RURD },
+	{ "mvn", 0xd1, INSTR_SS_L0RDRD },
+	{ "mvc", 0xd2, INSTR_SS_L0RDRD },
+	{ "mvz", 0xd3, INSTR_SS_L0RDRD },
+	{ "nc", 0xd4, INSTR_SS_L0RDRD },
+	{ "clc", 0xd5, INSTR_SS_L0RDRD },
+	{ "oc", 0xd6, INSTR_SS_L0RDRD },
+	{ "xc", 0xd7, INSTR_SS_L0RDRD },
+	{ "mvck", 0xd9, INSTR_SS_RRRDRD },
+	{ "mvcp", 0xda, INSTR_SS_RRRDRD },
+	{ "mvcs", 0xdb, INSTR_SS_RRRDRD },
+	{ "tr", 0xdc, INSTR_SS_L0RDRD },
+	{ "trt", 0xdd, INSTR_SS_L0RDRD },
+	{ "ed", 0xde, INSTR_SS_L0RDRD },
+	{ "edmk", 0xdf, INSTR_SS_L0RDRD },
+	{ "pku", 0xe1, INSTR_SS_L0RDRD },
+	{ "unpku", 0xe2, INSTR_SS_L0RDRD },
+	{ "mvcin", 0xe8, INSTR_SS_L0RDRD },
+	{ "pka", 0xe9, INSTR_SS_L0RDRD },
+	{ "unpka", 0xea, INSTR_SS_L0RDRD },
+	{ "plo", 0xee, INSTR_SS_RRRDRD2 },
+	{ "srp", 0xf0, INSTR_SS_LIRDRD },
+	{ "mvo", 0xf1, INSTR_SS_LLRDRD },
+	{ "pack", 0xf2, INSTR_SS_LLRDRD },
+	{ "unpk", 0xf3, INSTR_SS_LLRDRD },
+	{ "zap", 0xf8, INSTR_SS_LLRDRD },
+	{ "cp", 0xf9, INSTR_SS_LLRDRD },
+	{ "ap", 0xfa, INSTR_SS_LLRDRD },
+	{ "sp", 0xfb, INSTR_SS_LLRDRD },
+	{ "mp", 0xfc, INSTR_SS_LLRDRD },
+	{ "dp", 0xfd, INSTR_SS_LLRDRD },
+	{ "", 0, INSTR_INVALID }	/* last entry; presumably the end marker */
+};
+
+static struct s390_insn opcode_01[] = {	/* presumably the 0x01xx opcodes */
+	{ "ptff", 0x04, INSTR_E },
+	{ "pfpo", 0x0a, INSTR_E },
+	{ "sam64", 0x0e, INSTR_E },
+	{ "pr", 0x01, INSTR_E },
+	{ "upt", 0x02, INSTR_E },
+	{ "sckpf", 0x07, INSTR_E },
+	{ "tam", 0x0b, INSTR_E },
+	{ "sam24", 0x0c, INSTR_E },
+	{ "sam31", 0x0d, INSTR_E },
+	{ "trap2", 0xff, INSTR_E },
+	{ "", 0, INSTR_INVALID }	/* last entry; presumably the end marker */
+};
+
+static struct s390_insn opcode_a5[] = {	/* presumably the 0xa5 opcodes */
+	{ "iihh", 0x00, INSTR_RI_RU },
+	{ "iihl", 0x01, INSTR_RI_RU },
+	{ "iilh", 0x02, INSTR_RI_RU },
+	{ "iill", 0x03, INSTR_RI_RU },
+	{ "nihh", 0x04, INSTR_RI_RU },
+	{ "nihl", 0x05, INSTR_RI_RU },
+	{ "nilh", 0x06, INSTR_RI_RU },
+	{ "nill", 0x07, INSTR_RI_RU },
+	{ "oihh", 0x08, INSTR_RI_RU },
+	{ "oihl", 0x09, INSTR_RI_RU },
+	{ "oilh", 0x0a, INSTR_RI_RU },
+	{ "oill", 0x0b, INSTR_RI_RU },
+	{ "llihh", 0x0c, INSTR_RI_RU },
+	{ "llihl", 0x0d, INSTR_RI_RU },
+	{ "llilh", 0x0e, INSTR_RI_RU },
+	{ "llill", 0x0f, INSTR_RI_RU },
+	{ "", 0, INSTR_INVALID }	/* last entry; presumably the end marker */
+};
+
+static struct s390_insn opcode_a7[] = {	/* presumably the 0xa7 opcodes */
+	{ "tmhh", 0x02, INSTR_RI_RU },
+	{ "tmhl", 0x03, INSTR_RI_RU },
+	{ "brctg", 0x07, INSTR_RI_RP },
+	{ "lghi", 0x09, INSTR_RI_RI },
+	{ "aghi", 0x0b, INSTR_RI_RI },
+	{ "mghi", 0x0d, INSTR_RI_RI },
+	{ "cghi", 0x0f, INSTR_RI_RI },
+	{ "tmlh", 0x00, INSTR_RI_RU },
+	{ "tmll", 0x01, INSTR_RI_RU },
+	{ "brc", 0x04, INSTR_RI_UP },
+	{ "bras", 0x05, INSTR_RI_RP },
+	{ "brct", 0x06, INSTR_RI_RP },
+	{ "lhi", 0x08, INSTR_RI_RI },
+	{ "ahi", 0x0a, INSTR_RI_RI },
+	{ "mhi", 0x0c, INSTR_RI_RI },
+	{ "chi", 0x0e, INSTR_RI_RI },
+	{ "", 0, INSTR_INVALID }	/* last entry; presumably the end marker */
+};
+
+static struct s390_insn opcode_aa[] = {	/* presumably the 0xaa opcodes */
+	{ { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI },	/* { 0, idx }: presumably long_insn_name[idx] */
+	{ "rion", 0x01, INSTR_RI_RI },
+	{ "tric", 0x02, INSTR_RI_RI },
+	{ "rioff", 0x03, INSTR_RI_RI },
+	{ { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI },
+	{ "", 0, INSTR_INVALID }	/* last entry; presumably the end marker */
+};
+
+static struct s390_insn opcode_b2[] = {	/* presumably the 0xb2xx opcodes */
+	{ "stckf", 0x7c, INSTR_S_RD },
+	{ "lpp", 0x80, INSTR_S_RD },
+	{ "lcctl", 0x84, INSTR_S_RD },
+	{ "lpctl", 0x85, INSTR_S_RD },
+	{ "qsi", 0x86, INSTR_S_RD },
+	{ "lsctl", 0x87, INSTR_S_RD },
+	{ "qctri", 0x8e, INSTR_S_RD },
+	{ "stfle", 0xb0, INSTR_S_RD },
+	{ "lpswe", 0xb2, INSTR_S_RD },
+	{ "srnmb", 0xb8, INSTR_S_RD },
+	{ "srnmt", 0xb9, INSTR_S_RD },
+	{ "lfas", 0xbd, INSTR_S_RD },
+	{ "scctr", 0xe0, INSTR_RRE_RR },
+	{ "spctr", 0xe1, INSTR_RRE_RR },
+	{ "ecctr", 0xe4, INSTR_RRE_RR },
+	{ "epctr", 0xe5, INSTR_RRE_RR },
+	{ "ppa", 0xe8, INSTR_RRF_U0RR },
+	{ "etnd", 0xec, INSTR_RRE_R0 },
+	{ "ecpga", 0xed, INSTR_RRE_RR },
+	{ "tend", 0xf8, INSTR_S_00 },
+	{ "niai", 0xfa, INSTR_IE_UU },
+	{ { 0, LONG_INSN_TABORT }, 0xfc, INSTR_S_RD },	/* { 0, idx }: presumably long_insn_name[idx] */
+	{ "stidp", 0x02, INSTR_S_RD },
+	{ "sck", 0x04, INSTR_S_RD },
+	{ "stck", 0x05, INSTR_S_RD },
+	{ "sckc", 0x06, INSTR_S_RD },
+	{ "stckc", 0x07, INSTR_S_RD },
+	{ "spt", 0x08, INSTR_S_RD },
+	{ "stpt", 0x09, INSTR_S_RD },
+	{ "spka", 0x0a, INSTR_S_RD },
+	{ "ipk", 0x0b, INSTR_S_00 },
+	{ "ptlb", 0x0d, INSTR_S_00 },
+	{ "spx", 0x10, INSTR_S_RD },
+	{ "stpx", 0x11, INSTR_S_RD },
+	{ "stap", 0x12, INSTR_S_RD },
+	{ "sie", 0x14, INSTR_S_RD },
+	{ "pc", 0x18, INSTR_S_RD },
+	{ "sac", 0x19, INSTR_S_RD },
+	{ "cfc", 0x1a, INSTR_S_RD },
+	{ "servc", 0x20, INSTR_RRE_RR },
+	{ "ipte", 0x21, INSTR_RRE_RR },
+	{ "ipm", 0x22, INSTR_RRE_R0 },
+	{ "ivsk", 0x23, INSTR_RRE_RR },
+	{ "iac", 0x24, INSTR_RRE_R0 },
+	{ "ssar", 0x25, INSTR_RRE_R0 },
+	{ "epar", 0x26, INSTR_RRE_R0 },
+	{ "esar", 0x27, INSTR_RRE_R0 },
+	{ "pt", 0x28, INSTR_RRE_RR },
+	{ "iske", 0x29, INSTR_RRE_RR },
+	{ "rrbe", 0x2a, INSTR_RRE_RR },
+	{ "sske", 0x2b, INSTR_RRF_M0RR },
+	{ "tb", 0x2c, INSTR_RRE_0R },
+	{ "dxr", 0x2d, INSTR_RRE_FF },
+	{ "pgin", 0x2e, INSTR_RRE_RR },
+	{ "pgout", 0x2f, INSTR_RRE_RR },
+	{ "csch", 0x30, INSTR_S_00 },
+	{ "hsch", 0x31, INSTR_S_00 },
+	{ "msch", 0x32, INSTR_S_RD },
+	{ "ssch", 0x33, INSTR_S_RD },
+	{ "stsch", 0x34, INSTR_S_RD },
+	{ "tsch", 0x35, INSTR_S_RD },
+	{ "tpi", 0x36, INSTR_S_RD },
+	{ "sal", 0x37, INSTR_S_00 },
+	{ "rsch", 0x38, INSTR_S_00 },
+	{ "stcrw", 0x39, INSTR_S_RD },
+	{ "stcps", 0x3a, INSTR_S_RD },
+	{ "rchp", 0x3b, INSTR_S_00 },
+	{ "schm", 0x3c, INSTR_S_00 },
+	{ "bakr", 0x40, INSTR_RRE_RR },
+	{ "cksm", 0x41, INSTR_RRE_RR },
+	{ "sqdr", 0x44, INSTR_RRE_FF },
+	{ "sqer", 0x45, INSTR_RRE_FF },
+	{ "stura", 0x46, INSTR_RRE_RR },
+	{ "msta", 0x47, INSTR_RRE_R0 },
+	{ "palb", 0x48, INSTR_RRE_00 },
+	{ "ereg", 0x49, INSTR_RRE_RR },
+	{ "esta", 0x4a, INSTR_RRE_RR },
+	{ "lura", 0x4b, INSTR_RRE_RR },
+	{ "tar", 0x4c, INSTR_RRE_AR },
+	{ "cpya", 0x4d, INSTR_RRE_AA },
+	{ "sar", 0x4e, INSTR_RRE_AR },
+	{ "ear", 0x4f, INSTR_RRE_RA },
+	{ "csp", 0x50, INSTR_RRE_RR },
+	{ "msr", 0x52, INSTR_RRE_RR },
+	{ "mvpg", 0x54, INSTR_RRE_RR },
+	{ "mvst", 0x55, INSTR_RRE_RR },
+	{ "cuse", 0x57, INSTR_RRE_RR },
+	{ "bsg", 0x58, INSTR_RRE_RR },
+	{ "bsa", 0x5a, INSTR_RRE_RR },
+	{ "clst", 0x5d, INSTR_RRE_RR },
+	{ "srst", 0x5e, INSTR_RRE_RR },
+	{ "cmpsc", 0x63, INSTR_RRE_RR },
+	{ "siga", 0x74, INSTR_S_RD },
+	{ "xsch", 0x76, INSTR_S_00 },
+	{ "rp", 0x77, INSTR_S_RD },
+	{ "stcke", 0x78, INSTR_S_RD },
+	{ "sacf", 0x79, INSTR_S_RD },
+	{ "stsi", 0x7d, INSTR_S_RD },
+	{ "srnm", 0x99, INSTR_S_RD },
+	{ "stfpc", 0x9c, INSTR_S_RD },
+	{ "lfpc", 0x9d, INSTR_S_RD },
+	{ "tre", 0xa5, INSTR_RRE_RR },
+	{ "cuutf", 0xa6, INSTR_RRF_M0RR },
+	{ "cutfu", 0xa7, INSTR_RRF_M0RR },
+	{ "stfl", 0xb1, INSTR_S_RD },
+	{ "trap4", 0xff, INSTR_S_RD },
+	{ "", 0, INSTR_INVALID }	/* last entry; presumably the end marker */
+};
+
+/*
+ * Instruction table used by find_insn() when the first opcode byte is
+ * 0xb3.  find_insn() compares the opfrag of each entry with code[1]
+ * masked by the opcode mask of the entry's format, and returns the first
+ * entry that matches, so the order of the entries is significant.
+ * The scan stops at the INSTR_INVALID end marker.
+ * Entries written as { 0, LONG_INSN_x } hold an index instead of an
+ * inline mnemonic (presumably into a table of longer mnemonics defined
+ * elsewhere in this file - confirm there).
+ */
+static struct s390_insn opcode_b3[] = {
+	{ "maylr", 0x38, INSTR_RRF_F0FF },
+	{ "mylr", 0x39, INSTR_RRF_F0FF },
+	{ "mayr", 0x3a, INSTR_RRF_F0FF },
+	{ "myr", 0x3b, INSTR_RRF_F0FF },
+	{ "mayhr", 0x3c, INSTR_RRF_F0FF },
+	{ "myhr", 0x3d, INSTR_RRF_F0FF },
+	{ "lpdfr", 0x70, INSTR_RRE_FF },
+	{ "lndfr", 0x71, INSTR_RRE_FF },
+	{ "cpsdr", 0x72, INSTR_RRF_F0FF2 },
+	{ "lcdfr", 0x73, INSTR_RRE_FF },
+	{ "sfasr", 0x85, INSTR_RRE_R0 },
+	{ { 0, LONG_INSN_CELFBR }, 0x90, INSTR_RRF_UUFR },
+	{ { 0, LONG_INSN_CDLFBR }, 0x91, INSTR_RRF_UUFR },
+	{ { 0, LONG_INSN_CXLFBR }, 0x92, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CEFBRA }, 0x94, INSTR_RRF_UUFR },
+	{ { 0, LONG_INSN_CDFBRA }, 0x95, INSTR_RRF_UUFR },
+	{ { 0, LONG_INSN_CXFBRA }, 0x96, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CFEBRA }, 0x98, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CFDBRA }, 0x99, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CFXBRA }, 0x9a, INSTR_RRF_UUFR },
+	{ { 0, LONG_INSN_CLFEBR }, 0x9c, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CLFDBR }, 0x9d, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CLFXBR }, 0x9e, INSTR_RRF_UUFR },
+	{ { 0, LONG_INSN_CELGBR }, 0xa0, INSTR_RRF_UUFR },
+	{ { 0, LONG_INSN_CDLGBR }, 0xa1, INSTR_RRF_UUFR },
+	{ { 0, LONG_INSN_CXLGBR }, 0xa2, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CEGBRA }, 0xa4, INSTR_RRF_UUFR },
+	{ { 0, LONG_INSN_CDGBRA }, 0xa5, INSTR_RRF_UUFR },
+	{ { 0, LONG_INSN_CXGBRA }, 0xa6, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CGEBRA }, 0xa8, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CGDBRA }, 0xa9, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CGXBRA }, 0xaa, INSTR_RRF_UUFR },
+	{ { 0, LONG_INSN_CLGEBR }, 0xac, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CLGDBR }, 0xad, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CLGXBR }, 0xae, INSTR_RRF_UUFR },
+	{ "ldgr", 0xc1, INSTR_RRE_FR },
+	{ "cegr", 0xc4, INSTR_RRE_FR },
+	{ "cdgr", 0xc5, INSTR_RRE_FR },
+	{ "cxgr", 0xc6, INSTR_RRE_FR },
+	{ "cger", 0xc8, INSTR_RRF_U0RF },
+	{ "cgdr", 0xc9, INSTR_RRF_U0RF },
+	{ "cgxr", 0xca, INSTR_RRF_U0RF },
+	{ "lgdr", 0xcd, INSTR_RRE_RF },
+	{ "mdtra", 0xd0, INSTR_RRF_FUFF2 },
+	{ "ddtra", 0xd1, INSTR_RRF_FUFF2 },
+	{ "adtra", 0xd2, INSTR_RRF_FUFF2 },
+	{ "sdtra", 0xd3, INSTR_RRF_FUFF2 },
+	{ "ldetr", 0xd4, INSTR_RRF_0UFF },
+	{ "ledtr", 0xd5, INSTR_RRF_UUFF },
+	{ "ltdtr", 0xd6, INSTR_RRE_FF },
+	{ "fidtr", 0xd7, INSTR_RRF_UUFF },
+	{ "mxtra", 0xd8, INSTR_RRF_FUFF2 },
+	{ "dxtra", 0xd9, INSTR_RRF_FUFF2 },
+	{ "axtra", 0xda, INSTR_RRF_FUFF2 },
+	{ "sxtra", 0xdb, INSTR_RRF_FUFF2 },
+	{ "lxdtr", 0xdc, INSTR_RRF_0UFF },
+	{ "ldxtr", 0xdd, INSTR_RRF_UUFF },
+	{ "ltxtr", 0xde, INSTR_RRE_FF },
+	{ "fixtr", 0xdf, INSTR_RRF_UUFF },
+	{ "kdtr", 0xe0, INSTR_RRE_FF },
+	{ { 0, LONG_INSN_CGDTRA }, 0xe1, INSTR_RRF_UURF },
+	{ "cudtr", 0xe2, INSTR_RRE_RF },
+	{ "csdtr", 0xe3, INSTR_RRE_RF },
+	{ "cdtr", 0xe4, INSTR_RRE_FF },
+	{ "eedtr", 0xe5, INSTR_RRE_RF },
+	{ "esdtr", 0xe7, INSTR_RRE_RF },
+	{ "kxtr", 0xe8, INSTR_RRE_FF },
+	{ { 0, LONG_INSN_CGXTRA }, 0xe9, INSTR_RRF_UUFR },
+	{ "cuxtr", 0xea, INSTR_RRE_RF },
+	{ "csxtr", 0xeb, INSTR_RRE_RF },
+	{ "cxtr", 0xec, INSTR_RRE_FF },
+	{ "eextr", 0xed, INSTR_RRE_RF },
+	{ "esxtr", 0xef, INSTR_RRE_RF },
+	{ { 0, LONG_INSN_CDGTRA }, 0xf1, INSTR_RRF_UUFR },
+	{ "cdutr", 0xf2, INSTR_RRE_FR },
+	{ "cdstr", 0xf3, INSTR_RRE_FR },
+	{ "cedtr", 0xf4, INSTR_RRE_FF },
+	{ "qadtr", 0xf5, INSTR_RRF_FUFF },
+	{ "iedtr", 0xf6, INSTR_RRF_F0FR },
+	{ "rrdtr", 0xf7, INSTR_RRF_FFRU },
+	{ { 0, LONG_INSN_CXGTRA }, 0xf9, INSTR_RRF_UURF },
+	{ "cxutr", 0xfa, INSTR_RRE_FR },
+	{ "cxstr", 0xfb, INSTR_RRE_FR },
+	{ "cextr", 0xfc, INSTR_RRE_FF },
+	{ "qaxtr", 0xfd, INSTR_RRF_FUFF },
+	{ "iextr", 0xfe, INSTR_RRF_F0FR },
+	{ "rrxtr", 0xff, INSTR_RRF_FFRU },
+	{ "lpebr", 0x00, INSTR_RRE_FF },
+	{ "lnebr", 0x01, INSTR_RRE_FF },
+	{ "ltebr", 0x02, INSTR_RRE_FF },
+	{ "lcebr", 0x03, INSTR_RRE_FF },
+	{ "ldebr", 0x04, INSTR_RRE_FF },
+	{ "lxdbr", 0x05, INSTR_RRE_FF },
+	{ "lxebr", 0x06, INSTR_RRE_FF },
+	{ "mxdbr", 0x07, INSTR_RRE_FF },
+	{ "kebr", 0x08, INSTR_RRE_FF },
+	{ "cebr", 0x09, INSTR_RRE_FF },
+	{ "aebr", 0x0a, INSTR_RRE_FF },
+	{ "sebr", 0x0b, INSTR_RRE_FF },
+	{ "mdebr", 0x0c, INSTR_RRE_FF },
+	{ "debr", 0x0d, INSTR_RRE_FF },
+	{ "maebr", 0x0e, INSTR_RRF_F0FF },
+	{ "msebr", 0x0f, INSTR_RRF_F0FF },
+	{ "lpdbr", 0x10, INSTR_RRE_FF },
+	{ "lndbr", 0x11, INSTR_RRE_FF },
+	{ "ltdbr", 0x12, INSTR_RRE_FF },
+	{ "lcdbr", 0x13, INSTR_RRE_FF },
+	{ "sqebr", 0x14, INSTR_RRE_FF },
+	{ "sqdbr", 0x15, INSTR_RRE_FF },
+	{ "sqxbr", 0x16, INSTR_RRE_FF },
+	{ "meebr", 0x17, INSTR_RRE_FF },
+	{ "kdbr", 0x18, INSTR_RRE_FF },
+	{ "cdbr", 0x19, INSTR_RRE_FF },
+	{ "adbr", 0x1a, INSTR_RRE_FF },
+	{ "sdbr", 0x1b, INSTR_RRE_FF },
+	{ "mdbr", 0x1c, INSTR_RRE_FF },
+	{ "ddbr", 0x1d, INSTR_RRE_FF },
+	{ "madbr", 0x1e, INSTR_RRF_F0FF },
+	{ "msdbr", 0x1f, INSTR_RRF_F0FF },
+	{ "lder", 0x24, INSTR_RRE_FF },
+	{ "lxdr", 0x25, INSTR_RRE_FF },
+	{ "lxer", 0x26, INSTR_RRE_FF },
+	{ "maer", 0x2e, INSTR_RRF_F0FF },
+	{ "mser", 0x2f, INSTR_RRF_F0FF },
+	{ "sqxr", 0x36, INSTR_RRE_FF },
+	{ "meer", 0x37, INSTR_RRE_FF },
+	{ "madr", 0x3e, INSTR_RRF_F0FF },
+	{ "msdr", 0x3f, INSTR_RRF_F0FF },
+	{ "lpxbr", 0x40, INSTR_RRE_FF },
+	{ "lnxbr", 0x41, INSTR_RRE_FF },
+	{ "ltxbr", 0x42, INSTR_RRE_FF },
+	{ "lcxbr", 0x43, INSTR_RRE_FF },
+	{ { 0, LONG_INSN_LEDBRA }, 0x44, INSTR_RRF_UUFF },
+	{ { 0, LONG_INSN_LDXBRA }, 0x45, INSTR_RRF_UUFF },
+	{ { 0, LONG_INSN_LEXBRA }, 0x46, INSTR_RRF_UUFF },
+	{ { 0, LONG_INSN_FIXBRA }, 0x47, INSTR_RRF_UUFF },
+	{ "kxbr", 0x48, INSTR_RRE_FF },
+	{ "cxbr", 0x49, INSTR_RRE_FF },
+	{ "axbr", 0x4a, INSTR_RRE_FF },
+	{ "sxbr", 0x4b, INSTR_RRE_FF },
+	{ "mxbr", 0x4c, INSTR_RRE_FF },
+	{ "dxbr", 0x4d, INSTR_RRE_FF },
+	{ "tbedr", 0x50, INSTR_RRF_U0FF },
+	{ "tbdr", 0x51, INSTR_RRF_U0FF },
+	{ "diebr", 0x53, INSTR_RRF_FUFF },
+	{ { 0, LONG_INSN_FIEBRA }, 0x57, INSTR_RRF_UUFF },
+	{ "thder", 0x58, INSTR_RRE_FF },
+	{ "thdr", 0x59, INSTR_RRE_FF },
+	{ "didbr", 0x5b, INSTR_RRF_FUFF },
+	{ { 0, LONG_INSN_FIDBRA }, 0x5f, INSTR_RRF_UUFF },
+	{ "lpxr", 0x60, INSTR_RRE_FF },
+	{ "lnxr", 0x61, INSTR_RRE_FF },
+	{ "ltxr", 0x62, INSTR_RRE_FF },
+	{ "lcxr", 0x63, INSTR_RRE_FF },
+	{ "lxr", 0x65, INSTR_RRE_FF },
+	{ "lexr", 0x66, INSTR_RRE_FF },
+	{ "fixr", 0x67, INSTR_RRE_FF },
+	{ "cxr", 0x69, INSTR_RRE_FF },
+	{ "lzer", 0x74, INSTR_RRE_F0 },
+	{ "lzdr", 0x75, INSTR_RRE_F0 },
+	{ "lzxr", 0x76, INSTR_RRE_F0 },
+	{ "fier", 0x77, INSTR_RRE_FF },
+	{ "fidr", 0x7f, INSTR_RRE_FF },
+	{ "sfpc", 0x84, INSTR_RRE_RR_OPT },
+	{ "efpc", 0x8c, INSTR_RRE_RR_OPT },
+	{ "cefbr", 0x94, INSTR_RRE_RF },
+	{ "cdfbr", 0x95, INSTR_RRE_RF },
+	{ "cxfbr", 0x96, INSTR_RRE_RF },
+	{ "cfebr", 0x98, INSTR_RRF_U0RF },
+	{ "cfdbr", 0x99, INSTR_RRF_U0RF },
+	{ "cfxbr", 0x9a, INSTR_RRF_U0RF },
+	{ "cefr", 0xb4, INSTR_RRE_FR },
+	{ "cdfr", 0xb5, INSTR_RRE_FR },
+	{ "cxfr", 0xb6, INSTR_RRE_FR },
+	{ "cfer", 0xb8, INSTR_RRF_U0RF },
+	{ "cfdr", 0xb9, INSTR_RRF_U0RF },
+	{ "cfxr", 0xba, INSTR_RRF_U0RF },
+	{ "", 0, INSTR_INVALID }
+};
+
+/*
+ * Instruction table used by find_insn() when the first opcode byte is
+ * 0xb9.  find_insn() compares the opfrag of each entry with code[1]
+ * masked by the opcode mask of the entry's format, and returns the first
+ * entry that matches, so the order of the entries is significant.
+ * The scan stops at the INSTR_INVALID end marker.
+ * Entries written as { 0, LONG_INSN_x } hold an index instead of an
+ * inline mnemonic (presumably into a table of longer mnemonics defined
+ * elsewhere in this file - confirm there).
+ */
+static struct s390_insn opcode_b9[] = {
+	{ "lpgr", 0x00, INSTR_RRE_RR },
+	{ "lngr", 0x01, INSTR_RRE_RR },
+	{ "ltgr", 0x02, INSTR_RRE_RR },
+	{ "lcgr", 0x03, INSTR_RRE_RR },
+	{ "lgr", 0x04, INSTR_RRE_RR },
+	{ "lurag", 0x05, INSTR_RRE_RR },
+	{ "lgbr", 0x06, INSTR_RRE_RR },
+	{ "lghr", 0x07, INSTR_RRE_RR },
+	{ "agr", 0x08, INSTR_RRE_RR },
+	{ "sgr", 0x09, INSTR_RRE_RR },
+	{ "algr", 0x0a, INSTR_RRE_RR },
+	{ "slgr", 0x0b, INSTR_RRE_RR },
+	{ "msgr", 0x0c, INSTR_RRE_RR },
+	{ "dsgr", 0x0d, INSTR_RRE_RR },
+	{ "eregg", 0x0e, INSTR_RRE_RR },
+	{ "lrvgr", 0x0f, INSTR_RRE_RR },
+	{ "lpgfr", 0x10, INSTR_RRE_RR },
+	{ "lngfr", 0x11, INSTR_RRE_RR },
+	{ "ltgfr", 0x12, INSTR_RRE_RR },
+	{ "lcgfr", 0x13, INSTR_RRE_RR },
+	{ "lgfr", 0x14, INSTR_RRE_RR },
+	{ "llgfr", 0x16, INSTR_RRE_RR },
+	{ "llgtr", 0x17, INSTR_RRE_RR },
+	{ "agfr", 0x18, INSTR_RRE_RR },
+	{ "sgfr", 0x19, INSTR_RRE_RR },
+	{ "algfr", 0x1a, INSTR_RRE_RR },
+	{ "slgfr", 0x1b, INSTR_RRE_RR },
+	{ "msgfr", 0x1c, INSTR_RRE_RR },
+	{ "dsgfr", 0x1d, INSTR_RRE_RR },
+	{ "cgr", 0x20, INSTR_RRE_RR },
+	{ "clgr", 0x21, INSTR_RRE_RR },
+	{ "sturg", 0x25, INSTR_RRE_RR },
+	{ "lbr", 0x26, INSTR_RRE_RR },
+	{ "lhr", 0x27, INSTR_RRE_RR },
+	{ "cgfr", 0x30, INSTR_RRE_RR },
+	{ "clgfr", 0x31, INSTR_RRE_RR },
+	{ "cfdtr", 0x41, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CLGDTR }, 0x42, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CLFDTR }, 0x43, INSTR_RRF_UURF },
+	{ "bctgr", 0x46, INSTR_RRE_RR },
+	{ "cfxtr", 0x49, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CLGXTR }, 0x4a, INSTR_RRF_UUFR },
+	{ { 0, LONG_INSN_CLFXTR }, 0x4b, INSTR_RRF_UUFR },
+	{ "cdftr", 0x51, INSTR_RRF_UUFR },
+	{ { 0, LONG_INSN_CDLGTR }, 0x52, INSTR_RRF_UUFR },
+	{ { 0, LONG_INSN_CDLFTR }, 0x53, INSTR_RRF_UUFR },
+	{ "cxftr", 0x59, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CXLGTR }, 0x5a, INSTR_RRF_UURF },
+	{ { 0, LONG_INSN_CXLFTR }, 0x5b, INSTR_RRF_UUFR },
+	{ "cgrt", 0x60, INSTR_RRF_U0RR },
+	{ "clgrt", 0x61, INSTR_RRF_U0RR },
+	{ "crt", 0x72, INSTR_RRF_U0RR },
+	{ "clrt", 0x73, INSTR_RRF_U0RR },
+	{ "ngr", 0x80, INSTR_RRE_RR },
+	{ "ogr", 0x81, INSTR_RRE_RR },
+	{ "xgr", 0x82, INSTR_RRE_RR },
+	{ "flogr", 0x83, INSTR_RRE_RR },
+	{ "llgcr", 0x84, INSTR_RRE_RR },
+	{ "llghr", 0x85, INSTR_RRE_RR },
+	{ "mlgr", 0x86, INSTR_RRE_RR },
+	{ "dlgr", 0x87, INSTR_RRE_RR },
+	{ "alcgr", 0x88, INSTR_RRE_RR },
+	{ "slbgr", 0x89, INSTR_RRE_RR },
+	{ "cspg", 0x8a, INSTR_RRE_RR },
+	{ "idte", 0x8e, INSTR_RRF_R0RR },
+	{ "crdte", 0x8f, INSTR_RRF_RMRR },
+	{ "llcr", 0x94, INSTR_RRE_RR },
+	{ "llhr", 0x95, INSTR_RRE_RR },
+	{ "esea", 0x9d, INSTR_RRE_R0 },
+	{ "ptf", 0xa2, INSTR_RRE_R0 },
+	{ "lptea", 0xaa, INSTR_RRF_RURR },
+	{ "rrbm", 0xae, INSTR_RRE_RR },
+	{ "pfmf", 0xaf, INSTR_RRE_RR },
+	{ "cu14", 0xb0, INSTR_RRF_M0RR },
+	{ "cu24", 0xb1, INSTR_RRF_M0RR },
+	{ "cu41", 0xb2, INSTR_RRE_RR },
+	{ "cu42", 0xb3, INSTR_RRE_RR },
+	{ "trtre", 0xbd, INSTR_RRF_M0RR },
+	{ "srstu", 0xbe, INSTR_RRE_RR },
+	{ "trte", 0xbf, INSTR_RRF_M0RR },
+	{ "ahhhr", 0xc8, INSTR_RRF_R0RR2 },
+	{ "shhhr", 0xc9, INSTR_RRF_R0RR2 },
+	{ { 0, LONG_INSN_ALHHHR }, 0xca, INSTR_RRF_R0RR2 },
+	{ { 0, LONG_INSN_SLHHHR }, 0xcb, INSTR_RRF_R0RR2 },
+	{ "chhr", 0xcd, INSTR_RRE_RR },
+	{ "clhhr", 0xcf, INSTR_RRE_RR },
+	{ { 0, LONG_INSN_PCISTG }, 0xd0, INSTR_RRE_RR },
+	{ "pcilg", 0xd2, INSTR_RRE_RR },
+	{ "rpcit", 0xd3, INSTR_RRE_RR },
+	{ "ahhlr", 0xd8, INSTR_RRF_R0RR2 },
+	{ "shhlr", 0xd9, INSTR_RRF_R0RR2 },
+	{ { 0, LONG_INSN_ALHHLR }, 0xda, INSTR_RRF_R0RR2 },
+	{ { 0, LONG_INSN_SLHHLR }, 0xdb, INSTR_RRF_R0RR2 },
+	{ "chlr", 0xdd, INSTR_RRE_RR },
+	{ "clhlr", 0xdf, INSTR_RRE_RR },
+	{ { 0, LONG_INSN_POPCNT }, 0xe1, INSTR_RRE_RR },
+	{ "locgr", 0xe2, INSTR_RRF_M0RR },
+	{ "ngrk", 0xe4, INSTR_RRF_R0RR2 },
+	{ "ogrk", 0xe6, INSTR_RRF_R0RR2 },
+	{ "xgrk", 0xe7, INSTR_RRF_R0RR2 },
+	{ "agrk", 0xe8, INSTR_RRF_R0RR2 },
+	{ "sgrk", 0xe9, INSTR_RRF_R0RR2 },
+	{ "algrk", 0xea, INSTR_RRF_R0RR2 },
+	{ "slgrk", 0xeb, INSTR_RRF_R0RR2 },
+	{ "locr", 0xf2, INSTR_RRF_M0RR },
+	{ "nrk", 0xf4, INSTR_RRF_R0RR2 },
+	{ "ork", 0xf6, INSTR_RRF_R0RR2 },
+	{ "xrk", 0xf7, INSTR_RRF_R0RR2 },
+	{ "ark", 0xf8, INSTR_RRF_R0RR2 },
+	{ "srk", 0xf9, INSTR_RRF_R0RR2 },
+	{ "alrk", 0xfa, INSTR_RRF_R0RR2 },
+	{ "slrk", 0xfb, INSTR_RRF_R0RR2 },
+	{ "kmac", 0x1e, INSTR_RRE_RR },
+	{ "lrvr", 0x1f, INSTR_RRE_RR },
+	{ "km", 0x2e, INSTR_RRE_RR },
+	{ "kmc", 0x2f, INSTR_RRE_RR },
+	{ "kimd", 0x3e, INSTR_RRE_RR },
+	{ "klmd", 0x3f, INSTR_RRE_RR },
+	{ "epsw", 0x8d, INSTR_RRE_RR },
+	{ "trtt", 0x90, INSTR_RRF_M0RR },
+	{ "trto", 0x91, INSTR_RRF_M0RR },
+	{ "trot", 0x92, INSTR_RRF_M0RR },
+	{ "troo", 0x93, INSTR_RRF_M0RR },
+	{ "mlr", 0x96, INSTR_RRE_RR },
+	{ "dlr", 0x97, INSTR_RRE_RR },
+	{ "alcr", 0x98, INSTR_RRE_RR },
+	{ "slbr", 0x99, INSTR_RRE_RR },
+	{ "", 0, INSTR_INVALID }
+};
+
+/*
+ * Instruction table used by find_insn() when the first opcode byte is
+ * 0xc0.  find_insn() compares the opfrag of each entry with code[1]
+ * masked by the opcode mask of the entry's format, and returns the first
+ * entry that matches.  The scan stops at the INSTR_INVALID end marker.
+ */
+static struct s390_insn opcode_c0[] = {
+	{ "lgfi", 0x01, INSTR_RIL_RI },
+	{ "xihf", 0x06, INSTR_RIL_RU },
+	{ "xilf", 0x07, INSTR_RIL_RU },
+	{ "iihf", 0x08, INSTR_RIL_RU },
+	{ "iilf", 0x09, INSTR_RIL_RU },
+	{ "nihf", 0x0a, INSTR_RIL_RU },
+	{ "nilf", 0x0b, INSTR_RIL_RU },
+	{ "oihf", 0x0c, INSTR_RIL_RU },
+	{ "oilf", 0x0d, INSTR_RIL_RU },
+	{ "llihf", 0x0e, INSTR_RIL_RU },
+	{ "llilf", 0x0f, INSTR_RIL_RU },
+	{ "larl", 0x00, INSTR_RIL_RP },
+	{ "brcl", 0x04, INSTR_RIL_UP },
+	{ "brasl", 0x05, INSTR_RIL_RP },
+	{ "", 0, INSTR_INVALID }
+};
+
+/*
+ * Instruction table used by find_insn() when the first opcode byte is
+ * 0xc2.  find_insn() compares the opfrag of each entry with code[1]
+ * masked by the opcode mask of the entry's format, and returns the first
+ * entry that matches.  The scan stops at the INSTR_INVALID end marker.
+ */
+static struct s390_insn opcode_c2[] = {
+	{ "msgfi", 0x00, INSTR_RIL_RI },
+	{ "msfi", 0x01, INSTR_RIL_RI },
+	{ "slgfi", 0x04, INSTR_RIL_RU },
+	{ "slfi", 0x05, INSTR_RIL_RU },
+	{ "agfi", 0x08, INSTR_RIL_RI },
+	{ "afi", 0x09, INSTR_RIL_RI },
+	{ "algfi", 0x0a, INSTR_RIL_RU },
+	{ "alfi", 0x0b, INSTR_RIL_RU },
+	{ "cgfi", 0x0c, INSTR_RIL_RI },
+	{ "cfi", 0x0d, INSTR_RIL_RI },
+	{ "clgfi", 0x0e, INSTR_RIL_RU },
+	{ "clfi", 0x0f, INSTR_RIL_RU },
+	{ "", 0, INSTR_INVALID }
+};
+
+/*
+ * Instruction table used by find_insn() when the first opcode byte is
+ * 0xc4.  find_insn() compares the opfrag of each entry with code[1]
+ * masked by the opcode mask of the entry's format, and returns the first
+ * entry that matches.  The scan stops at the INSTR_INVALID end marker.
+ * Entries written as { 0, LONG_INSN_x } hold an index instead of an
+ * inline mnemonic (presumably into a table of longer mnemonics defined
+ * elsewhere in this file - confirm there).
+ */
+static struct s390_insn opcode_c4[] = {
+	{ "llhrl", 0x02, INSTR_RIL_RP },
+	{ "lghrl", 0x04, INSTR_RIL_RP },
+	{ "lhrl", 0x05, INSTR_RIL_RP },
+	{ { 0, LONG_INSN_LLGHRL }, 0x06, INSTR_RIL_RP },
+	{ "sthrl", 0x07, INSTR_RIL_RP },
+	{ "lgrl", 0x08, INSTR_RIL_RP },
+	{ "stgrl", 0x0b, INSTR_RIL_RP },
+	{ "lgfrl", 0x0c, INSTR_RIL_RP },
+	{ "lrl", 0x0d, INSTR_RIL_RP },
+	{ { 0, LONG_INSN_LLGFRL }, 0x0e, INSTR_RIL_RP },
+	{ "strl", 0x0f, INSTR_RIL_RP },
+	{ "", 0, INSTR_INVALID }
+};
+
+/*
+ * Instruction table used by find_insn() when the first opcode byte is
+ * 0xc6.  find_insn() compares the opfrag of each entry with code[1]
+ * masked by the opcode mask of the entry's format, and returns the first
+ * entry that matches.  The scan stops at the INSTR_INVALID end marker.
+ * Entries written as { 0, LONG_INSN_x } hold an index instead of an
+ * inline mnemonic (presumably into a table of longer mnemonics defined
+ * elsewhere in this file - confirm there).
+ */
+static struct s390_insn opcode_c6[] = {
+	{ "exrl", 0x00, INSTR_RIL_RP },
+	{ "pfdrl", 0x02, INSTR_RIL_UP },
+	{ "cghrl", 0x04, INSTR_RIL_RP },
+	{ "chrl", 0x05, INSTR_RIL_RP },
+	{ { 0, LONG_INSN_CLGHRL }, 0x06, INSTR_RIL_RP },
+	{ "clhrl", 0x07, INSTR_RIL_RP },
+	{ "cgrl", 0x08, INSTR_RIL_RP },
+	{ "clgrl", 0x0a, INSTR_RIL_RP },
+	{ "cgfrl", 0x0c, INSTR_RIL_RP },
+	{ "crl", 0x0d, INSTR_RIL_RP },
+	{ { 0, LONG_INSN_CLGFRL }, 0x0e, INSTR_RIL_RP },
+	{ "clrl", 0x0f, INSTR_RIL_RP },
+	{ "", 0, INSTR_INVALID }
+};
+
+/*
+ * Instruction table used by find_insn() when the first opcode byte is
+ * 0xc8.  find_insn() compares the opfrag of each entry with code[1]
+ * masked by the opcode mask of the entry's format, and returns the first
+ * entry that matches.  The scan stops at the INSTR_INVALID end marker.
+ */
+static struct s390_insn opcode_c8[] = {
+	{ "mvcos", 0x00, INSTR_SSF_RRDRD },
+	{ "ectg", 0x01, INSTR_SSF_RRDRD },
+	{ "csst", 0x02, INSTR_SSF_RRDRD },
+	{ "lpd", 0x04, INSTR_SSF_RRDRD2 },
+	{ "lpdg", 0x05, INSTR_SSF_RRDRD2 },
+	{ "", 0, INSTR_INVALID }
+};
+
+/*
+ * Instruction table used by find_insn() when the first opcode byte is
+ * 0xcc.  find_insn() compares the opfrag of each entry with code[1]
+ * masked by the opcode mask of the entry's format, and returns the first
+ * entry that matches.  The scan stops at the INSTR_INVALID end marker.
+ * Entries written as { 0, LONG_INSN_x } hold an index instead of an
+ * inline mnemonic (presumably into a table of longer mnemonics defined
+ * elsewhere in this file - confirm there).
+ */
+static struct s390_insn opcode_cc[] = {
+	{ "brcth", 0x06, INSTR_RIL_RP },
+	{ "aih", 0x08, INSTR_RIL_RI },
+	{ "alsih", 0x0a, INSTR_RIL_RI },
+	{ { 0, LONG_INSN_ALSIHN }, 0x0b, INSTR_RIL_RI },
+	{ "cih", 0x0d, INSTR_RIL_RI },
+	{ "clih", 0x0f, INSTR_RIL_RI },
+	{ "", 0, INSTR_INVALID }
+};
+
+/*
+ * Instruction table used by find_insn() when the first opcode byte is
+ * 0xe3.  For this table find_insn() takes the opcode fragment from
+ * code[5], masks it with the opcode mask of the entry's format, compares
+ * it with the opfrag of each entry and returns the first entry that
+ * matches.  The scan stops at the INSTR_INVALID end marker.
+ * Entries written as { 0, LONG_INSN_x } hold an index instead of an
+ * inline mnemonic (presumably into a table of longer mnemonics defined
+ * elsewhere in this file - confirm there).
+ */
+static struct s390_insn opcode_e3[] = {
+	{ "ltg", 0x02, INSTR_RXY_RRRD },
+	{ "lrag", 0x03, INSTR_RXY_RRRD },
+	{ "lg", 0x04, INSTR_RXY_RRRD },
+	{ "cvby", 0x06, INSTR_RXY_RRRD },
+	{ "ag", 0x08, INSTR_RXY_RRRD },
+	{ "sg", 0x09, INSTR_RXY_RRRD },
+	{ "alg", 0x0a, INSTR_RXY_RRRD },
+	{ "slg", 0x0b, INSTR_RXY_RRRD },
+	{ "msg", 0x0c, INSTR_RXY_RRRD },
+	{ "dsg", 0x0d, INSTR_RXY_RRRD },
+	{ "cvbg", 0x0e, INSTR_RXY_RRRD },
+	{ "lrvg", 0x0f, INSTR_RXY_RRRD },
+	{ "lt", 0x12, INSTR_RXY_RRRD },
+	{ "lray", 0x13, INSTR_RXY_RRRD },
+	{ "lgf", 0x14, INSTR_RXY_RRRD },
+	{ "lgh", 0x15, INSTR_RXY_RRRD },
+	{ "llgf", 0x16, INSTR_RXY_RRRD },
+	{ "llgt", 0x17, INSTR_RXY_RRRD },
+	{ "agf", 0x18, INSTR_RXY_RRRD },
+	{ "sgf", 0x19, INSTR_RXY_RRRD },
+	{ "algf", 0x1a, INSTR_RXY_RRRD },
+	{ "slgf", 0x1b, INSTR_RXY_RRRD },
+	{ "msgf", 0x1c, INSTR_RXY_RRRD },
+	{ "dsgf", 0x1d, INSTR_RXY_RRRD },
+	{ "cg", 0x20, INSTR_RXY_RRRD },
+	{ "clg", 0x21, INSTR_RXY_RRRD },
+	{ "stg", 0x24, INSTR_RXY_RRRD },
+	{ "ntstg", 0x25, INSTR_RXY_RRRD },
+	{ "cvdy", 0x26, INSTR_RXY_RRRD },
+	{ "cvdg", 0x2e, INSTR_RXY_RRRD },
+	{ "strvg", 0x2f, INSTR_RXY_RRRD },
+	{ "cgf", 0x30, INSTR_RXY_RRRD },
+	{ "clgf", 0x31, INSTR_RXY_RRRD },
+	{ "ltgf", 0x32, INSTR_RXY_RRRD },
+	{ "cgh", 0x34, INSTR_RXY_RRRD },
+	{ "pfd", 0x36, INSTR_RXY_URRD },
+	{ "strvh", 0x3f, INSTR_RXY_RRRD },
+	{ "bctg", 0x46, INSTR_RXY_RRRD },
+	{ "sty", 0x50, INSTR_RXY_RRRD },
+	{ "msy", 0x51, INSTR_RXY_RRRD },
+	{ "ny", 0x54, INSTR_RXY_RRRD },
+	{ "cly", 0x55, INSTR_RXY_RRRD },
+	{ "oy", 0x56, INSTR_RXY_RRRD },
+	{ "xy", 0x57, INSTR_RXY_RRRD },
+	{ "ly", 0x58, INSTR_RXY_RRRD },
+	{ "cy", 0x59, INSTR_RXY_RRRD },
+	{ "ay", 0x5a, INSTR_RXY_RRRD },
+	{ "sy", 0x5b, INSTR_RXY_RRRD },
+	{ "mfy", 0x5c, INSTR_RXY_RRRD },
+	{ "aly", 0x5e, INSTR_RXY_RRRD },
+	{ "sly", 0x5f, INSTR_RXY_RRRD },
+	{ "sthy", 0x70, INSTR_RXY_RRRD },
+	{ "lay", 0x71, INSTR_RXY_RRRD },
+	{ "stcy", 0x72, INSTR_RXY_RRRD },
+	{ "icy", 0x73, INSTR_RXY_RRRD },
+	{ "laey", 0x75, INSTR_RXY_RRRD },
+	{ "lb", 0x76, INSTR_RXY_RRRD },
+	{ "lgb", 0x77, INSTR_RXY_RRRD },
+	{ "lhy", 0x78, INSTR_RXY_RRRD },
+	{ "chy", 0x79, INSTR_RXY_RRRD },
+	{ "ahy", 0x7a, INSTR_RXY_RRRD },
+	{ "shy", 0x7b, INSTR_RXY_RRRD },
+	{ "mhy", 0x7c, INSTR_RXY_RRRD },
+	{ "ng", 0x80, INSTR_RXY_RRRD },
+	{ "og", 0x81, INSTR_RXY_RRRD },
+	{ "xg", 0x82, INSTR_RXY_RRRD },
+	{ "lgat", 0x85, INSTR_RXY_RRRD },
+	{ "mlg", 0x86, INSTR_RXY_RRRD },
+	{ "dlg", 0x87, INSTR_RXY_RRRD },
+	{ "alcg", 0x88, INSTR_RXY_RRRD },
+	{ "slbg", 0x89, INSTR_RXY_RRRD },
+	{ "stpq", 0x8e, INSTR_RXY_RRRD },
+	{ "lpq", 0x8f, INSTR_RXY_RRRD },
+	{ "llgc", 0x90, INSTR_RXY_RRRD },
+	{ "llgh", 0x91, INSTR_RXY_RRRD },
+	{ "llc", 0x94, INSTR_RXY_RRRD },
+	{ "llh", 0x95, INSTR_RXY_RRRD },
+	{ { 0, LONG_INSN_LLGTAT }, 0x9c, INSTR_RXY_RRRD },
+	{ { 0, LONG_INSN_LLGFAT }, 0x9d, INSTR_RXY_RRRD },
+	{ "lat", 0x9f, INSTR_RXY_RRRD },
+	{ "lbh", 0xc0, INSTR_RXY_RRRD },
+	{ "llch", 0xc2, INSTR_RXY_RRRD },
+	{ "stch", 0xc3, INSTR_RXY_RRRD },
+	{ "lhh", 0xc4, INSTR_RXY_RRRD },
+	{ "llhh", 0xc6, INSTR_RXY_RRRD },
+	{ "sthh", 0xc7, INSTR_RXY_RRRD },
+	{ "lfhat", 0xc8, INSTR_RXY_RRRD },
+	{ "lfh", 0xca, INSTR_RXY_RRRD },
+	{ "stfh", 0xcb, INSTR_RXY_RRRD },
+	{ "chf", 0xcd, INSTR_RXY_RRRD },
+	{ "clhf", 0xcf, INSTR_RXY_RRRD },
+	{ { 0, LONG_INSN_MPCIFC }, 0xd0, INSTR_RXY_RRRD },
+	{ { 0, LONG_INSN_STPCIFC }, 0xd4, INSTR_RXY_RRRD },
+	{ "lrv", 0x1e, INSTR_RXY_RRRD },
+	{ "lrvh", 0x1f, INSTR_RXY_RRRD },
+	{ "strv", 0x3e, INSTR_RXY_RRRD },
+	{ "ml", 0x96, INSTR_RXY_RRRD },
+	{ "dl", 0x97, INSTR_RXY_RRRD },
+	{ "alc", 0x98, INSTR_RXY_RRRD },
+	{ "slb", 0x99, INSTR_RXY_RRRD },
+	{ "", 0, INSTR_INVALID }
+};
+
+/*
+ * Instruction table used by find_insn() when the first opcode byte is
+ * 0xe5.  find_insn() compares the opfrag of each entry with code[1]
+ * masked by the opcode mask of the entry's format, and returns the first
+ * entry that matches.  The scan stops at the INSTR_INVALID end marker.
+ * Entries written as { 0, LONG_INSN_x } hold an index instead of an
+ * inline mnemonic (presumably into a table of longer mnemonics defined
+ * elsewhere in this file - confirm there).
+ */
+static struct s390_insn opcode_e5[] = {
+	{ "strag", 0x02, INSTR_SSE_RDRD },
+	{ "mvhhi", 0x44, INSTR_SIL_RDI },
+	{ "mvghi", 0x48, INSTR_SIL_RDI },
+	{ "mvhi", 0x4c, INSTR_SIL_RDI },
+	{ "chhsi", 0x54, INSTR_SIL_RDI },
+	{ { 0, LONG_INSN_CLHHSI }, 0x55, INSTR_SIL_RDU },
+	{ "cghsi", 0x58, INSTR_SIL_RDI },
+	{ { 0, LONG_INSN_CLGHSI }, 0x59, INSTR_SIL_RDU },
+	{ "chsi", 0x5c, INSTR_SIL_RDI },
+	{ { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU },
+	{ { 0, LONG_INSN_TBEGIN }, 0x60, INSTR_SIL_RDU },
+	{ { 0, LONG_INSN_TBEGINC }, 0x61, INSTR_SIL_RDU },
+	{ "lasp", 0x00, INSTR_SSE_RDRD },
+	{ "tprot", 0x01, INSTR_SSE_RDRD },
+	{ "mvcsk", 0x0e, INSTR_SSE_RDRD },
+	{ "mvcdk", 0x0f, INSTR_SSE_RDRD },
+	{ "", 0, INSTR_INVALID }
+};
+
+/*
+ * Instruction table used by find_insn() when the first opcode byte is
+ * 0xe7.  For this table find_insn() takes the opcode fragment from
+ * code[5], masks it with the opcode mask of the entry's format, compares
+ * it with the opfrag of each entry and returns the first entry that
+ * matches.
+ * Entries written as { 0, LONG_INSN_x } hold an index instead of an
+ * inline mnemonic (presumably into a table of longer mnemonics defined
+ * elsewhere in this file - confirm there).
+ *
+ * The table must end with the INSTR_INVALID end marker: find_insn()
+ * only stops scanning when it sees that format, so without the marker a
+ * lookup of an unknown 0xe7 instruction would read past the end of the
+ * array.
+ */
+static struct s390_insn opcode_e7[] = {
+	{ "lcbb", 0x27, INSTR_RXE_RRRDM },
+	{ "vgef", 0x13, INSTR_VRV_VVRDM },
+	{ "vgeg", 0x12, INSTR_VRV_VVRDM },
+	{ "vgbm", 0x44, INSTR_VRI_V0I0 },
+	{ "vgm", 0x46, INSTR_VRI_V0IIM },
+	{ "vl", 0x06, INSTR_VRX_VRRD0 },
+	{ "vlr", 0x56, INSTR_VRR_VV00000 },
+	{ "vlrp", 0x05, INSTR_VRX_VRRDM },
+	{ "vleb", 0x00, INSTR_VRX_VRRDM },
+	{ "vleh", 0x01, INSTR_VRX_VRRDM },
+	{ "vlef", 0x03, INSTR_VRX_VRRDM },
+	{ "vleg", 0x02, INSTR_VRX_VRRDM },
+	{ "vleib", 0x40, INSTR_VRI_V0IM },
+	{ "vleih", 0x41, INSTR_VRI_V0IM },
+	{ "vleif", 0x43, INSTR_VRI_V0IM },
+	{ "vleig", 0x42, INSTR_VRI_V0IM },
+	{ "vlgv", 0x21, INSTR_VRS_RVRDM },
+	{ "vllez", 0x04, INSTR_VRX_VRRDM },
+	{ "vlm", 0x36, INSTR_VRS_VVRD0 },
+	{ "vlbb", 0x07, INSTR_VRX_VRRDM },
+	{ "vlvg", 0x22, INSTR_VRS_VRRDM },
+	{ "vlvgp", 0x62, INSTR_VRR_VRR0000 },
+	{ "vll", 0x37, INSTR_VRS_VRRD0 },
+	{ "vmrh", 0x61, INSTR_VRR_VVV000M },
+	{ "vmrl", 0x60, INSTR_VRR_VVV000M },
+	{ "vpk", 0x94, INSTR_VRR_VVV000M },
+	{ "vpks", 0x97, INSTR_VRR_VVV0M0M },
+	{ "vpkls", 0x95, INSTR_VRR_VVV0M0M },
+	{ "vperm", 0x8c, INSTR_VRR_VVV000V },
+	{ "vpdi", 0x84, INSTR_VRR_VVV000M },
+	{ "vrep", 0x4d, INSTR_VRI_VVIM },
+	{ "vrepi", 0x45, INSTR_VRI_V0IM },
+	{ "vscef", 0x1b, INSTR_VRV_VWRDM },
+	{ "vsceg", 0x1a, INSTR_VRV_VWRDM },
+	{ "vsel", 0x8d, INSTR_VRR_VVV000V },
+	{ "vseg", 0x5f, INSTR_VRR_VV0000M },
+	{ "vst", 0x0e, INSTR_VRX_VRRD0 },
+	{ "vsteb", 0x08, INSTR_VRX_VRRDM },
+	{ "vsteh", 0x09, INSTR_VRX_VRRDM },
+	{ "vstef", 0x0b, INSTR_VRX_VRRDM },
+	{ "vsteg", 0x0a, INSTR_VRX_VRRDM },
+	{ "vstm", 0x3e, INSTR_VRS_VVRD0 },
+	{ "vstl", 0x3f, INSTR_VRS_VRRD0 },
+	{ "vuph", 0xd7, INSTR_VRR_VV0000M },
+	{ "vuplh", 0xd5, INSTR_VRR_VV0000M },
+	{ "vupl", 0xd6, INSTR_VRR_VV0000M },
+	{ "vupll", 0xd4, INSTR_VRR_VV0000M },
+	{ "va", 0xf3, INSTR_VRR_VVV000M },
+	{ "vacc", 0xf1, INSTR_VRR_VVV000M },
+	{ "vac", 0xbb, INSTR_VRR_VVVM00V },
+	{ "vaccc", 0xb9, INSTR_VRR_VVVM00V },
+	{ "vn", 0x68, INSTR_VRR_VVV0000 },
+	{ "vnc", 0x69, INSTR_VRR_VVV0000 },
+	{ "vavg", 0xf2, INSTR_VRR_VVV000M },
+	{ "vavgl", 0xf0, INSTR_VRR_VVV000M },
+	{ "vcksm", 0x66, INSTR_VRR_VVV0000 },
+	{ "vec", 0xdb, INSTR_VRR_VV0000M },
+	{ "vecl", 0xd9, INSTR_VRR_VV0000M },
+	{ "vceq", 0xf8, INSTR_VRR_VVV0M0M },
+	{ "vch", 0xfb, INSTR_VRR_VVV0M0M },
+	{ "vchl", 0xf9, INSTR_VRR_VVV0M0M },
+	{ "vclz", 0x53, INSTR_VRR_VV0000M },
+	{ "vctz", 0x52, INSTR_VRR_VV0000M },
+	{ "vx", 0x6d, INSTR_VRR_VVV0000 },
+	{ "vgfm", 0xb4, INSTR_VRR_VVV000M },
+	{ "vgfma", 0xbc, INSTR_VRR_VVVM00V },
+	{ "vlc", 0xde, INSTR_VRR_VV0000M },
+	{ "vlp", 0xdf, INSTR_VRR_VV0000M },
+	{ "vmx", 0xff, INSTR_VRR_VVV000M },
+	{ "vmxl", 0xfd, INSTR_VRR_VVV000M },
+	{ "vmn", 0xfe, INSTR_VRR_VVV000M },
+	{ "vmnl", 0xfc, INSTR_VRR_VVV000M },
+	{ "vmal", 0xaa, INSTR_VRR_VVVM00V },
+	{ "vmae", 0xae, INSTR_VRR_VVVM00V },
+	{ "vmale", 0xac, INSTR_VRR_VVVM00V },
+	{ "vmah", 0xab, INSTR_VRR_VVVM00V },
+	{ "vmalh", 0xa9, INSTR_VRR_VVVM00V },
+	{ "vmao", 0xaf, INSTR_VRR_VVVM00V },
+	{ "vmalo", 0xad, INSTR_VRR_VVVM00V },
+	{ "vmh", 0xa3, INSTR_VRR_VVV000M },
+	{ "vmlh", 0xa1, INSTR_VRR_VVV000M },
+	{ "vml", 0xa2, INSTR_VRR_VVV000M },
+	{ "vme", 0xa6, INSTR_VRR_VVV000M },
+	{ "vmle", 0xa4, INSTR_VRR_VVV000M },
+	{ "vmo", 0xa7, INSTR_VRR_VVV000M },
+	{ "vmlo", 0xa5, INSTR_VRR_VVV000M },
+	{ "vno", 0x6b, INSTR_VRR_VVV0000 },
+	{ "vo", 0x6a, INSTR_VRR_VVV0000 },
+	{ { 0, LONG_INSN_VPOPCT }, 0x50, INSTR_VRR_VV0000M },
+	{ { 0, LONG_INSN_VERLLV }, 0x73, INSTR_VRR_VVV000M },
+	{ "verll", 0x33, INSTR_VRS_VVRDM },
+	{ "verim", 0x72, INSTR_VRI_VVV0IM },
+	{ "veslv", 0x70, INSTR_VRR_VVV000M },
+	{ "vesl", 0x30, INSTR_VRS_VVRDM },
+	{ { 0, LONG_INSN_VESRAV }, 0x7a, INSTR_VRR_VVV000M },
+	{ "vesra", 0x3a, INSTR_VRS_VVRDM },
+	{ { 0, LONG_INSN_VESRLV }, 0x78, INSTR_VRR_VVV000M },
+	{ "vesrl", 0x38, INSTR_VRS_VVRDM },
+	{ "vsl", 0x74, INSTR_VRR_VVV0000 },
+	{ "vslb", 0x75, INSTR_VRR_VVV0000 },
+	{ "vsldb", 0x77, INSTR_VRI_VVV0I0 },
+	{ "vsra", 0x7e, INSTR_VRR_VVV0000 },
+	{ "vsrab", 0x7f, INSTR_VRR_VVV0000 },
+	{ "vsrl", 0x7c, INSTR_VRR_VVV0000 },
+	{ "vsrlb", 0x7d, INSTR_VRR_VVV0000 },
+	{ "vs", 0xf7, INSTR_VRR_VVV000M },
+	{ "vscb", 0xf5, INSTR_VRR_VVV000M },
+	{ "vsb", 0xbf, INSTR_VRR_VVVM00V },
+	{ { 0, LONG_INSN_VSBCBI }, 0xbd, INSTR_VRR_VVVM00V },
+	{ "vsumg", 0x65, INSTR_VRR_VVV000M },
+	{ "vsumq", 0x67, INSTR_VRR_VVV000M },
+	{ "vsum", 0x64, INSTR_VRR_VVV000M },
+	{ "vtm", 0xd8, INSTR_VRR_VV00000 },
+	{ "vfae", 0x82, INSTR_VRR_VVV0M0M },
+	{ "vfee", 0x80, INSTR_VRR_VVV0M0M },
+	{ "vfene", 0x81, INSTR_VRR_VVV0M0M },
+	{ "vistr", 0x5c, INSTR_VRR_VV00M0M },
+	{ "vstrc", 0x8a, INSTR_VRR_VVVMM0V },
+	{ "vfa", 0xe3, INSTR_VRR_VVV00MM },
+	{ "wfc", 0xcb, INSTR_VRR_VV000MM },
+	{ "wfk", 0xca, INSTR_VRR_VV000MM },
+	{ "vfce", 0xe8, INSTR_VRR_VVV0MMM },
+	{ "vfch", 0xeb, INSTR_VRR_VVV0MMM },
+	{ "vfche", 0xea, INSTR_VRR_VVV0MMM },
+	{ "vcdg", 0xc3, INSTR_VRR_VV00MMM },
+	{ "vcdlg", 0xc1, INSTR_VRR_VV00MMM },
+	{ "vcgd", 0xc2, INSTR_VRR_VV00MMM },
+	{ "vclgd", 0xc0, INSTR_VRR_VV00MMM },
+	{ "vfd", 0xe5, INSTR_VRR_VVV00MM },
+	{ "vfi", 0xc7, INSTR_VRR_VV00MMM },
+	{ "vlde", 0xc4, INSTR_VRR_VV000MM },
+	{ "vled", 0xc5, INSTR_VRR_VV00MMM },
+	{ "vfm", 0xe7, INSTR_VRR_VVV00MM },
+	{ "vfma", 0x8f, INSTR_VRR_VVVM0MV },
+	{ "vfms", 0x8e, INSTR_VRR_VVVM0MV },
+	{ "vfpso", 0xcc, INSTR_VRR_VV00MMM },
+	{ "vfsq", 0xce, INSTR_VRR_VV000MM },
+	{ "vfs", 0xe2, INSTR_VRR_VVV00MM },
+	{ "vftci", 0x4a, INSTR_VRI_VVIMM },
+	{ "", 0, INSTR_INVALID }
+};
+
+/*
+ * Instruction table used by find_insn() when the first opcode byte is
+ * 0xeb.  For this table find_insn() takes the opcode fragment from
+ * code[5], masks it with the opcode mask of the entry's format, compares
+ * it with the opfrag of each entry and returns the first entry that
+ * matches.  The scan stops at the INSTR_INVALID end marker.
+ * Entries written as { 0, LONG_INSN_x } hold an index instead of an
+ * inline mnemonic (presumably into a table of longer mnemonics defined
+ * elsewhere in this file - confirm there).
+ */
+static struct s390_insn opcode_eb[] = {
+	{ "lmg", 0x04, INSTR_RSY_RRRD },
+	{ "srag", 0x0a, INSTR_RSY_RRRD },
+	{ "slag", 0x0b, INSTR_RSY_RRRD },
+	{ "srlg", 0x0c, INSTR_RSY_RRRD },
+	{ "sllg", 0x0d, INSTR_RSY_RRRD },
+	{ "tracg", 0x0f, INSTR_RSY_RRRD },
+	{ "csy", 0x14, INSTR_RSY_RRRD },
+	{ "rllg", 0x1c, INSTR_RSY_RRRD },
+	{ "clmh", 0x20, INSTR_RSY_RURD },
+	{ "clmy", 0x21, INSTR_RSY_RURD },
+	{ "clt", 0x23, INSTR_RSY_RURD },
+	{ "stmg", 0x24, INSTR_RSY_RRRD },
+	{ "stctg", 0x25, INSTR_RSY_CCRD },
+	{ "stmh", 0x26, INSTR_RSY_RRRD },
+	{ "clgt", 0x2b, INSTR_RSY_RURD },
+	{ "stcmh", 0x2c, INSTR_RSY_RURD },
+	{ "stcmy", 0x2d, INSTR_RSY_RURD },
+	{ "lctlg", 0x2f, INSTR_RSY_CCRD },
+	{ "csg", 0x30, INSTR_RSY_RRRD },
+	{ "cdsy", 0x31, INSTR_RSY_RRRD },
+	{ "cdsg", 0x3e, INSTR_RSY_RRRD },
+	{ "bxhg", 0x44, INSTR_RSY_RRRD },
+	{ "bxleg", 0x45, INSTR_RSY_RRRD },
+	{ "ecag", 0x4c, INSTR_RSY_RRRD },
+	{ "tmy", 0x51, INSTR_SIY_URD },
+	{ "mviy", 0x52, INSTR_SIY_URD },
+	{ "niy", 0x54, INSTR_SIY_URD },
+	{ "cliy", 0x55, INSTR_SIY_URD },
+	{ "oiy", 0x56, INSTR_SIY_URD },
+	{ "xiy", 0x57, INSTR_SIY_URD },
+	{ "asi", 0x6a, INSTR_SIY_IRD },
+	{ "alsi", 0x6e, INSTR_SIY_IRD },
+	{ "agsi", 0x7a, INSTR_SIY_IRD },
+	{ "algsi", 0x7e, INSTR_SIY_IRD },
+	{ "icmh", 0x80, INSTR_RSY_RURD },
+	{ "icmy", 0x81, INSTR_RSY_RURD },
+	{ "clclu", 0x8f, INSTR_RSY_RRRD },
+	{ "stmy", 0x90, INSTR_RSY_RRRD },
+	{ "lmh", 0x96, INSTR_RSY_RRRD },
+	{ "lmy", 0x98, INSTR_RSY_RRRD },
+	{ "lamy", 0x9a, INSTR_RSY_AARD },
+	{ "stamy", 0x9b, INSTR_RSY_AARD },
+	{ { 0, LONG_INSN_PCISTB }, 0xd0, INSTR_RSY_RRRD },
+	{ "sic", 0xd1, INSTR_RSY_RRRD },
+	{ "srak", 0xdc, INSTR_RSY_RRRD },
+	{ "slak", 0xdd, INSTR_RSY_RRRD },
+	{ "srlk", 0xde, INSTR_RSY_RRRD },
+	{ "sllk", 0xdf, INSTR_RSY_RRRD },
+	{ "locg", 0xe2, INSTR_RSY_RDRM },
+	{ "stocg", 0xe3, INSTR_RSY_RDRM },
+	{ "lang", 0xe4, INSTR_RSY_RRRD },
+	{ "laog", 0xe6, INSTR_RSY_RRRD },
+	{ "laxg", 0xe7, INSTR_RSY_RRRD },
+	{ "laag", 0xe8, INSTR_RSY_RRRD },
+	{ "laalg", 0xea, INSTR_RSY_RRRD },
+	{ "loc", 0xf2, INSTR_RSY_RDRM },
+	{ "stoc", 0xf3, INSTR_RSY_RDRM },
+	{ "lan", 0xf4, INSTR_RSY_RRRD },
+	{ "lao", 0xf6, INSTR_RSY_RRRD },
+	{ "lax", 0xf7, INSTR_RSY_RRRD },
+	{ "laa", 0xf8, INSTR_RSY_RRRD },
+	{ "laal", 0xfa, INSTR_RSY_RRRD },
+	{ "lric", 0x60, INSTR_RSY_RDRM },
+	{ "stric", 0x61, INSTR_RSY_RDRM },
+	{ "mric", 0x62, INSTR_RSY_RDRM },
+	{ { 0, LONG_INSN_STCCTM }, 0x17, INSTR_RSY_RMRD },
+	{ "rll", 0x1d, INSTR_RSY_RRRD },
+	{ "mvclu", 0x8e, INSTR_RSY_RRRD },
+	{ "tp", 0xc0, INSTR_RSL_R0RD },
+	{ "", 0, INSTR_INVALID }
+};
+
+/*
+ * Instruction table used by find_insn() when the first opcode byte is
+ * 0xec.  For this table find_insn() takes the opcode fragment from
+ * code[5], masks it with the opcode mask of the entry's format, compares
+ * it with the opfrag of each entry and returns the first entry that
+ * matches.  The scan stops at the INSTR_INVALID end marker.
+ * Entries written as { 0, LONG_INSN_x } hold an index instead of an
+ * inline mnemonic (presumably into a table of longer mnemonics defined
+ * elsewhere in this file - confirm there).
+ */
+static struct s390_insn opcode_ec[] = {
+	{ "brxhg", 0x44, INSTR_RIE_RRP },
+	{ "brxlg", 0x45, INSTR_RIE_RRP },
+	{ { 0, LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU },
+	{ "rnsbg", 0x54, INSTR_RIE_RRUUU },
+	{ "risbg", 0x55, INSTR_RIE_RRUUU },
+	{ "rosbg", 0x56, INSTR_RIE_RRUUU },
+	{ "rxsbg", 0x57, INSTR_RIE_RRUUU },
+	{ { 0, LONG_INSN_RISBGN }, 0x59, INSTR_RIE_RRUUU },
+	{ { 0, LONG_INSN_RISBHG }, 0x5D, INSTR_RIE_RRUUU },
+	{ "cgrj", 0x64, INSTR_RIE_RRPU },
+	{ "clgrj", 0x65, INSTR_RIE_RRPU },
+	{ "cgit", 0x70, INSTR_RIE_R0IU },
+	{ "clgit", 0x71, INSTR_RIE_R0UU },
+	{ "cit", 0x72, INSTR_RIE_R0IU },
+	{ "clfit", 0x73, INSTR_RIE_R0UU },
+	{ "crj", 0x76, INSTR_RIE_RRPU },
+	{ "clrj", 0x77, INSTR_RIE_RRPU },
+	{ "cgij", 0x7c, INSTR_RIE_RUPI },
+	{ "clgij", 0x7d, INSTR_RIE_RUPU },
+	{ "cij", 0x7e, INSTR_RIE_RUPI },
+	{ "clij", 0x7f, INSTR_RIE_RUPU },
+	{ "ahik", 0xd8, INSTR_RIE_RRI0 },
+	{ "aghik", 0xd9, INSTR_RIE_RRI0 },
+	{ { 0, LONG_INSN_ALHSIK }, 0xda, INSTR_RIE_RRI0 },
+	{ { 0, LONG_INSN_ALGHSIK }, 0xdb, INSTR_RIE_RRI0 },
+	{ "cgrb", 0xe4, INSTR_RRS_RRRDU },
+	{ "clgrb", 0xe5, INSTR_RRS_RRRDU },
+	{ "crb", 0xf6, INSTR_RRS_RRRDU },
+	{ "clrb", 0xf7, INSTR_RRS_RRRDU },
+	{ "cgib", 0xfc, INSTR_RIS_RURDI },
+	{ "clgib", 0xfd, INSTR_RIS_RURDU },
+	{ "cib", 0xfe, INSTR_RIS_RURDI },
+	{ "clib", 0xff, INSTR_RIS_RURDU },
+	{ "", 0, INSTR_INVALID }
+};
+
+/*
+ * Instruction table used by find_insn() when the first opcode byte is
+ * 0xed.  For this table find_insn() takes the opcode fragment from
+ * code[5], masks it with the opcode mask of the entry's format, compares
+ * it with the opfrag of each entry and returns the first entry that
+ * matches.  The scan stops at the INSTR_INVALID end marker.
+ */
+static struct s390_insn opcode_ed[] = {
+	{ "mayl", 0x38, INSTR_RXF_FRRDF },
+	{ "myl", 0x39, INSTR_RXF_FRRDF },
+	{ "may", 0x3a, INSTR_RXF_FRRDF },
+	{ "my", 0x3b, INSTR_RXF_FRRDF },
+	{ "mayh", 0x3c, INSTR_RXF_FRRDF },
+	{ "myh", 0x3d, INSTR_RXF_FRRDF },
+	{ "sldt", 0x40, INSTR_RXF_FRRDF },
+	{ "srdt", 0x41, INSTR_RXF_FRRDF },
+	{ "slxt", 0x48, INSTR_RXF_FRRDF },
+	{ "srxt", 0x49, INSTR_RXF_FRRDF },
+	{ "tdcet", 0x50, INSTR_RXE_FRRD },
+	{ "tdget", 0x51, INSTR_RXE_FRRD },
+	{ "tdcdt", 0x54, INSTR_RXE_FRRD },
+	{ "tdgdt", 0x55, INSTR_RXE_FRRD },
+	{ "tdcxt", 0x58, INSTR_RXE_FRRD },
+	{ "tdgxt", 0x59, INSTR_RXE_FRRD },
+	{ "ley", 0x64, INSTR_RXY_FRRD },
+	{ "ldy", 0x65, INSTR_RXY_FRRD },
+	{ "stey", 0x66, INSTR_RXY_FRRD },
+	{ "stdy", 0x67, INSTR_RXY_FRRD },
+	{ "czdt", 0xa8, INSTR_RSL_LRDFU },
+	{ "czxt", 0xa9, INSTR_RSL_LRDFU },
+	{ "cdzt", 0xaa, INSTR_RSL_LRDFU },
+	{ "cxzt", 0xab, INSTR_RSL_LRDFU },
+	{ "ldeb", 0x04, INSTR_RXE_FRRD },
+	{ "lxdb", 0x05, INSTR_RXE_FRRD },
+	{ "lxeb", 0x06, INSTR_RXE_FRRD },
+	{ "mxdb", 0x07, INSTR_RXE_FRRD },
+	{ "keb", 0x08, INSTR_RXE_FRRD },
+	{ "ceb", 0x09, INSTR_RXE_FRRD },
+	{ "aeb", 0x0a, INSTR_RXE_FRRD },
+	{ "seb", 0x0b, INSTR_RXE_FRRD },
+	{ "mdeb", 0x0c, INSTR_RXE_FRRD },
+	{ "deb", 0x0d, INSTR_RXE_FRRD },
+	{ "maeb", 0x0e, INSTR_RXF_FRRDF },
+	{ "mseb", 0x0f, INSTR_RXF_FRRDF },
+	{ "tceb", 0x10, INSTR_RXE_FRRD },
+	{ "tcdb", 0x11, INSTR_RXE_FRRD },
+	{ "tcxb", 0x12, INSTR_RXE_FRRD },
+	{ "sqeb", 0x14, INSTR_RXE_FRRD },
+	{ "sqdb", 0x15, INSTR_RXE_FRRD },
+	{ "meeb", 0x17, INSTR_RXE_FRRD },
+	{ "kdb", 0x18, INSTR_RXE_FRRD },
+	{ "cdb", 0x19, INSTR_RXE_FRRD },
+	{ "adb", 0x1a, INSTR_RXE_FRRD },
+	{ "sdb", 0x1b, INSTR_RXE_FRRD },
+	{ "mdb", 0x1c, INSTR_RXE_FRRD },
+	{ "ddb", 0x1d, INSTR_RXE_FRRD },
+	{ "madb", 0x1e, INSTR_RXF_FRRDF },
+	{ "msdb", 0x1f, INSTR_RXF_FRRDF },
+	{ "lde", 0x24, INSTR_RXE_FRRD },
+	{ "lxd", 0x25, INSTR_RXE_FRRD },
+	{ "lxe", 0x26, INSTR_RXE_FRRD },
+	{ "mae", 0x2e, INSTR_RXF_FRRDF },
+	{ "mse", 0x2f, INSTR_RXF_FRRDF },
+	{ "sqe", 0x34, INSTR_RXE_FRRD },
+	{ "sqd", 0x35, INSTR_RXE_FRRD },
+	{ "mee", 0x37, INSTR_RXE_FRRD },
+	{ "mad", 0x3e, INSTR_RXF_FRRDF },
+	{ "msd", 0x3f, INSTR_RXF_FRRDF },
+	{ "", 0, INSTR_INVALID }
+};
+
+/*
+ * Decode the value of a single operand from the instruction at @code.
+ *
+ * @operand describes the field: it is operand->bits wide and begins
+ * operand->shift bits into the instruction.  After the raw field has
+ * been isolated it is adjusted according to the field layout and
+ * operand->flags (20 bit displacement reordering, vector register
+ * extension bit, sign extension, pc relative doubling, length bias).
+ * Returns the adjusted value.
+ */
+static unsigned int extract_operand(unsigned char *code,
+				    const struct s390_operand *operand)
+{
+	unsigned char *byte = code + operand->shift / 8;
+	int pending = (operand->shift & 7) + operand->bits;
+	unsigned int value = 0;
+
+	/* Collect every byte that holds a part of the field. */
+	for (;;) {
+		value = (value << 8) | (unsigned int) *byte++;
+		pending -= 8;
+		if (pending <= 0)
+			break;
+	}
+	/* Drop the bits behind the field, then the bits in front of it. */
+	value >>= -pending;
+	value &= ((1U << (operand->bits - 1)) << 1) - 1;
+
+	/*
+	 * 20 bit field at bit offset 20 (long displacement): the last
+	 * eight bits of the field are the most significant ones.
+	 */
+	if (operand->shift == 20 && operand->bits == 20)
+		value = ((value & 0xfff00) >> 8) | ((value & 0xff) << 12);
+
+	/*
+	 * Vector register numbers get one more bit from code[4]; which
+	 * bit depends on where the register field is located.
+	 */
+	if (operand->flags & OPERAND_VR) {
+		switch (operand->shift) {
+		case 8:
+			value |= (code[4] & 8) << 1;
+			break;
+		case 12:
+			value |= (code[4] & 4) << 2;
+			break;
+		case 16:
+			value |= (code[4] & 2) << 3;
+			break;
+		case 32:
+			value |= (code[4] & 1) << 4;
+			break;
+		}
+	}
+
+	/* Signed and pc relative operands are sign extended. */
+	if (operand->flags & (OPERAND_SIGNED | OPERAND_PCREL)) {
+		if (value & (1U << (operand->bits - 1)))
+			value |= (-1U << (operand->bits - 1)) << 1;
+	}
+
+	/* pc relative operands count halfwords, so double them. */
+	if (operand->flags & OPERAND_PCREL)
+		value *= 2;
+
+	/* An encoded length of x stands for a real length of x + 1. */
+	if (operand->flags & OPERAND_LENGTH)
+		value += 1;
+
+	return value;
+}
+/* Find the opcode table entry for the instruction at @code; NULL if unknown. */
+struct s390_insn *find_insn(unsigned char *code)
+{
+	struct s390_insn *entry;
+	unsigned char frag, mask;
+
+	frag = code[1];	/* usual position of the second opcode part */
+	switch (code[0]) {
+	case 0x01:
+		entry = opcode_01;
+		break;
+	case 0xa5:
+		entry = opcode_a5;
+		break;
+	case 0xa7:
+		entry = opcode_a7;
+		break;
+	case 0xaa:
+		entry = opcode_aa;
+		break;
+	case 0xb2:
+		entry = opcode_b2;
+		break;
+	case 0xb3:
+		entry = opcode_b3;
+		break;
+	case 0xb9:
+		entry = opcode_b9;
+		break;
+	case 0xc0:
+		entry = opcode_c0;
+		break;
+	case 0xc2:
+		entry = opcode_c2;
+		break;
+	case 0xc4:
+		entry = opcode_c4;
+		break;
+	case 0xc6:
+		entry = opcode_c6;
+		break;
+	case 0xc8:
+		entry = opcode_c8;
+		break;
+	case 0xcc:
+		entry = opcode_cc;
+		break;
+	case 0xe5:
+		entry = opcode_e5;
+		break;
+	/* The tables below are matched against byte 5 of the instruction. */
+	case 0xe3:
+		entry = opcode_e3;
+		frag = code[5];
+		break;
+	case 0xe7:
+		entry = opcode_e7;
+		frag = code[5];
+		break;
+	case 0xeb:
+		entry = opcode_eb;
+		frag = code[5];
+		break;
+	case 0xec:
+		entry = opcode_ec;
+		frag = code[5];
+		break;
+	case 0xed:
+		entry = opcode_ed;
+		frag = code[5];
+		break;
+	default:
+		entry = opcode;
+		frag = code[0];	/* generic table: match on byte 0 itself */
+		break;
+	}
+	for (; entry->format != INSTR_INVALID; entry++) {
+		mask = formats[entry->format][0];
+		if ((frag & mask) == entry->opfrag)
+			return entry;
+	}
+	return NULL;
+}
+
+/**
+ * insn_to_mnemonic - decode an s390 instruction
+ * @instruction: instruction to decode
+ * @buf: buffer to fill with mnemonic
+ * @len: length of buffer
+ *
+ * Look up the instruction at @instruction and write its mnemonic to @buf,
+ * limited to @len bytes. A table entry whose name starts with a NUL byte
+ * keeps an index into long_insn_name[] in its second name byte.
+ * @buf is left unchanged if the instruction could not be decoded.
+ *
+ * Returns: %0 on success, %-ENOENT if the instruction was not found.
+ */
+int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len)
+{
+	struct s390_insn *entry = find_insn(instruction);
+
+	if (!entry)
+		return -ENOENT;
+	if (entry->name[0] != '\0') {
+		snprintf(buf, len, "%.5s", entry->name);
+		return 0;
+	}
+	snprintf(buf, len, "%s", long_insn_name[(int) entry->name[1]]);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(insn_to_mnemonic);
+/* Format the instruction at @code into @buffer; returns the length written. */
+static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
+{
+	struct s390_insn *insn;
+	const unsigned char *ops;
+	const struct s390_operand *operand;
+	unsigned int value;
+	char separator;
+	char *ptr;
+	int i;
+
+	ptr = buffer;	/* NOTE: sprintf() below does no bounds checking */
+	insn = find_insn(code);
+	if (insn) {
+		if (insn->name[0] == '\0')	/* name[1] indexes long_insn_name[] */
+			ptr += sprintf(ptr, "%s\t",
+				       long_insn_name[(int) insn->name[1]]);
+		else
+			ptr += sprintf(ptr, "%.5s\t", insn->name);
+		/* Extract the operands; the index list ends at 0 or after 6. */
+		separator = 0;
+		for (ops = formats[insn->format] + 1, i = 0;
+		     *ops != 0 && i < 6; ops++, i++) {
+			operand = operands + *ops;
+			value = extract_operand(code, operand);
+			if ((operand->flags & OPERAND_INDEX)  && value == 0)
+				continue;	/* index register 0 is not printed */
+			if ((operand->flags & OPERAND_BASE) &&
+			    value == 0 && separator == '(') {
+				separator = ',';	/* drop "(0)" after a displacement */
+				continue;
+			}
+			if (separator)
+				ptr += sprintf(ptr, "%c", separator);
+			if (operand->flags & OPERAND_GPR)
+				ptr += sprintf(ptr, "%%r%i", value);
+			else if (operand->flags & OPERAND_FPR)
+				ptr += sprintf(ptr, "%%f%i", value);
+			else if (operand->flags & OPERAND_AR)
+				ptr += sprintf(ptr, "%%a%i", value);
+			else if (operand->flags & OPERAND_CR)
+				ptr += sprintf(ptr, "%%c%i", value);
+			else if (operand->flags & OPERAND_VR)
+				ptr += sprintf(ptr, "%%v%i", value);
+			else if (operand->flags & OPERAND_PCREL)	/* relative to @addr */
+				ptr += sprintf(ptr, "%lx", (signed int) value
+								      + addr);
+			else if (operand->flags & OPERAND_SIGNED)
+				ptr += sprintf(ptr, "%i", value);
+			else
+				ptr += sprintf(ptr, "%u", value);
+			if (operand->flags & OPERAND_DISP)
+				separator = '(';	/* a displacement opens "d(x,b)" */
+			else if (operand->flags & OPERAND_BASE) {
+				ptr += sprintf(ptr, ")");
+				separator = ',';
+			} else
+				separator = ',';
+		}
+	} else
+		ptr += sprintf(ptr, "unknown");
+	return (int) (ptr - buffer);
+}
+
+/*
+ * Disassemble and print the code around the PSW address in @regs.
+ */
+void show_code(struct pt_regs *regs)
+{
+	char *mode = user_mode(regs) ? "User" : "Krnl";
+	unsigned char code[64];
+	char buffer[128], *ptr;
+	mm_segment_t old_fs;
+	unsigned long addr;
+	int start, end, opsize, hops, i;
+
+	/* Snapshot up to 32 bytes before and 32 bytes after the PSW address. */
+	old_fs = get_fs();
+	set_fs(user_mode(regs) ? USER_DS : KERNEL_DS);
+	for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) {
+		addr = regs->psw.addr - 34 + start;
+		if (__copy_from_user(code + start - 2, (char __user *) addr, 2))
+			break;
+	}
+	for (end = 32; end < 64; end += 2) {
+		addr = regs->psw.addr + end - 32;
+		if (__copy_from_user(code + end, (char __user *) addr, 2))
+			break;
+	}
+	set_fs(old_fs);
+	/* Code snapshot useable ? code[start..end) is valid from here on. */
+	if ((regs->psw.addr & 1) || start >= end) {
+		printk("%s Code: Bad PSW.\n", mode);
+		return;
+	}
+	/* Find a starting point for the disassembly. */
+	while (start < 32) {
+		for (i = 0, hops = 0; start + i < 32 && hops < 3; hops++) {
+			if (!find_insn(code + start + i))
+				break;
+			i += insn_length(code[start + i]);
+		}
+		if (start + i == 32)
+			/* Looks good, sequence ends at PSW. */
+			break;
+		start += 2;
+	}
+	/* Decode at most 8 insns: '#' ends at the PSW, '>' starts at it. */
+	ptr = buffer + sprintf(buffer, "%s Code:", mode);
+	hops = 0;
+	while (start < end && hops < 8) {
+		opsize = insn_length(code[start]);
+		if  (start + opsize == 32)
+			*ptr++ = '#';
+		else if (start == 32)
+			*ptr++ = '>';
+		else
+			*ptr++ = ' ';
+		addr = regs->psw.addr + start - 32;
+		ptr += sprintf(ptr, "%016lx: ", addr);
+		if (start + opsize >= end)
+			break;
+		for (i = 0; i < opsize; i++)
+			ptr += sprintf(ptr, "%02x", code[start + i]);
+		*ptr++ = '\t';
+		if (i < 6)
+			*ptr++ = '\t';
+		ptr += print_insn(ptr, code + start, addr);
+		start += opsize;
+		printk("%s", buffer);	/* text contains '%': never a format */
+		ptr = buffer;
+		ptr += sprintf(ptr, "\n          ");
+		hops++;
+	}
+	printk("\n");
+}
+
+/* Print a disassembly of the @len bytes of code at @code, one insn per line. */
+void print_fn_code(unsigned char *code, unsigned long len)
+{
+	char buffer[128], *ptr;
+	int opsize, i;
+
+	while (len) {
+		ptr = buffer;
+		opsize = insn_length(*code);
+		if (opsize > len)
+			break;
+		ptr += sprintf(ptr, "%p: ", code);
+		for (i = 0; i < opsize; i++)
+			ptr += sprintf(ptr, "%02x", code[i]);
+		*ptr++ = '\t';
+		if (i < 4)
+			*ptr++ = '\t';
+		ptr += print_insn(ptr, code, (unsigned long) code);
+		*ptr = 0;
+		printk("%s\n", buffer);	/* text contains '%': never a format */
+		code += opsize;
+		len -= opsize;
+	}
+}
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
new file mode 100644
index 000000000..dc8e20473
--- /dev/null
+++ b/arch/s390/kernel/dumpstack.c
@@ -0,0 +1,204 @@
+/*
+ * Stack dumping functions
+ *
+ *  Copyright IBM Corp. 1999, 2013
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/utsname.h>
+#include <linux/export.h>
+#include <linux/kdebug.h>
+#include <linux/ptrace.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/debug.h>
+#include <asm/dis.h>
+#include <asm/ipl.h>
+
+/*
+ * For show_trace we have three different stacks to consider:
+ *   - the panic stack which is used if the kernel stack has overflowed
+ *   - the asynchronous interrupt stack (cpu related)
+ *   - the synchronous kernel stack (process related)
+ * The stack trace can start at any of the three stacks and can potentially
+ * touch all of them. The order is: panic stack, async stack, sync stack.
+ */
+static unsigned long
+__show_trace(unsigned long sp, unsigned long low, unsigned long high)
+{
+	struct stack_frame *sf;
+	struct pt_regs *regs;
+	unsigned long addr;
+
+	while (1) {	/* one pass per chain of frames inside [low, high) */
+		sp = sp & PSW_ADDR_INSN;
+		if (sp < low || sp > high - sizeof(*sf))
+			return sp;	/* not on this stack: hand sp back */
+		sf = (struct stack_frame *) sp;
+		addr = sf->gprs[8] & PSW_ADDR_INSN;	/* presumably saved r14 */
+		printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
+		/* Follow the backchain; each frame must lie above the last one. */
+		while (1) {
+			low = sp;
+			sp = sf->back_chain & PSW_ADDR_INSN;
+			if (!sp)
+				break;
+			if (sp <= low || sp > high - sizeof(*sf))
+				return sp;
+			sf = (struct stack_frame *) sp;
+			addr = sf->gprs[8] & PSW_ADDR_INSN;
+			printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
+		}
+		/* Zero backchain detected, check for interrupt frame. */
+		sp = (unsigned long) (sf + 1);	/* pt_regs expected right behind */
+		if (sp <= low || sp > high - sizeof(*regs))
+			return sp;
+		regs = (struct pt_regs *) sp;
+		addr = regs->psw.addr & PSW_ADDR_INSN;
+		printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
+		low = sp;
+		sp = regs->gprs[15];	/* continue at the interrupted context's r15 */
+	}
+}
+
+/*
+ * Walk panic, async and task stack from @stack (or @task's ksp, or r15).
+ */
+static void show_trace(struct task_struct *task, unsigned long *stack)
+{
+	const unsigned long frame_size =
+		STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+	register unsigned long __r15 asm ("15");
+	unsigned long sp, top;
+
+	sp = (unsigned long) stack;
+	if (!sp)
+		sp = task ? task->thread.ksp : __r15;
+	printk("Call Trace:\n");
+#ifdef CONFIG_CHECK_STACK
+	top = S390_lowcore.panic_stack + frame_size;
+	sp = __show_trace(sp, top - 4096, top);
+#endif
+	top = S390_lowcore.async_stack + frame_size;
+	sp = __show_trace(sp, top - ASYNC_SIZE, top);
+	if (task) {
+		top = (unsigned long) task_stack_page(task) + THREAD_SIZE;
+	} else {
+		top = S390_lowcore.thread_info + THREAD_SIZE;
+		task = current;
+	}
+	__show_trace(sp, top - THREAD_SIZE, top);
+	debug_show_held_locks(task);
+}
+
+/* Dump up to 20 words from the stack, then print the call trace. */
+void show_stack(struct task_struct *task, unsigned long *sp)
+{
+	register unsigned long *__r15 asm ("15");
+	unsigned long *word = sp;
+	int n;
+
+	if (!word)
+		word = task ? (unsigned long *) task->thread.ksp : __r15;
+
+	/* Stop at a THREAD_SIZE boundary; start a new row every 32 bytes. */
+	for (n = 0; n < 20; n++, word++) {
+		if (!((addr_t) word & (THREAD_SIZE-1)))
+			break;
+		if (n * sizeof(long) % 32 == 0)
+			printk("%s       ", n ? "\n" : "");
+		printk("%016lx ", *word);
+	}
+	printk("\n");
+	show_trace(task, sp);
+}
+/* Print regs->args[0] as the last breaking-event address, with its symbol. */
+static void show_last_breaking_event(struct pt_regs *regs)
+{
+	printk("Last Breaking-Event-Address:\n");
+	printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]);
+}
+/* Return the PSW mask field selected by @bits, shifted down to bit 0. */
+static inline int mask_bits(struct pt_regs *regs, unsigned long bits)
+{
+	return (regs->psw.mask & bits) / (bits & -bits);
+}
+
+/*
+ * Print the PSW, its decoded mask fields, the GPRs and the code at the PSW.
+ */
+void show_registers(struct pt_regs *regs)
+{
+	char *mode = user_mode(regs) ? "User" : "Krnl";
+	int i;
+
+	printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
+	if (!user_mode(regs))
+		printk(" (%pSR)", (void *)regs->psw.addr);
+	printk("\n");
+	printk("           R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
+	       "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
+	       mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
+	       mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY),
+	       mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT),
+	       mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC),
+	       mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM));
+	printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA));
+	printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
+	       regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
+	for (i = 4; i < 16; i += 4)
+		printk("           %016lx %016lx %016lx %016lx\n", regs->gprs[i],
+		       regs->gprs[i + 1], regs->gprs[i + 2], regs->gprs[i + 3]);
+	show_code(regs);
+}
+/* Print the generic header, registers, code and (kernel mode) call trace. */
+void show_regs(struct pt_regs *regs)
+{
+	show_regs_print_info(KERN_DEFAULT);
+	show_registers(regs);
+	/* Show stack backtrace if pt_regs is from kernel mode */
+	if (!user_mode(regs))
+		show_trace(NULL, (unsigned long *) regs->gprs[15]);
+	show_last_breaking_event(regs);
+}
+
+static DEFINE_SPINLOCK(die_lock);
+/* Print an oops report for @regs headed by @str, then panic or do_exit(). */
+void die(struct pt_regs *regs, const char *str)
+{
+	static int die_counter;
+
+	oops_enter();
+	lgr_info_log();
+	debug_stop_all();
+	console_verbose();
+	spin_lock_irq(&die_lock);	/* held until the report is complete */
+	bust_spinlocks(1);
+	printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff,
+	       regs->int_code >> 17, ++die_counter);
+#ifdef CONFIG_PREEMPT
+	printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+	printk("SMP ");
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	printk("DEBUG_PAGEALLOC");
+#endif
+	printk("\n");
+	notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
+	print_modules();
+	show_regs(regs);
+	bust_spinlocks(0);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+	spin_unlock_irq(&die_lock);
+	if (in_interrupt())	/* no task to kill in interrupt context */
+		panic("Fatal exception in interrupt");
+	if (panic_on_oops)
+		panic("Fatal exception: panic_on_oops");
+	oops_exit();
+	do_exit(SIGSEGV);
+}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
new file mode 100644
index 000000000..549a73a4b
--- /dev/null
+++ b/arch/s390/kernel/early.c
@@ -0,0 +1,444 @@
+/*
+ *    Copyright IBM Corp. 2007, 2009
+ *    Author(s): Hongjie Yang <hongjie@us.ibm.com>,
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "setup"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/compiler.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/lockdep.h>
+#include <linux/module.h>
+#include <linux/pfn.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <asm/ebcdic.h>
+#include <asm/ipl.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/sysinfo.h>
+#include <asm/cpcmd.h>
+#include <asm/sclp.h>
+#include <asm/facility.h>
+#include "entry.h"
+
+/*
+ * Create a Kernel NSS if the SAVESYS= parameter is defined
+ */
+#define DEFSYS_CMD_SIZE		128
+#define SAVESYS_CMD_SIZE	32
+
+char kernel_nss_name[NSS_NAME_SIZE + 1];
+
+static void __init setup_boot_command_line(void);
+
+/*
+ * Make sure the TOD clock is running. If it is not, set it to the Unix
+ * epoch and record that value as the base for the clock accounting.
+ */
+static void __init reset_tod_clock(void)
+{
+	u64 now;
+
+	if (!store_tod_clock(&now))
+		return;
+	/* TOD clock not running: set it to the Unix epoch and store it again. */
+	if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock(&now))
+		disabled_wait(0);
+	sched_clock_base_cc = TOD_UNIX_EPOCH;
+	S390_lowcore.last_update_clock = sched_clock_base_cc;
+}
+
+#ifdef CONFIG_SHARED_KERNEL
+int __init savesys_ipl_nss(char *cmd, const int cmdlen);
+/* Out-of-line asm: runs diagnose 8 on @cmd/@cmdlen in 31 bit addressing mode. */
+asm(
+	"	.section .init.text,\"ax\",@progbits\n"
+	"	.align	4\n"
+	"	.type	savesys_ipl_nss, @function\n"
+	"savesys_ipl_nss:\n"
+	"	stmg	6,15,48(15)\n"	/* save r6-r15 at 48(r15) */
+	"	lgr	14,3\n"		/* r14 = r3, presumably cmdlen (ABI) */
+	"	sam31\n"		/* switch to 31 bit addressing */
+	"	diag	2,14,0x8\n"	/* r2 presumably still holds cmd */
+	"	sam64\n"		/* back to 64 bit addressing */
+	"	lgr	2,14\n"		/* return value = r14 after the diag */
+	"	lmg	6,15,48(15)\n"	/* restore r6-r15 */
+	"	br	14\n"
+	"	.size	savesys_ipl_nss, .-savesys_ipl_nss\n"
+	"	.previous\n");
+
+static __initdata char upper_command_line[COMMAND_LINE_SIZE];
+/* Define and save the kernel as an NSS if SAVESYS=<name> is given under VM. */
+static noinline __init void create_kernel_nss(void)
+{
+	unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size;
+#ifdef CONFIG_BLK_DEV_INITRD
+	unsigned int sinitrd_pfn, einitrd_pfn;
+#endif
+	int response;
+	int hlen;
+	size_t len;
+	char *savesys_ptr;
+	char defsys_cmd[DEFSYS_CMD_SIZE];
+	char savesys_cmd[SAVESYS_CMD_SIZE];
+
+	/* Do nothing if we are not running under VM */
+	if (!MACHINE_IS_VM)
+		return;
+
+	/* Convert COMMAND_LINE to upper case */
+	for (i = 0; boot_command_line[i]; i++)
+		upper_command_line[i] = toupper(boot_command_line[i]);
+
+	savesys_ptr = strstr(upper_command_line, "SAVESYS=");
+
+	if (!savesys_ptr)
+		return;
+
+	savesys_ptr += 8;    /* Point to the beginning of the NSS name */
+	for (i = 0; i < NSS_NAME_SIZE; i++) {
+		if (savesys_ptr[i] == ' ' || savesys_ptr[i] == '\0')
+			break;
+		kernel_nss_name[i] = savesys_ptr[i];
+	}
+
+	stext_pfn = PFN_DOWN(__pa(&_stext));
+	eshared_pfn = PFN_DOWN(__pa(&_eshared));
+	end_pfn = PFN_UP(__pa(&_end));
+	min_size = end_pfn << 2;	/* pages -> KB (4K pages) */
+
+	/* scnprintf() returns what was stored, so hlen stays inside the buffer. */
+	hlen = scnprintf(defsys_cmd, DEFSYS_CMD_SIZE,
+			"DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X",
+			kernel_nss_name, stext_pfn - 1, stext_pfn,
+			eshared_pfn - 1, eshared_pfn, end_pfn);
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (INITRD_START && INITRD_SIZE) {
+		sinitrd_pfn = PFN_DOWN(__pa(INITRD_START));
+		einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE));
+		min_size = einitrd_pfn << 2;
+		hlen += scnprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen,
+				 " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn);
+	}
+#endif
+
+	snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen,
+		 " EW MINSIZE=%.7iK PARMREGS=0-13", min_size);
+	defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0';
+	snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s",
+		 kernel_nss_name, kernel_nss_name);
+	savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0';
+
+	__cpcmd(defsys_cmd, NULL, 0, &response);
+
+	if (response != 0) {
+		pr_err("Defining the Linux kernel NSS failed with rc=%d\n",
+			response);
+		kernel_nss_name[0] = '\0';
+		return;
+	}
+
+	len = strlen(savesys_cmd);
+	ASCEBC(savesys_cmd, len);
+	response = savesys_ipl_nss(savesys_cmd, len);
+
+	/* On success: response is equal to the command size,
+	 *	       max SAVESYS_CMD_SIZE
+	 * On error: response contains the numeric portion of cp error message.
+	 *	     for SAVESYS it will be >= 263
+	 *	     for missing privilege class, it will be 1
+	 */
+	if (response > SAVESYS_CMD_SIZE || response == 1) {
+		pr_err("Saving the Linux kernel NSS failed with rc=%d\n",
+			response);
+		kernel_nss_name[0] = '\0';
+		return;
+	}
+
+	/* re-initialize cputime accounting. */
+	sched_clock_base_cc = get_tod_clock();
+	S390_lowcore.last_update_clock = sched_clock_base_cc;
+	S390_lowcore.last_update_timer = 0x7fffffffffffffffULL;
+	S390_lowcore.user_timer = 0;
+	S390_lowcore.system_timer = 0;
+	asm volatile("SPT 0(%0)" : : "a" (&S390_lowcore.last_update_timer));
+
+	/* re-setup boot command line with new ipl vm parms */
+	ipl_update_parameters();
+	setup_boot_command_line();
+
+	ipl_flags = IPL_NSS_VALID;
+}
+
+#else /* CONFIG_SHARED_KERNEL */
+
+static inline void create_kernel_nss(void) { }
+
+#endif /* CONFIG_SHARED_KERNEL */
+
+/* Zero the memory between __bss_start and __bss_stop. */
+static noinline __init void clear_bss_section(void)
+{
+	size_t bss_len = __bss_stop - __bss_start;
+
+	memset(__bss_start, 0, bss_len);
+}
+
+/*
+ * Set the storage key of every page from address 0 up to the end of the
+ * kernel image (_end, rounded up to a page) to PAGE_DEFAULT_KEY.
+ * Compiles to nothing when PAGE_DEFAULT_KEY is zero.
+ */
+static noinline __init void init_kernel_storage_key(void)
+{
+#if PAGE_DEFAULT_KEY
+	unsigned long limit = PFN_UP(__pa(&_end)) << PAGE_SHIFT;
+	unsigned long addr;
+
+	for (addr = 0; addr < limit; addr += PAGE_SIZE)
+		page_set_storage_key(addr, PAGE_DEFAULT_KEY, 0);
+#endif
+}
+
+static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+/* Set the LPAR, KVM or VM machine flag depending on what stsi reports. */
+static noinline __init void detect_machine_type(void)
+{
+	struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
+	int is_kvm;
+
+	/* A current-configuration-level of 2 or below is flagged as LPAR. */
+	if (stsi(NULL, 0, 0, 0) <= 2) {
+		S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR;
+		return;
+	}
+	/* Get virtual-machine cpu information; no flag if there is none. */
+	if (stsi(vmms, 3, 2, 2) != 0 || vmms->count == 0)
+		return;
+	/* "KVM" in EBCDIC identifies KVM; anything else is taken as z/VM. */
+	is_kvm = !memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3);
+	S390_lowcore.machine_flags |= is_kvm ? MACHINE_FLAG_KVM :
+					      MACHINE_FLAG_VM;
+}
+
+/* Set the topology flag and probe the highest nesting level stsi accepts. */
+static __init void setup_topology(void)
+{
+	int level = 6;
+
+	if (!test_facility(11))
+		return;
+	S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY;
+	/* Probe from level 6 downwards; level 1 is used if all probes fail. */
+	while (level > 1 && stsi(&sysinfo_page, 15, 1, level) != 0)
+		level--;
+	topology_max_mnest = level;
+}
+/* Early program check handler: continue at the exception table fixup. */
+static void early_pgm_check_handler(void)
+{
+	const struct exception_table_entry *fixup;
+	unsigned long cr0, cr0_new;
+	unsigned long addr;
+
+	addr = S390_lowcore.program_old_psw.addr;
+	fixup = search_exception_tables(addr & PSW_ADDR_INSN);
+	if (!fixup)
+		disabled_wait(0);	/* presumably does not return - confirm */
+	/* Disable low address protection before storing into lowcore. */
+	__ctl_store(cr0, 0, 0);
+	cr0_new = cr0 & ~(1UL << 28);
+	__ctl_load(cr0_new, 0, 0);
+	S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
+	__ctl_load(cr0, 0, 0);	/* put the saved control register 0 back */
+}
+/* Install the early external and program check new PSWs in the lowcore. */
+static noinline __init void setup_lowcore_early(void)
+{
+	psw_t psw;
+
+	psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
+	psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler;
+	S390_lowcore.external_new_psw = psw;
+	/* Same mask, different entry point for program checks. */
+	psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+	S390_lowcore.program_new_psw = psw;
+	s390_base_pgm_handler_fn = early_pgm_check_handler;
+}
+/* Fill S390_lowcore.stfle_fac_list by calling stfle() on the whole array. */
+static noinline __init void setup_facility_list(void)
+{
+	stfle(S390_lowcore.stfle_fac_list,
+	      ARRAY_SIZE(S390_lowcore.stfle_fac_list));
+}
+/* Probe diagnose 0x9c; set MACHINE_FLAG_DIAG9C if rc ends up as 0. */
+static __init void detect_diag9c(void)
+{
+	unsigned int cpu_address;
+	int rc;
+
+	cpu_address = stap();
+	asm volatile(
+		"	diag	%2,0,0x9c\n"
+		"0:	la	%0,0\n"	/* rc = 0; presumably skipped on a fault */
+		"1:\n"
+		EX_TABLE(0b,1b)	/* exception table entry: 0b -> 1b */
+		: "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
+	if (!rc)	/* rc was preset to -EOPNOTSUPP via the "0" constraint */
+		S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
+}
+/* Probe diagnose 0x44; set MACHINE_FLAG_DIAG44 if rc ends up as 0. */
+static __init void detect_diag44(void)
+{
+	int rc;
+
+	asm volatile(
+		"	diag	0,0,0x44\n"
+		"0:	la	%0,0\n"	/* rc = 0; presumably skipped on a fault */
+		"1:\n"
+		EX_TABLE(0b,1b)	/* exception table entry: 0b -> 1b */
+		: "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
+	if (!rc)	/* rc was preset to -EOPNOTSUPP via the "0" constraint */
+		S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
+}
+/* Translate installed facility bits into MACHINE_FLAG_* machine flags. */
+static __init void detect_machine_facilities(void)
+{
+	if (test_facility(8)) {
+		S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
+		__ctl_set_bit(0, 23);	/* presumably cr0 bit 23 - confirm */
+	}
+	if (test_facility(78))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
+	if (test_facility(3))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
+	if (test_facility(40))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_LPP;
+	if (test_facility(50) && test_facility(73))	/* TE needs both */
+		S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
+	if (test_facility(51))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
+	if (test_facility(129))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
+}
+
+/* "cad=<n>": a non-zero <n> sets MACHINE_FLAG_CAD if facility 128 is there. */
+static int __init cad_setup(char *str)
+{
+	int val = 0;	/* get_option() leaves it untouched if nothing parses */
+
+	if (get_option(&str, &val) && val && test_facility(128))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_CAD;
+	return 0;
+}
+early_param("cad", cad_setup);
+/* Early initcall: set control register bit (2, 3) if the machine has CAD. */
+static int __init cad_init(void)
+{
+	if (!MACHINE_HAS_CAD)
+		return 0;
+	__ctl_set_bit(2, 3);	/* Enable problem state CAD. */
+	return 0;
+}
+early_initcall(cad_init);
+
+/* Move the initrd up if it starts less than 4MB behind the kernel's end. */
+static __init void rescue_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	unsigned long lowest = (unsigned long) _end + (4UL << 20);
+
+	/*
+	 * Just like in case of IPL from VM reader we keep a gap of 4MB
+	 * between the end of the kernel and the start of the initrd.
+	 * That way saving an NSS is sure to succeed as well, although
+	 * that only requires the two to be in different segments.
+	 */
+	if (INITRD_START && INITRD_SIZE && INITRD_START < lowest) {
+		memmove((void *) lowest, (void *) INITRD_START, INITRD_SIZE);
+		INITRD_START = lowest;
+	}
+#endif
+}
+
+/* Set up boot command line: append or substitute the data from @ipl_data. */
+static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t))
+{
+	char *parm, *delim;
+	size_t rc, len;
+
+	len = strlen(boot_command_line);
+
+	delim = boot_command_line + len;	/* '\0' character position */
+	parm  = boot_command_line + len + 1;	/* append right after '\0' */
+
+	rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1);	/* 0: nothing added */
+	if (rc) {
+		if (*parm == '=')	/* leading '=': data replaces the line */
+			memmove(boot_command_line, parm + 1, rc);
+		else
+			*delim = ' ';		/* replace '\0' with space */
+	}
+}
+
+/* Return 1 if any byte of @str has its top bit (0x80) set, 0 otherwise. */
+static inline int has_ebcdic_char(const char *str)
+{
+	while (*str) {
+		if (*str++ & 0x80)
+			return 1;
+	}
+	return 0;
+}
+/* Build boot_command_line from COMMAND_LINE plus the IPL parameter data. */
+static void __init setup_boot_command_line(void)
+{
+	COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0;	/* force termination */
+	/* convert arch command line to ascii if necessary */
+	if (has_ebcdic_char(COMMAND_LINE))
+		EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE);
+	/* copy arch command line */
+	strlcpy(boot_command_line, strstrip(COMMAND_LINE),
+		ARCH_COMMAND_LINE_SIZE);
+
+	/* append IPL PARM data to the boot command line */
+	if (MACHINE_IS_VM)
+		append_to_cmdline(append_ipl_vmparm);
+
+	append_to_cmdline(append_ipl_scpdata);	/* done for every machine type */
+}
+
+/*
+ * Early setup: save ipl parameters, clear bss, initialize storage keys,
+ * detect machine type and facilities; creates a kernel NSS if SAVESYS= is set
+ */
+void __init startup_init(void)
+{
+	reset_tod_clock();
+	ipl_save_parameters();
+	rescue_initrd();
+	clear_bss_section();
+	init_kernel_storage_key();
+	lockdep_init();
+	lockdep_off();	/* lockdep stays off until lockdep_on() below */
+	setup_lowcore_early();	/* installs the handler the diag probes rely on */
+	setup_facility_list();
+	detect_machine_type();
+	ipl_update_parameters();
+	setup_boot_command_line();	/* checks MACHINE_IS_VM, set just above */
+	create_kernel_nss();
+	detect_diag9c();
+	detect_diag44();
+	detect_machine_facilities();
+	setup_topology();
+	sclp_early_detect();
+	lockdep_on();
+}
diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c
new file mode 100644
index 000000000..b971c6be6
--- /dev/null
+++ b/arch/s390/kernel/ebcdic.c
@@ -0,0 +1,400 @@
+/*
+ *    EBCDIC -> ASCII, ASCII -> EBCDIC,
+ *    upper to lower case (EBCDIC) conversion tables.
+ *
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *               Martin Peschke <peschke@fh-brandenburg.de>
+ */
+
+#include <linux/module.h>
+#include <asm/types.h>
+#include <asm/ebcdic.h>
+
+/*
+ * ASCII (IBM PC 437)  -> EBCDIC 037
+ */
+__u8 _ascebc[256] =
+{
+ /*00 NUL   SOH   STX   ETX   EOT   ENQ   ACK   BEL */
+     0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F,
+ /*08  BS    HT    LF    VT    FF    CR    SO    SI */
+ /*              ->NL                               */
+     0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /*10 DLE   DC1   DC2   DC3   DC4   NAK   SYN   ETB */
+     0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26,
+ /*18 CAN    EM   SUB   ESC    FS    GS    RS    US */
+ /*                               ->IGS ->IRS ->IUS */
+     0x18, 0x19, 0x3F, 0x27, 0x22, 0x1D, 0x1E, 0x1F,
+ /*20  SP     !     "     #     $     %     &     ' */
+     0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D,
+ /*28   (     )     *     +     ,     -    .      / */
+     0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
+ /*30   0     1     2     3     4     5     6     7 */
+     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ /*38   8     9     :     ;     <     =     >     ? */
+     0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
+ /*40   @     A     B     C     D     E     F     G */
+     0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ /*48   H     I     J     K     L     M     N     O */
+     0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
+ /*50   P     Q     R     S     T     U     V     W */
+     0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6,
+ /*58   X     Y     Z     [     \     ]     ^     _ */
+     0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
+ /*60   `     a     b     c     d     e     f     g */
+     0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ /*68   h     i     j     k     l     m     n     o */
+     0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+ /*70   p     q     r     s     t     u     v     w */
+     0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6,
+ /*78   x     y     z     {     |     }     ~    DL */
+     0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
+ /*80*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*88*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*90*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*98*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E0        sz						*/
+     0x3F, 0x59, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F8*/
+     0x90, 0x3F, 0x3F, 0x3F, 0x3F, 0xEA, 0x3F, 0xFF
+};
+
+/*
+ * EBCDIC 037 -> ASCII (IBM PC 437)
+ */
+__u8 _ebcasc[256] =
+{
+ /* 0x00   NUL   SOH   STX   ETX  *SEL    HT  *RNL   DEL */
+          0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F,
+ /* 0x08   -GE  -SPS  -RPT    VT    FF    CR    SO    SI */
+          0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /* 0x10   DLE   DC1   DC2   DC3  -RES   -NL    BS  -POC
+                                  -ENP  ->LF             */
+          0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07,
+ /* 0x18   CAN    EM  -UBS  -CU1  -IFS  -IGS  -IRS  -ITB
+                                                    -IUS */
+          0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x20   -DS  -SOS    FS  -WUS  -BYP    LF   ETB   ESC
+                                  -INP                   */
+          0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B,
+ /* 0x28   -SA  -SFE   -SM  -CSP  -MFA   ENQ   ACK   BEL
+                       -SW                               */ 
+          0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07,
+ /* 0x30  ----  ----   SYN   -IR   -PP  -TRN  -NBS   EOT */
+          0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04,
+ /* 0x38  -SBS   -IT  -RFF  -CU3   DC4   NAK  ----   SUB */
+          0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A,
+ /* 0x40    SP   RSP           ä              ----       */
+          0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86,
+ /* 0x48                       .     <     (     +     | */
+          0x87, 0xA4, 0x9B, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
+ /* 0x50     &                                      ---- */
+          0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07,
+ /* 0x58           ß     !     $     *     )     ;       */
+          0x8D, 0xE1, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAA,
+ /* 0x60     -     /  ----     Ä  ----  ----  ----       */
+          0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F,
+ /* 0x68              ----     ,     %     _     >     ? */ 
+          0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
+ /* 0x70  ----        ----  ----  ----  ----  ----  ---- */
+          0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x78     *     `     :     #     @     '     =     " */
+          0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
+ /* 0x80     *     a     b     c     d     e     f     g */
+          0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ /* 0x88     h     i              ----  ----  ----       */
+          0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1,
+ /* 0x90     °     j     k     l     m     n     o     p */
+          0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
+ /* 0x98     q     r                    ----        ---- */
+          0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07,
+ /* 0xA0           ~     s     t     u     v     w     x */
+          0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+ /* 0xA8     y     z              ----  ----  ----  ---- */
+          0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07,
+ /* 0xB0     ^                    ----     §  ----       */
+          0x5E, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC,
+ /* 0xB8        ----     [     ]  ----  ----  ----  ---- */
+          0xAB, 0x07, 0x5B, 0x5D, 0x07, 0x07, 0x07, 0x07,
+ /* 0xC0     {     A     B     C     D     E     F     G */
+          0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ /* 0xC8     H     I  ----           ö              ---- */
+          0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07,
+ /* 0xD0     }     J     K     L     M     N     O     P */
+          0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
+ /* 0xD8     Q     R  ----           ü                   */
+          0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98,
+ /* 0xE0     \           S     T     U     V     W     X */
+          0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ /* 0xE8     Y     Z        ----     Ö  ----  ----  ---- */
+          0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07,
+ /* 0xF0     0     1     2     3     4     5     6     7 */
+          0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ /* 0xF8     8     9  ----  ----     Ü  ----  ----  ---- */
+          0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07
+};
+
+
+/*
+ * ASCII (IBM PC 437)  -> EBCDIC 500
+ */
+__u8 _ascebc_500[256] =
+{
+ /*00 NUL   SOH   STX   ETX   EOT   ENQ   ACK   BEL */
+     0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F,
+ /*08  BS    HT    LF    VT    FF    CR    SO    SI */
+ /*              ->NL                               */
+     0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /*10 DLE   DC1   DC2   DC3   DC4   NAK   SYN   ETB */
+     0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26,
+ /*18 CAN    EM   SUB   ESC    FS    GS    RS    US */
+ /*                               ->IGS ->IRS ->IUS */
+     0x18, 0x19, 0x3F, 0x27, 0x22, 0x1D, 0x1E, 0x1F,
+ /*20  SP     !     "     #     $     %     &     ' */
+     0x40, 0x4F, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D,
+ /*28   (     )     *     +     ,     -    .      / */
+     0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
+ /*30   0     1     2     3     4     5     6     7 */
+     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ /*38   8     9     :     ;     <     =     >     ? */
+     0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
+ /*40   @     A     B     C     D     E     F     G */
+     0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ /*48   H     I     J     K     L     M     N     O */
+     0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
+ /*50   P     Q     R     S     T     U     V     W */
+     0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6,
+ /*58   X     Y     Z     [     \     ]     ^     _ */
+     0xE7, 0xE8, 0xE9, 0x4A, 0xE0, 0x5A, 0x5F, 0x6D,
+ /*60   `     a     b     c     d     e     f     g */
+     0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ /*68   h     i     j     k     l     m     n     o */
+     0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+ /*70   p     q     r     s     t     u     v     w */
+     0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6,
+ /*78   x     y     z     {     |     }     ~    DL */
+     0xA7, 0xA8, 0xA9, 0xC0, 0xBB, 0xD0, 0xA1, 0x07,
+ /*80*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*88*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*90*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*98*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E0        sz						*/
+     0x3F, 0x59, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E8*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F0*/
+     0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F8*/
+     0x90, 0x3F, 0x3F, 0x3F, 0x3F, 0xEA, 0x3F, 0xFF
+};
+
+/*
+ * EBCDIC 500 -> ASCII (IBM PC 437)
+ */
+__u8 _ebcasc_500[256] =
+{
+ /* 0x00   NUL   SOH   STX   ETX  *SEL    HT  *RNL   DEL */
+          0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F,
+ /* 0x08   -GE  -SPS  -RPT    VT    FF    CR    SO    SI */
+          0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /* 0x10   DLE   DC1   DC2   DC3  -RES   -NL    BS  -POC
+                                  -ENP  ->LF             */
+          0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07,
+ /* 0x18   CAN    EM  -UBS  -CU1  -IFS  -IGS  -IRS  -ITB
+                                                    -IUS */
+          0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x20   -DS  -SOS    FS  -WUS  -BYP    LF   ETB   ESC
+                                  -INP                   */
+          0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B,
+ /* 0x28   -SA  -SFE   -SM  -CSP  -MFA   ENQ   ACK   BEL
+                       -SW                               */ 
+          0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07,
+ /* 0x30  ----  ----   SYN   -IR   -PP  -TRN  -NBS   EOT */
+          0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04,
+ /* 0x38  -SBS   -IT  -RFF  -CU3   DC4   NAK  ----   SUB */
+          0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A,
+ /* 0x40    SP   RSP           ä              ----       */
+          0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86,
+ /* 0x48                 [     .     <     (     +     ! */
+          0x87, 0xA4, 0x5B, 0x2E, 0x3C, 0x28, 0x2B, 0x21,
+ /* 0x50     &                                      ---- */
+          0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07,
+ /* 0x58           ß     ]     $     *     )     ;     ^ */
+          0x8D, 0xE1, 0x5D, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
+ /* 0x60     -     /  ----     Ä  ----  ----  ----       */
+          0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F,
+ /* 0x68              ----     ,     %     _     >     ? */ 
+          0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
+ /* 0x70  ----        ----  ----  ----  ----  ----  ---- */
+          0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x78     *     `     :     #     @     '     =     " */
+          0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
+ /* 0x80     *     a     b     c     d     e     f     g */
+          0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ /* 0x88     h     i              ----  ----  ----       */
+          0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1,
+ /* 0x90     °     j     k     l     m     n     o     p */
+          0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
+ /* 0x98     q     r                    ----        ---- */
+          0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07,
+ /* 0xA0           ~     s     t     u     v     w     x */
+          0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+ /* 0xA8     y     z              ----  ----  ----  ---- */
+          0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07,
+ /* 0xB0                          ----     §  ----       */
+          0x9B, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC,
+ /* 0xB8        ----           |  ----  ----  ----  ---- */
+          0xAB, 0x07, 0xAA, 0x7C, 0x07, 0x07, 0x07, 0x07,
+ /* 0xC0     {     A     B     C     D     E     F     G */
+          0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ /* 0xC8     H     I  ----           ö              ---- */
+          0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07,
+ /* 0xD0     }     J     K     L     M     N     O     P */
+          0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
+ /* 0xD8     Q     R  ----           ü                   */
+          0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98,
+ /* 0xE0     \           S     T     U     V     W     X */
+          0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ /* 0xE8     Y     Z        ----     Ö  ----  ----  ---- */
+          0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07,
+ /* 0xF0     0     1     2     3     4     5     6     7 */
+          0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ /* 0xF8     8     9  ----  ----     Ü  ----  ----  ---- */
+          0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07
+};
+
+
+/*
+ * EBCDIC 037/500 conversion table:
+ * from upper to lower case
+ */
+__u8 _ebc_tolower[256] =
+{
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+	0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+	0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
+	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+	0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
+	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+	0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
+	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+	0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
+	0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+	0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+	0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+	0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
+	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+	0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
+	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+	0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F,
+	0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
+	0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0x8D, 0x8E, 0xAF,
+	0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
+	0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
+	0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+	0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
+	0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+	0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
+	0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
+	0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
+	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+	0xF8, 0xF9, 0xFA, 0xDB, 0xDC, 0xDD, 0xDE, 0xFF
+};
+
+
+/*
+ * EBCDIC 037/500 conversion table:
+ * from lower to upper case
+ */
+__u8 _ebc_toupper[256] =
+{
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+	0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+	0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
+	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+	0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
+	0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+	0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
+	0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+	0x78, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
+	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+	0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+	0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
+	0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+	0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xAD, 0xAE, 0x8F,
+	0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
+	0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F,
+	0xA0, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+	0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
+	0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
+	0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
+	0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+	0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+	0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
+	0xD8, 0xD9, 0xDA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
+	0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+	0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
+};
+
+EXPORT_SYMBOL(_ascebc_500);
+EXPORT_SYMBOL(_ebcasc_500);
+EXPORT_SYMBOL(_ascebc);
+EXPORT_SYMBOL(_ebcasc);
+EXPORT_SYMBOL(_ebc_tolower);
+EXPORT_SYMBOL(_ebc_toupper);
+
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
new file mode 100644
index 000000000..99b44acbf
--- /dev/null
+++ b/arch/s390/kernel/entry.S
@@ -0,0 +1,1059 @@
+/*
+ *    S390 low-level entry points.
+ *
+ *    Copyright IBM Corp. 1999, 2012
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *		 Hartmut Penner (hp@de.ibm.com),
+ *		 Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/page.h>
+#include <asm/sigp.h>
+#include <asm/irq.h>
+
+__PT_R0      =	__PT_GPRS		# gpr slots, 8 bytes apart, from __PT_GPRS
+__PT_R1      =	__PT_GPRS + 8
+__PT_R2      =	__PT_GPRS + 16
+__PT_R3      =	__PT_GPRS + 24
+__PT_R4      =	__PT_GPRS + 32
+__PT_R5      =	__PT_GPRS + 40
+__PT_R6      =	__PT_GPRS + 48
+__PT_R7      =	__PT_GPRS + 56
+__PT_R8      =	__PT_GPRS + 64
+__PT_R9      =	__PT_GPRS + 72
+__PT_R10     =	__PT_GPRS + 80
+__PT_R11     =	__PT_GPRS + 88
+__PT_R12     =	__PT_GPRS + 96
+__PT_R13     =	__PT_GPRS + 104
+__PT_R14     =	__PT_GPRS + 112
+__PT_R15     =	__PT_GPRS + 120
+/* STACK_INIT: stack size minus one stack frame and one __PT_SIZE area */
+STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
+STACK_SIZE  = 1 << STACK_SHIFT
+STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
+/* bit masks tested with tm against thread, cpu and pt_regs flags below */
+_TIF_WORK	= (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
+		   _TIF_UPROBE)
+_TIF_TRACE	= (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+		   _TIF_SYSCALL_TRACEPOINT)
+_CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE)
+_PIF_WORK	= (_PIF_PER_TRAP)
+/* BASED(name): operand for name as an offset from system_call in %r13 */
+#define BASED(name) name-system_call(%r13)
+
+	.macro	TRACE_IRQS_ON		# call trace_hardirqs_on_caller
+#ifdef CONFIG_TRACE_IRQFLAGS
+	basr	%r2,%r0			# %r2 = address of next insn, no branch
+	brasl	%r14,trace_hardirqs_on_caller
+#endif
+	.endm
+
+	.macro	TRACE_IRQS_OFF		# call trace_hardirqs_off_caller
+#ifdef CONFIG_TRACE_IRQFLAGS
+	basr	%r2,%r0			# %r2 = address of next insn, no branch
+	brasl	%r14,trace_hardirqs_off_caller
+#endif
+	.endm
+
+	.macro	LOCKDEP_SYS_EXIT	# call lockdep_sys_exit on return to user
+#ifdef CONFIG_LOCKDEP
+	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
+	jz	.+10			# no -> skip the 6 byte brasl
+	brasl	%r14,lockdep_sys_exit
+#endif
+	.endm
+
+	.macro LPP newpp		# issue insn 0xb280 on operand newpp
+#if IS_ENABLED(CONFIG_KVM)
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_LPP
+	jz	.+8				# flag clear -> skip the 4 byte insn
+	.insn	s,0xb2800000,\newpp		# S format, hand-encoded opcode
+#endif
+	.endm
+
+	.macro	HANDLE_SIE_INTERCEPT scratch,reason
+#if IS_ENABLED(CONFIG_KVM)
+	tmhh	%r8,0x0001		# interrupting from user ?
+	jnz	.+62			# yes -> skip the rest of this macro
+	lgr	\scratch,%r9		# %r9: address part of the old PSW
+	slg	\scratch,BASED(.Lsie_critical)	# minus value at .Lsie_critical
+	clg	\scratch,BASED(.Lsie_critical_length)
+	.if	\reason==1
+	# Some program interrupts are suppressing (e.g. protection).
+	# We must also check the instruction after SIE in that case.
+	# do_protection_exception will rewind to .Lrewind_pad
+	jh	.+42			# above the range -> skip SIE exit code
+	.else
+	jhe	.+42			# at or above -> skip SIE exit code
+	.endif
+	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
+	LPP	__SF_EMPTY+16(%r15)		# set host id
+	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+	larl	%r9,sie_exit			# skip forward to sie_exit
+	mvi	__SF_EMPTY+31(%r15),\reason	# set exit reason
+#endif
+	.endm
+
+	.macro	CHECK_STACK stacksize,savearea
+#ifdef CONFIG_CHECK_STACK
+	tml	%r15,\stacksize - CONFIG_STACK_GUARD	# test stack pointer bits
+	lghi	%r14,\savearea		# does not change the condition code
+	jz	stack_overflow		# all tested bits zero -> overflow
+#endif
+	.endm
+
+	.macro	SWITCH_ASYNC savearea,stack,shift
+	tmhh	%r8,0x0001		# interrupting from user ?
+	jnz	1f			# yes -> load target stack
+	lgr	%r14,%r9		# %r9: address part of the old PSW
+	slg	%r14,BASED(.Lcritical_start)
+	clg	%r14,BASED(.Lcritical_length)
+	jhe	0f			# not inside the critical section
+	lghi	%r11,\savearea		# inside critical section, do cleanup
+	brasl	%r14,cleanup_critical
+	tmhh	%r8,0x0001		# retest problem state after cleanup
+	jnz	1f
+0:	lg	%r14,\stack		# are we already on the target stack?
+	slgr	%r14,%r15
+	srag	%r14,%r14,\shift
+	jnz	1f			# no -> load target stack
+	CHECK_STACK 1<<\shift,\savearea
+	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)	# frame + pt_regs
+	j	2f
+1:	lg	%r15,\stack		# load target stack
+2:	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# %r11 = pointer to pt_regs
+	.endm
+
+	.macro UPDATE_VTIME scratch,enter_timer	# update user/system timers
+	lg	\scratch,__LC_EXIT_TIMER
+	slg	\scratch,\enter_timer		# exit timer - enter timer
+	alg	\scratch,__LC_USER_TIMER
+	stg	\scratch,__LC_USER_TIMER	# added to the user timer
+	lg	\scratch,__LC_LAST_UPDATE_TIMER
+	slg	\scratch,__LC_EXIT_TIMER	# last update - exit timer
+	alg	\scratch,__LC_SYSTEM_TIMER
+	stg	\scratch,__LC_SYSTEM_TIMER	# added to the system timer
+	mvc	__LC_LAST_UPDATE_TIMER(8),\enter_timer	# new last update value
+	.endm
+
+	.macro	LAST_BREAK scratch	# store %r10 as last_break if >= 1<<23
+	srag	\scratch,%r10,23	# shift sets the condition code
+	jz	.+10			# result zero -> skip the 6 byte stg
+	stg	%r10,__TI_last_break(%r12)
+	.endm
+
+	.macro REENABLE_IRQS		# set system mask from old PSW in %r8
+	stg	%r8,__LC_RETURN_PSW	# first dword of the old PSW
+	ni	__LC_RETURN_PSW,0xbf	# clear 0x40 in byte 0 (cf. tmhh 0x4000)
+	ssm	__LC_RETURN_PSW		# set system mask from that byte
+	.endm
+
+	.macro STCK savearea		# store clock value at savearea
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+	.insn	s,0xb27c0000,\savearea		# store clock fast
+#else
+	.insn	s,0xb2050000,\savearea		# store clock
+#endif
+	.endm
+
+	.section .kprobes.text, "ax"
+
+/*
+ * Scheduler resume function, called by switch_to
+ *  gpr2 = (task_struct *) prev
+ *  gpr3 = (task_struct *) next
+ * Returns:
+ *  gpr2 = prev
+ */
+ENTRY(__switch_to)
+	stmg	%r6,%r15,__SF_GPRS(%r15)	# store gprs of prev task
+	stg	%r15,__THREAD_ksp(%r2)		# store kernel stack of prev
+	lg	%r4,__THREAD_info(%r2)		# get thread_info of prev
+	lg	%r5,__THREAD_info(%r3)		# get thread_info of next
+	lgr	%r15,%r5			# thread_info address of next ...
+	aghi	%r15,STACK_INIT			# end of kernel stack of next
+	stg	%r3,__LC_CURRENT		# store task struct of next
+	stg	%r5,__LC_THREAD_INFO		# store thread info of next
+	stg	%r15,__LC_KERNEL_STACK		# store end of kernel stack
+	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
+	mvc	__LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next
+	lg	%r15,__THREAD_ksp(%r3)		# load kernel stack of next
+	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
+	br	%r14				# %r14 as restored by the lmg
+
+.L__critical_start:
+/*
+ * SVC interrupt handler routine. System calls are synchronous events and
+ * are executed with interrupts enabled.
+ */
+
+ENTRY(system_call)
+	stpt	__LC_SYNC_ENTER_TIMER		# store cpu timer on entry
+.Lsysc_stmg:
+	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC	# save gprs 8-15
+	lg	%r10,__LC_LAST_BREAK
+	lg	%r12,__LC_THREAD_INFO
+	lghi	%r14,_PIF_SYSCALL		# initial value for __PT_FLAGS
+.Lsysc_per:					# .Lpgm_svcper resumes here
+	lg	%r15,__LC_KERNEL_STACK
+	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
+.Lsysc_vtime:
+	UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER
+	LAST_BREAK %r13
+	stmg	%r0,%r7,__PT_R0(%r11)
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC	# gprs 8-15 from save area
+	mvc	__PT_PSW(16,%r11),__LC_SVC_OLD_PSW
+	mvc	__PT_INT_CODE(4,%r11),__LC_SVC_ILC
+	stg	%r14,__PT_FLAGS(%r11)
+.Lsysc_do_svc:
+	lg	%r10,__TI_sysc_table(%r12)	# address of system call table
+	llgh	%r8,__PT_INT_CODE+2(%r11)	# low halfword of the int code
+	slag	%r8,%r8,2			# shift and test for svc 0
+	jnz	.Lsysc_nr_ok
+	# svc 0: system call number in %r1
+	llgfr	%r1,%r1				# clear high word in r1
+	cghi	%r1,NR_syscalls
+	jnl	.Lsysc_nr_ok			# too large: keep table offset 0
+	sth	%r1,__PT_INT_CODE+2(%r11)	# record number in the int code
+	slag	%r8,%r1,2			# table offset, 4 bytes per entry
+.Lsysc_nr_ok:
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)	# zero back chain
+	stg	%r2,__PT_ORIG_GPR2(%r11)
+	stg	%r7,STACK_FRAME_OVERHEAD(%r15)
+	lgf	%r9,0(%r8,%r10)			# get system call add.
+	tm	__TI_flags+7(%r12),_TIF_TRACE
+	jnz	.Lsysc_tracesys
+	basr	%r14,%r9			# call sys_xxxx
+	stg	%r2,__PT_R2(%r11)		# store return value
+
+.Lsysc_return:
+	LOCKDEP_SYS_EXIT
+.Lsysc_tif:
+	tm	__PT_PSW+1(%r11),0x01		# returning to user ?
+	jno	.Lsysc_restore			# no -> skip the work checks
+	tm	__PT_FLAGS+7(%r11),_PIF_WORK	# pt_regs flags
+	jnz	.Lsysc_work
+	tm	__TI_flags+7(%r12),_TIF_WORK	# thread_info flags
+	jnz	.Lsysc_work			# check for work
+	tm	__LC_CPU_FLAGS+7,_CIF_WORK	# cpu flags
+	jnz	.Lsysc_work
+.Lsysc_restore:
+	lg	%r14,__LC_VDSO_PER_CPU		# base for the mvc below
+	lmg	%r0,%r10,__PT_R0(%r11)		# restore gprs 0-10
+	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)	# PSW to return with
+	stpt	__LC_EXIT_TIMER			# store cpu timer on exit
+	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+	lmg	%r11,%r15,__PT_R11(%r11)	# restore gprs 11-15
+	lpswe	__LC_RETURN_PSW			# load the return PSW
+.Lsysc_done:
+
+#
+# One of the work bits is on. Find out which one.
+# The bits are tested in the order below, the first one found is handled.
+.Lsysc_work:
+	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
+	jo	.Lsysc_mcck_pending
+	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
+	jo	.Lsysc_reschedule
+#ifdef CONFIG_UPROBES
+	tm	__TI_flags+7(%r12),_TIF_UPROBE
+	jo	.Lsysc_uprobe_notify
+#endif
+	tm	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
+	jo	.Lsysc_singlestep
+	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
+	jo	.Lsysc_sigpending
+	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
+	jo	.Lsysc_notify_resume
+	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
+	jo	.Lsysc_uaccess
+	j	.Lsysc_return		# beware of critical section cleanup
+
+#
+# _TIF_NEED_RESCHED is set, call schedule
+#
+.Lsysc_reschedule:
+	larl	%r14,.Lsysc_return	# schedule returns to .Lsysc_return
+	jg	schedule
+
+#
+# _CIF_MCCK_PENDING is set, call handler
+#
+.Lsysc_mcck_pending:
+	larl	%r14,.Lsysc_return	# handler returns to .Lsysc_return
+	jg	s390_handle_mcck	# CIF bit will be cleared by handler
+
+#
+# _CIF_ASCE is set, load user space asce
+#
+.Lsysc_uaccess:
+	ni	__LC_CPU_FLAGS+7,255-_CIF_ASCE	# clear the _CIF_ASCE bit
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+	j	.Lsysc_return			# recheck the work bits
+
+#
+# _TIF_SIGPENDING is set, call do_signal
+#
+.Lsysc_sigpending:
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	brasl	%r14,do_signal
+	tm	__PT_FLAGS+7(%r11),_PIF_SYSCALL
+	jno	.Lsysc_return		# flag clear -> no svc restart
+	lmg	%r2,%r7,__PT_R2(%r11)	# load svc arguments
+	lg	%r10,__TI_sysc_table(%r12)	# address of system call table
+	lghi	%r8,0			# svc 0 returns -ENOSYS
+	llgh	%r1,__PT_INT_CODE+2(%r11)	# load new svc number
+	cghi	%r1,NR_syscalls
+	jnl	.Lsysc_nr_ok		# invalid svc number -> do svc 0
+	slag	%r8,%r1,2		# table offset, 4 bytes per entry
+	j	.Lsysc_nr_ok		# restart svc
+
+#
+# _TIF_NOTIFY_RESUME is set, call do_notify_resume
+#
+.Lsysc_notify_resume:
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	larl	%r14,.Lsysc_return	# callee returns to .Lsysc_return
+	jg	do_notify_resume
+
+#
+# _TIF_UPROBE is set, call uprobe_notify_resume
+#
+#ifdef CONFIG_UPROBES
+.Lsysc_uprobe_notify:
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	larl	%r14,.Lsysc_return	# callee returns to .Lsysc_return
+	jg	uprobe_notify_resume
+#endif
+
+#
+# _PIF_PER_TRAP is set, call do_per_trap
+#
+.Lsysc_singlestep:
+	ni	__PT_FLAGS+7(%r11),255-_PIF_PER_TRAP	# clear the flag first
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	larl	%r14,.Lsysc_return	# callee returns to .Lsysc_return
+	jg	do_per_trap
+
+#
+# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
+# and after the system call
+#
+.Lsysc_tracesys:
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	la	%r3,0
+	llgh	%r0,__PT_INT_CODE+2(%r11)
+	stg	%r0,__PT_R2(%r11)	# svc number into the gpr 2 slot
+	brasl	%r14,do_syscall_trace_enter
+	lghi	%r0,NR_syscalls
+	clgr	%r0,%r2			# compare with the value returned in %r2
+	jnh	.Lsysc_tracenogo	# NR_syscalls <= %r2 -> skip the call
+	sllg	%r8,%r2,2		# table offset, 4 bytes per entry
+	lgf	%r9,0(%r8,%r10)
+.Lsysc_tracego:
+	lmg	%r3,%r7,__PT_R3(%r11)	# reload gprs 3-7 from pt_regs
+	stg	%r7,STACK_FRAME_OVERHEAD(%r15)
+	lg	%r2,__PT_ORIG_GPR2(%r11)
+	basr	%r14,%r9		# call sys_xxx
+	stg	%r2,__PT_R2(%r11)	# store return value
+.Lsysc_tracenogo:
+	tm	__TI_flags+7(%r12),_TIF_TRACE
+	jz	.Lsysc_return		# no trace bit set -> normal exit
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	larl	%r14,.Lsysc_return	# callee returns to .Lsysc_return
+	jg	do_syscall_trace_exit
+
+#
+# a new process exits the kernel with ret_from_fork
+#
+ENTRY(ret_from_fork)
+	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
+	lg	%r12,__LC_THREAD_INFO
+	brasl	%r14,schedule_tail
+	TRACE_IRQS_ON
+	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
+	tm	__PT_PSW+1(%r11),0x01	# forking a kernel thread ?
+	jne	.Lsysc_tracenogo	# bit set (user) -> syscall exit path
+	# it's a kernel thread
+	lmg	%r9,%r10,__PT_R9(%r11)	# load gprs
+ENTRY(kernel_thread_starter)
+	la	%r2,0(%r10)		# %r2 = %r10, argument for the call
+	basr	%r14,%r9		# call the function in %r9
+	j	.Lsysc_tracenogo	# on return use the syscall exit path
+
+/*
+ * Program check handler routine
+ */
+
+ENTRY(pgm_check_handler)
+	stpt	__LC_SYNC_ENTER_TIMER
+	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
+	lg	%r10,__LC_LAST_BREAK
+	lg	%r12,__LC_THREAD_INFO
+	larl	%r13,system_call	# base register for BASED()
+	lmg	%r8,%r9,__LC_PGM_OLD_PSW	# %r8/%r9 = old PSW
+	HANDLE_SIE_INTERCEPT %r14,1
+	tmhh	%r8,0x0001		# test problem state bit
+	jnz	1f			# -> fault in user space
+	tmhh	%r8,0x4000		# PER bit set in old PSW ?
+	jnz	0f			# -> enabled, can't be a double fault
+	tm	__LC_PGM_ILC+3,0x80	# check for per exception
+	jnz	.Lpgm_svcper		# -> single stepped svc
+0:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)	# frame + pt_regs
+	j	2f
+1:	UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
+	LAST_BREAK %r14
+	lg	%r15,__LC_KERNEL_STACK
+	lg	%r14,__TI_task(%r12)
+	lghi	%r13,__LC_PGM_TDB	# %r13 reused as source address
+	tm	__LC_PGM_ILC+2,0x02	# check for transaction abort
+	jz	2f
+	mvc	__THREAD_trap_tdb(256,%r14),0(%r13)	# copy 256 bytes
+2:	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
+	stmg	%r0,%r7,__PT_R0(%r11)
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+	stmg	%r8,%r9,__PT_PSW(%r11)
+	mvc	__PT_INT_CODE(4,%r11),__LC_PGM_ILC
+	mvc	__PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)	# zero the flags
+	stg	%r10,__PT_ARGS(%r11)	# __LC_LAST_BREAK value loaded above
+	tm	__LC_PGM_ILC+3,0x80	# check for per exception
+	jz	0f
+	tmhh	%r8,0x0001		# kernel per event ?
+	jz	.Lpgm_kprobe
+	oi	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
+	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
+	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
+	mvc	__THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
+0:	REENABLE_IRQS
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	larl	%r1,pgm_check_table
+	llgh	%r10,__PT_INT_CODE+2(%r11)
+	nill	%r10,0x007f		# keep the low 7 bits, sets the cc
+	sll	%r10,2			# table offset; cc is left unchanged
+	je	.Lsysc_return		# cc from nill: masked code is zero
+	lgf	%r1,0(%r10,%r1)		# load address of handler routine
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	basr	%r14,%r1		# branch to interrupt-handler
+	j	.Lsysc_return
+
+#
+# PER event in supervisor state, must be kprobes
+#
+.Lpgm_kprobe:
+	REENABLE_IRQS
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)	# zero back chain
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	brasl	%r14,do_per_trap
+	j	.Lsysc_return
+
+#
+# single stepped system call
+#
+.Lpgm_svcper:
+	mvc	__LC_RETURN_PSW(8),__LC_SVC_NEW_PSW	# first PSW dword
+	larl	%r14,.Lsysc_per
+	stg	%r14,__LC_RETURN_PSW+8	# PSW address = .Lsysc_per
+	lghi	%r14,_PIF_SYSCALL | _PIF_PER_TRAP	# stored as __PT_FLAGS there
+	lpswe	__LC_RETURN_PSW		# branch to .Lsysc_per and enable irqs
+
+/*
+ * IO interrupt handler routine
+ */
+ENTRY(io_int_handler)
+	STCK	__LC_INT_CLOCK
+	stpt	__LC_ASYNC_ENTER_TIMER
+	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
+	lg	%r10,__LC_LAST_BREAK
+	lg	%r12,__LC_THREAD_INFO
+	larl	%r13,system_call	# base register for BASED()
+	lmg	%r8,%r9,__LC_IO_OLD_PSW	# %r8/%r9 = old PSW
+	HANDLE_SIE_INTERCEPT %r14,2
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
+	tmhh	%r8,0x0001		# interrupting from user?
+	jz	.Lio_skip
+	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
+	LAST_BREAK %r14
+.Lio_skip:
+	stmg	%r0,%r7,__PT_R0(%r11)
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+	stmg	%r8,%r9,__PT_PSW(%r11)
+	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)	# zero the flags
+	TRACE_IRQS_OFF
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+.Lio_loop:
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	lghi	%r3,IO_INTERRUPT	# second argument for do_IRQ
+	tm	__PT_INT_CODE+8(%r11),0x80	# adapter interrupt ?
+	jz	.Lio_call
+	lghi	%r3,THIN_INTERRUPT
+.Lio_call:
+	brasl	%r14,do_IRQ
+	tm	__LC_MACHINE_FLAGS+6,0x10	# MACHINE_FLAG_LPAR
+	jz	.Lio_return
+	tpi	0			# test pending interruption
+	jz	.Lio_return		# cc 0 -> leave the loop
+	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID	# new int code
+	j	.Lio_loop
+.Lio_return:
+	LOCKDEP_SYS_EXIT
+	TRACE_IRQS_ON
+.Lio_tif:
+	tm	__TI_flags+7(%r12),_TIF_WORK
+	jnz	.Lio_work		# there is work to do (signals etc.)
+	tm	__LC_CPU_FLAGS+7,_CIF_WORK
+	jnz	.Lio_work
+.Lio_restore:
+	lg	%r14,__LC_VDSO_PER_CPU		# base for the mvc below
+	lmg	%r0,%r10,__PT_R0(%r11)		# restore gprs 0-10
+	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)	# PSW to return with
+	stpt	__LC_EXIT_TIMER			# store cpu timer on exit
+	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+	lmg	%r11,%r15,__PT_R11(%r11)	# restore gprs 11-15
+	lpswe	__LC_RETURN_PSW			# load the return PSW
+.Lio_done:
+
+#
+# There is work to do, find out in which context we have been interrupted:
+# 1) if we return to user space we can do all _TIF_WORK work
+# 2) if we return to kernel code and kvm is enabled check if we need to
+#    modify the psw to leave SIE
+# 3) if we return to kernel code and preemptive scheduling is enabled check
+#    the preemption counter and if it is zero call preempt_schedule_irq
+# Before any work can be done, a switch to the kernel stack is required.
+#
+.Lio_work:
+	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
+	jo	.Lio_work_user		# yes -> do resched & signal
+#ifdef CONFIG_PREEMPT
+	# check for preemptive scheduling
+	icm	%r0,15,__TI_precount(%r12)
+	jnz	.Lio_restore		# preemption is disabled
+	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
+	jno	.Lio_restore		# no reschedule requested
+	# switch to kernel stack
+	lg	%r1,__PT_R15(%r11)	# r1 = stack pointer saved in pt_regs
+	aghi	%r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)	# room for frame + pt_regs
+	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)	# copy pt_regs
+	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)	# zero back chain
+	la	%r11,STACK_FRAME_OVERHEAD(%r1)	# r11 = copied pt_regs
+	lgr	%r15,%r1		# switch stack pointer
+	# TRACE_IRQS_ON already done at .Lio_return, call
+	# TRACE_IRQS_OFF to keep things symmetrical
+	TRACE_IRQS_OFF
+	brasl	%r14,preempt_schedule_irq
+	j	.Lio_return
+#else
+	j	.Lio_restore
+#endif
+
+#
+# Need to do work before returning to userspace, switch to kernel stack
+#
+.Lio_work_user:
+	lg	%r1,__LC_KERNEL_STACK	# r1 = kernel stack from lowcore
+	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)	# copy pt_regs
+	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)	# zero back chain
+	la	%r11,STACK_FRAME_OVERHEAD(%r1)	# r11 = copied pt_regs
+	lgr	%r15,%r1		# switch stack pointer, fall into .Lio_work_tif
+
+#
+# One of the work bits is on. Find out which one.
+#
+.Lio_work_tif:
+	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING	# checked first
+	jo	.Lio_mcck_pending
+	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
+	jo	.Lio_reschedule
+	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
+	jo	.Lio_sigpending
+	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
+	jo	.Lio_notify_resume
+	tm	__LC_CPU_FLAGS+7,_CIF_ASCE	# checked last
+	jo	.Lio_uaccess
+	j	.Lio_return		# beware of critical section cleanup
+
+#
+# _CIF_MCCK_PENDING is set, call handler
+#
+.Lio_mcck_pending:
+	# TRACE_IRQS_ON already done at .Lio_return
+	brasl	%r14,s390_handle_mcck	# CIF bit is expected to be cleared by handler
+	TRACE_IRQS_OFF
+	j	.Lio_return		# recheck the work bits
+
+#
+# _CIF_ASCE is set, load user space asce
+#
+.Lio_uaccess:
+	ni	__LC_CPU_FLAGS+7,255-_CIF_ASCE	# clear _CIF_ASCE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+	j	.Lio_return		# recheck the work bits
+
+#
+# _TIF_NEED_RESCHED is set, call schedule
+#
+.Lio_reschedule:
+	# TRACE_IRQS_ON already done at .Lio_return
+	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
+	brasl	%r14,schedule		# call scheduler
+	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
+	TRACE_IRQS_OFF
+	j	.Lio_return		# recheck the work bits
+
+#
+# _TIF_SIGPENDING is set, call do_signal
+#
+.Lio_sigpending:
+	# TRACE_IRQS_ON already done at .Lio_return
+	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	brasl	%r14,do_signal
+	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
+	TRACE_IRQS_OFF
+	j	.Lio_return		# recheck the work bits
+
+#
+# _TIF_NOTIFY_RESUME is set, call do_notify_resume
+#
+.Lio_notify_resume:
+	# TRACE_IRQS_ON already done at .Lio_return
+	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	brasl	%r14,do_notify_resume
+	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
+	TRACE_IRQS_OFF
+	j	.Lio_return		# recheck the work bits
+
+/*
+ * External interrupt handler routine
+ */
+ENTRY(ext_int_handler)
+	STCK	__LC_INT_CLOCK
+	stpt	__LC_ASYNC_ENTER_TIMER
+	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC	# save r8-r15 in async save area
+	lg	%r10,__LC_LAST_BREAK
+	lg	%r12,__LC_THREAD_INFO
+	larl	%r13,system_call
+	lmg	%r8,%r9,__LC_EXT_OLD_PSW	# r8/r9 = external old PSW
+	HANDLE_SIE_INTERCEPT %r14,3
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
+	tmhh	%r8,0x0001		# interrupting from user ?
+	jz	.Lext_skip
+	UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
+	LAST_BREAK %r14
+.Lext_skip:
+	stmg	%r0,%r7,__PT_R0(%r11)	# fill pt_regs: r0-r7
+	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC	# ... saved r8-r15
+	stmg	%r8,%r9,__PT_PSW(%r11)	# ... old PSW
+	lghi	%r1,__LC_EXT_PARAMS2	# r1 = lowcore offset of ext params2
+	mvc	__PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
+	mvc	__PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
+	mvc	__PT_INT_PARM_LONG(8,%r11),0(%r1)	# copy ext params2 via r1
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)	# clear pt_regs flags
+	TRACE_IRQS_OFF
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)	# zero back chain
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	lghi	%r3,EXT_INTERRUPT
+	brasl	%r14,do_IRQ		# r2 = pt_regs, r3 = interrupt type
+	j	.Lio_return		# shares the I/O interrupt return path
+
+/*
+ * Load idle PSW. The second "half" of this function is in .Lcleanup_idle.
+ */
+ENTRY(psw_idle)
+	stg	%r3,__SF_EMPTY(%r15)	# first 8 bytes of the idle PSW come from r3
+	larl	%r1,.Lpsw_idle_lpsw+4	# address just behind the 4 byte lpswe
+	stg	%r1,__SF_EMPTY+8(%r15)	# second 8 bytes of the idle PSW
+	STCK	__CLOCK_IDLE_ENTER(%r2)
+	stpt	__TIMER_IDLE_ENTER(%r2)
+.Lpsw_idle_lpsw:
+	lpswe	__SF_EMPTY(%r15)	# load the PSW built above
+	br	%r14
+.Lpsw_idle_end:
+
+.L__critical_end:			# end marker used by .Lcritical_length
+
+/*
+ * Machine check handler: revalidate state, build pt_regs, call the C handler
+ */
+ENTRY(mcck_int_handler)
+	STCK	__LC_MCCK_CLOCK
+	la	%r1,4095		# revalidate r1
+	spt	__LC_CPU_TIMER_SAVE_AREA-4095(%r1)	# revalidate cpu timer
+	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
+	lg	%r10,__LC_LAST_BREAK
+	lg	%r12,__LC_THREAD_INFO
+	larl	%r13,system_call
+	lmg	%r8,%r9,__LC_MCK_OLD_PSW	# r8/r9 = machine check old PSW
+	HANDLE_SIE_INTERCEPT %r14,4
+	tm	__LC_MCCK_CODE,0x80	# system damage?
+	jo	.Lmcck_panic		# yes -> rest of mcck code invalid
+	lghi	%r14,__LC_CPU_TIMER_SAVE_AREA
+	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
+	tm	__LC_MCCK_CODE+5,0x02	# stored cpu timer value valid?
+	jo	3f
+	la	%r14,__LC_SYNC_ENTER_TIMER	# not valid: pick smallest saved timer
+	clc	0(8,%r14),__LC_ASYNC_ENTER_TIMER
+	jl	0f
+	la	%r14,__LC_ASYNC_ENTER_TIMER
+0:	clc	0(8,%r14),__LC_EXIT_TIMER
+	jl	1f
+	la	%r14,__LC_EXIT_TIMER
+1:	clc	0(8,%r14),__LC_LAST_UPDATE_TIMER
+	jl	2f
+	la	%r14,__LC_LAST_UPDATE_TIMER
+2:	spt	0(%r14)			# reload cpu timer with the chosen value
+	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
+3:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
+	jno	.Lmcck_panic		# no -> skip cleanup critical
+	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT
+	tmhh	%r8,0x0001		# interrupting from user ?
+	jz	.Lmcck_skip
+	UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER
+	LAST_BREAK %r14
+.Lmcck_skip:
+	lghi	%r14,__LC_GPREGS_SAVE_AREA+64	# r14 = offset of saved r8-r15
+	stmg	%r0,%r7,__PT_R0(%r11)	# fill pt_regs: r0-r7
+	mvc	__PT_R8(64,%r11),0(%r14)	# ... saved r8-r15
+	stmg	%r8,%r9,__PT_PSW(%r11)	# ... old PSW
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)	# clear pt_regs flags
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)	# zero back chain
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	brasl	%r14,s390_do_machine_check
+	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
+	jno	.Lmcck_return
+	lg	%r1,__LC_KERNEL_STACK	# switch to kernel stack
+	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)	# copy pt_regs
+	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)	# zero back chain
+	la	%r11,STACK_FRAME_OVERHEAD(%r1)	# r11 = copied pt_regs
+	lgr	%r15,%r1
+	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off
+	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
+	jno	.Lmcck_return
+	TRACE_IRQS_OFF
+	brasl	%r14,s390_handle_mcck
+	TRACE_IRQS_ON
+.Lmcck_return:
+	lg	%r14,__LC_VDSO_PER_CPU
+	lmg	%r0,%r10,__PT_R0(%r11)	# restore r0-r10
+	mvc	__LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
+	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
+	jno	0f
+	stpt	__LC_EXIT_TIMER
+	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+0:	lmg	%r11,%r15,__PT_R11(%r11)	# restore r11-r15
+	lpswe	__LC_RETURN_MCCK_PSW	# resume the interrupted context
+
+.Lmcck_panic:
+	lg	%r14,__LC_PANIC_STACK
+	slgr	%r14,%r15
+	srag	%r14,%r14,PAGE_SHIFT
+	jz	0f			# r15 already within the panic stack page
+	lg	%r15,__LC_PANIC_STACK	# otherwise switch to the panic stack
+0:	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)	# room for frame + pt_regs
+	j	.Lmcck_skip
+
+#
+# PSW restart interrupt handler
+#
+ENTRY(restart_int_handler)
+	stg	%r15,__LC_SAVE_AREA_RESTART	# stash r15 before switching stacks
+	lg	%r15,__LC_RESTART_STACK	# switch to the restart stack
+	aghi	%r15,-__PT_SIZE			# create pt_regs on stack
+	xc	0(__PT_SIZE,%r15),0(%r15)	# zero the pt_regs area
+	stmg	%r0,%r14,__PT_R0(%r15)	# store r0-r14
+	mvc	__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART	# store the stashed r15
+	mvc	__PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw
+	aghi	%r15,-STACK_FRAME_OVERHEAD	# create stack frame on stack
+	xc	0(STACK_FRAME_OVERHEAD,%r15),0(%r15)	# zero the stack frame
+	lg	%r1,__LC_RESTART_FN		# load fn, parm & source cpu
+	lg	%r2,__LC_RESTART_DATA
+	lg	%r3,__LC_RESTART_SOURCE
+	ltgr	%r3,%r3				# test source cpu address
+	jm	1f				# negative -> skip source stop
+0:	sigp	%r4,%r3,SIGP_SENSE		# sigp sense to source cpu
+	brc	10,0b				# wait for status stored
+1:	basr	%r14,%r1			# call function
+	stap	__SF_EMPTY(%r15)		# store cpu address
+	llgh	%r3,__SF_EMPTY(%r15)
+2:	sigp	%r4,%r3,SIGP_STOP		# sigp stop to current cpu
+	brc	2,2b				# repeat while condition code is 2
+3:	j	3b				# spin forever
+
+	.section .kprobes.text, "ax"
+
+#ifdef CONFIG_CHECK_STACK
+/*
+ * The synchronous or the asynchronous stack overflowed. We are dead.
+ * No need to properly save the registers, we are going to panic anyway.
+ * Setup a pt_regs so that show_trace can provide a good call trace.
+ */
+stack_overflow:
+	lg	%r15,__LC_PANIC_STACK	# change to panic stack
+	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# r11 = pt_regs on panic stack
+	stmg	%r0,%r7,__PT_R0(%r11)	# store r0-r7
+	stmg	%r8,%r9,__PT_PSW(%r11)	# r8/r9 stored as PSW
+	mvc	__PT_R8(64,%r11),0(%r14)	# r14 presumably points to saved r8-r15
+	stg	%r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)	# zero back chain
+	lgr	%r2,%r11		# pass pointer to pt_regs
+	jg	kernel_stack_overflow	# plain jump, no return address set up
+#endif
+
+	.align	8
+.Lcleanup_table:			# range bounds compared by cleanup_critical
+	.quad	system_call		# +0
+	.quad	.Lsysc_do_svc		# +8
+	.quad	.Lsysc_tif		# +16
+	.quad	.Lsysc_restore		# +24
+	.quad	.Lsysc_done		# +32
+	.quad	.Lio_tif		# +40
+	.quad	.Lio_restore		# +48
+	.quad	.Lio_done		# +56
+	.quad	psw_idle		# +64
+	.quad	.Lpsw_idle_end		# +72
+
+cleanup_critical:			# dispatch on the address in r9
+	clg	%r9,BASED(.Lcleanup_table)	# system_call
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+8)	# .Lsysc_do_svc
+	jl	.Lcleanup_system_call
+	clg	%r9,BASED(.Lcleanup_table+16)	# .Lsysc_tif
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+24)	# .Lsysc_restore
+	jl	.Lcleanup_sysc_tif
+	clg	%r9,BASED(.Lcleanup_table+32)	# .Lsysc_done
+	jl	.Lcleanup_sysc_restore
+	clg	%r9,BASED(.Lcleanup_table+40)	# .Lio_tif
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+48)	# .Lio_restore
+	jl	.Lcleanup_io_tif
+	clg	%r9,BASED(.Lcleanup_table+56)	# .Lio_done
+	jl	.Lcleanup_io_restore
+	clg	%r9,BASED(.Lcleanup_table+64)	# psw_idle
+	jl	0f
+	clg	%r9,BASED(.Lcleanup_table+72)	# .Lpsw_idle_end
+	jl	.Lcleanup_idle
+0:	br	%r14			# r9 in none of the ranges: nothing to do
+
+
+.Lcleanup_system_call:
+	# check if stpt has been executed
+	clg	%r9,BASED(.Lcleanup_system_call_insn)
+	jh	0f
+	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	cghi	%r11,__LC_SAVE_AREA_ASYNC	# r11 = save area in use
+	je	0f
+	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
+0:	# check if stmg has been executed
+	clg	%r9,BASED(.Lcleanup_system_call_insn+8)
+	jh	0f
+	mvc	__LC_SAVE_AREA_SYNC(64),0(%r11)
+0:	# check if base register setup + TIF bit load has been done
+	clg	%r9,BASED(.Lcleanup_system_call_insn+16)
+	jhe	0f
+	# set up saved registers r10 and r12
+	stg	%r10,16(%r11)		# r10 last break
+	stg	%r12,32(%r11)		# r12 thread-info pointer
+0:	# check if the user time update has been done
+	clg	%r9,BASED(.Lcleanup_system_call_insn+24)
+	jh	0f
+	lg	%r15,__LC_EXIT_TIMER	# user timer += exit timer - sync enter timer
+	slg	%r15,__LC_SYNC_ENTER_TIMER
+	alg	%r15,__LC_USER_TIMER
+	stg	%r15,__LC_USER_TIMER
+0:	# check if the system time update has been done
+	clg	%r9,BASED(.Lcleanup_system_call_insn+32)
+	jh	0f
+	lg	%r15,__LC_LAST_UPDATE_TIMER	# system timer += last update - exit timer
+	slg	%r15,__LC_EXIT_TIMER
+	alg	%r15,__LC_SYSTEM_TIMER
+	stg	%r15,__LC_SYSTEM_TIMER
+0:	# update accounting time stamp
+	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
+	# do LAST_BREAK
+	lg	%r9,16(%r11)		# saved r10 = last break address
+	srag	%r9,%r9,23
+	jz	0f			# zero after the shift: do not record it
+	mvc	__TI_last_break(8,%r12),16(%r11)
+0:	# set up saved register r11
+	lg	%r15,__LC_KERNEL_STACK
+	la	%r9,STACK_FRAME_OVERHEAD(%r15)
+	stg	%r9,24(%r11)		# r11 pt_regs pointer
+	# fill pt_regs
+	mvc	__PT_R8(64,%r9),__LC_SAVE_AREA_SYNC
+	stmg	%r0,%r7,__PT_R0(%r9)
+	mvc	__PT_PSW(16,%r9),__LC_SVC_OLD_PSW
+	mvc	__PT_INT_CODE(4,%r9),__LC_SVC_ILC
+	xc	__PT_FLAGS(8,%r9),__PT_FLAGS(%r9)
+	mvi	__PT_FLAGS+7(%r9),_PIF_SYSCALL
+	# setup saved register r15
+	stg	%r15,56(%r11)		# r15 stack pointer
+	# set new psw address and exit
+	larl	%r9,.Lsysc_do_svc
+	br	%r14
+.Lcleanup_system_call_insn:
+	.quad	system_call		# +0: bound for the stpt check
+	.quad	.Lsysc_stmg		# +8: bound for the stmg check
+	.quad	.Lsysc_per		# +16: bound for the base register check
+	.quad	.Lsysc_vtime+18		# +24: bound for the user time check
+	.quad	.Lsysc_vtime+42		# +32: bound for the system time check
+
+.Lcleanup_sysc_tif:
+	larl	%r9,.Lsysc_tif		# set r9 to the start of the range
+	br	%r14
+
+.Lcleanup_sysc_restore:
+	clg	%r9,BASED(.Lcleanup_sysc_restore_insn)
+	je	0f			# at the final 4 byte insn: skip the reload
+	lg	%r9,24(%r11)		# get saved pointer to pt_regs
+	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)	# set up return PSW
+	mvc	0(64,%r11),__PT_R8(%r9)	# r8-r15 from pt_regs into save area
+	lmg	%r0,%r7,__PT_R0(%r9)	# reload r0-r7
+0:	lmg	%r8,%r9,__LC_RETURN_PSW	# r8/r9 = return PSW
+	br	%r14
+.Lcleanup_sysc_restore_insn:
+	.quad	.Lsysc_done - 4		# address 4 bytes before .Lsysc_done
+
+.Lcleanup_io_tif:
+	larl	%r9,.Lio_tif		# set r9 to the start of the range
+	br	%r14
+
+.Lcleanup_io_restore:
+	clg	%r9,BASED(.Lcleanup_io_restore_insn)
+	je	0f			# at the final lpswe: skip the reload
+	lg	%r9,24(%r11)		# get saved r11 pointer to pt_regs
+	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)	# set up return PSW
+	mvc	0(64,%r11),__PT_R8(%r9)	# r8-r15 from pt_regs into save area
+	lmg	%r0,%r7,__PT_R0(%r9)	# reload r0-r7
+0:	lmg	%r8,%r9,__LC_RETURN_PSW	# r8/r9 = return PSW
+	br	%r14
+.Lcleanup_io_restore_insn:
+	.quad	.Lio_done - 4		# address of the 4 byte lpswe before .Lio_done
+
+.Lcleanup_idle:
+	# copy interrupt clock & cpu timer
+	mvc	__CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
+	mvc	__TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
+	cghi	%r11,__LC_SAVE_AREA_ASYNC	# r11 = save area in use
+	je	0f
+	mvc	__CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK	# else use mcck values
+	mvc	__TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
+0:	# check if stck & stpt have been executed
+	clg	%r9,BASED(.Lcleanup_idle_insn)
+	jhe	1f
+	mvc	__CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)	# enter = exit
+	mvc	__TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
+1:	# account system time going idle
+	lg	%r9,__LC_STEAL_TIMER	# steal += idle enter clock - last update clock
+	alg	%r9,__CLOCK_IDLE_ENTER(%r2)
+	slg	%r9,__LC_LAST_UPDATE_CLOCK
+	stg	%r9,__LC_STEAL_TIMER
+	mvc	__LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
+	lg	%r9,__LC_SYSTEM_TIMER	# system += last update timer - idle enter timer
+	alg	%r9,__LC_LAST_UPDATE_TIMER
+	slg	%r9,__TIMER_IDLE_ENTER(%r2)
+	stg	%r9,__LC_SYSTEM_TIMER
+	mvc	__LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
+	# prepare return psw
+	nihh	%r8,0xfcfd		# clear irq & wait state bits
+	lg	%r9,48(%r11)		# return from psw_idle
+	br	%r14
+.Lcleanup_idle_insn:
+	.quad	.Lpsw_idle_lpsw		# address of the lpswe in psw_idle
+
+/*
+ * Integer constants
+ */
+	.align	8
+.Lcritical_start:
+	.quad	.L__critical_start	# start address of the critical section
+.Lcritical_length:
+	.quad	.L__critical_end - .L__critical_start	# its length in bytes
+
+
+#if IS_ENABLED(CONFIG_KVM)
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+ENTRY(sie64a)
+	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
+	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
+	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
+	xc	__SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
+	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
+	lg	%r14,__LC_GMAP			# get gmap pointer
+	ltgr	%r14,%r14
+	jz	.Lsie_gmap			# no gmap: skip the asce load
+	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
+.Lsie_gmap:
+	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
+	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
+	tm	__SIE_PROG20+3(%r14),1		# last exit...
+	jnz	.Lsie_done			# bit set: skip the sie instruction
+	LPP	__SF_EMPTY(%r15)		# set guest id
+	sie	0(%r14)
+.Lsie_done:
+	LPP	__SF_EMPTY+16(%r15)		# set host id
+	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
+	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
+# instructions between sie64a and .Lsie_done should not cause program
+# interrupts. So we use a nop (47 00 00 00) as a landing pad.
+# See also HANDLE_SIE_INTERCEPT
+.Lrewind_pad:
+	nop	0
+	.globl sie_exit
+sie_exit:
+	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
+	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
+	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
+	lg	%r2,__SF_EMPTY+24(%r15)		# return exit reason code
+	br	%r14
+.Lsie_fault:
+	lghi	%r14,-EFAULT
+	stg	%r14,__SF_EMPTY+24(%r15)	# set exit reason code
+	j	sie_exit			# leave through the normal exit path
+
+	.align	8
+.Lsie_critical:
+	.quad	.Lsie_gmap			# start of the sie critical range
+.Lsie_critical_length:
+	.quad	.Lsie_done - .Lsie_gmap		# its length in bytes
+
+	EX_TABLE(.Lrewind_pad,.Lsie_fault)
+	EX_TABLE(sie_exit,.Lsie_fault)
+#endif
+
+	.section .rodata, "a"
+#define SYSCALL(esame,emu)	.long esame	/* emit the first argument */
+	.globl	sys_call_table
+sys_call_table:
+#include "syscalls.S"
+#undef SYSCALL
+
+#ifdef CONFIG_COMPAT
+
+#define SYSCALL(esame,emu)	.long emu	/* emit the second argument */
+	.globl	sys_call_table_emu
+sys_call_table_emu:
+#include "syscalls.S"
+#undef SYSCALL
+#endif
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
new file mode 100644
index 000000000..834df047d
--- /dev/null
+++ b/arch/s390/kernel/entry.h
@@ -0,0 +1,81 @@
+#ifndef _ENTRY_H
+#define _ENTRY_H
+
+#include <linux/types.h>
+#include <linux/signal.h>
+#include <asm/ptrace.h>
+#include <asm/idle.h>
+
+extern void *restart_stack;
+extern unsigned long suspend_zero_pages;
+
+void system_call(void);
+void pgm_check_handler(void);
+void ext_int_handler(void);	/* assembly ENTRY() label */
+void io_int_handler(void);	/* assembly ENTRY() label */
+void mcck_int_handler(void);	/* assembly ENTRY() label */
+void restart_int_handler(void);	/* assembly ENTRY() label */
+void restart_call_handler(void);
+void psw_idle(struct s390_idle_data *, unsigned long); /* loads the idle PSW */
+
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
+asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
+
+int alloc_vector_registers(struct task_struct *tsk);
+
+void do_protection_exception(struct pt_regs *regs);
+void do_dat_exception(struct pt_regs *regs);
+
+void addressing_exception(struct pt_regs *regs);
+void data_exception(struct pt_regs *regs);
+void default_trap_handler(struct pt_regs *regs);
+void divide_exception(struct pt_regs *regs);
+void execute_exception(struct pt_regs *regs);
+void hfp_divide_exception(struct pt_regs *regs);
+void hfp_overflow_exception(struct pt_regs *regs);
+void hfp_significance_exception(struct pt_regs *regs);
+void hfp_sqrt_exception(struct pt_regs *regs);
+void hfp_underflow_exception(struct pt_regs *regs);
+void illegal_op(struct pt_regs *regs);
+void operand_exception(struct pt_regs *regs);
+void overflow_exception(struct pt_regs *regs);
+void privileged_op(struct pt_regs *regs);
+void space_switch_exception(struct pt_regs *regs);
+void special_op_exception(struct pt_regs *regs);
+void specification_exception(struct pt_regs *regs);
+void transaction_exception(struct pt_regs *regs);
+void translation_exception(struct pt_regs *regs);
+void vector_exception(struct pt_regs *regs);
+
+void do_per_trap(struct pt_regs *regs);	/* called for kernel mode PER events */
+void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
+void syscall_trace(struct pt_regs *regs, int entryexit);
+void kernel_stack_overflow(struct pt_regs * regs); /* target of stack_overflow */
+void do_signal(struct pt_regs *regs);	/* called when _TIF_SIGPENDING is set */
+void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+		     struct pt_regs *regs);
+void do_notify_resume(struct pt_regs *regs); /* for _TIF_NOTIFY_RESUME */
+
+void __init init_IRQ(void);
+void do_IRQ(struct pt_regs *regs, int irq); /* called by the io/ext handlers */
+void do_restart(void);
+void __init startup_init(void);
+void die(struct pt_regs *regs, const char *str);
+int setup_profiling_timer(unsigned int multiplier);
+void __init time_init(void);
+int pfn_is_nosave(unsigned long);
+void s390_early_resume(void);
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
+
+struct s390_mmap_arg_struct;
+struct fadvise64_64_args;
+struct old_sigaction;
+
+long sys_rt_sigreturn(void);
+long sys_sigreturn(void);
+
+long sys_s390_personality(unsigned int personality);
+long sys_s390_runtime_instr(int command, int signum);
+long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
+long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
+#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
new file mode 100644
index 000000000..e0eaf1113
--- /dev/null
+++ b/arch/s390/kernel/ftrace.c
@@ -0,0 +1,244 @@
+/*
+ * Dynamic function tracer architecture backend.
+ *
+ * Copyright IBM Corp. 2009,2014
+ *
+ *   Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *		Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/moduleloader.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/kprobes.h>
+#include <trace/syscall.h>
+#include <asm/asm-offsets.h>
+#include <asm/cacheflush.h>
+#include "entry.h"
+
+/*
+ * The mcount code looks like this:
+ *	stg	%r14,8(%r15)		# offset 0
+ *	larl	%r1,<&counter>		# offset 6
+ *	brasl	%r14,_mcount		# offset 12
+ *	lg	%r14,8(%r15)		# offset 18
+ * Total length is 24 bytes. Only the first instruction will be patched
+ * by ftrace_make_call / ftrace_make_nop.
+ * The enabled ftrace code block looks like this:
+ * >	brasl	%r0,ftrace_caller	# offset 0
+ *	larl	%r1,<&counter>		# offset 6
+ *	brasl	%r14,_mcount		# offset 12
+ *	lg	%r14,8(%r15)		# offset 18
+ * The ftrace function gets called with a non-standard C function call ABI
+ * where r0 contains the return address. It is also expected that the called
+ * function only clobbers r0 and r1, but restores r2-r15.
+ * For module code we can't directly jump to ftrace caller, but need a
+ * trampoline (ftrace_plt), which clobbers also r1.
+ * The return point of the ftrace function has offset 24, so execution
+ * continues behind the mcount block.
+ * The disabled ftrace code block looks like this:
+ * >	jg	.+24			# offset 0
+ *	larl	%r1,<&counter>		# offset 6
+ *	brasl	%r14,_mcount		# offset 12
+ *	lg	%r14,8(%r15)		# offset 18
+ * The jg instruction branches to offset 24 to skip as many instructions
+ * as possible.
+ * In case we use gcc's hotpatch feature the original and also the disabled
+ * function prologue contains only a single six byte instruction and looks
+ * like this:
+ * >	brcl	0,0			# offset 0
+ * To enable ftrace the code gets patched like above and afterwards looks
+ * like this:
+ * >	brasl	%r0,ftrace_caller	# offset 0
+ */
+
+unsigned long ftrace_plt;	/* trampoline page set up by ftrace_plt_init() */
+
+static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn)
+{
+#ifndef CC_USING_HOTPATCH
+	/* mcount prologue starts with: stg r14,8(r15) */
+	insn->disp = 0xf0080024;
+	insn->opc = 0xe3e0;
+#else
+	/* hotpatch prologue is a single: brcl 0,0 */
+	insn->disp = 0;
+	insn->opc = 0xc004;
+#endif
+}
+
+static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+	return insn->opc == BREAKPOINT_INSTRUCTION; /* 1 if breakpoint opcode */
+#else
+	return 0;
+#endif
+}
+
+static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+	insn->disp = KPROBE_ON_FTRACE_NOP;	/* marker: act as a nop */
+	insn->opc = BREAKPOINT_INSTRUCTION;	/* keep the breakpoint opcode */
+#endif
+}
+
+static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+	insn->disp = KPROBE_ON_FTRACE_CALL;	/* marker: act as an ftrace call */
+	insn->opc = BREAKPOINT_INSTRUCTION;	/* keep the breakpoint opcode */
+#endif
+}
+
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+		       unsigned long addr)
+{
+	return 0;	/* stub: arguments unused, always reports success */
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+		    unsigned long addr)
+{
+	struct ftrace_insn expected, patched, found;
+
+	if (probe_kernel_read(&found, (void *) rec->ip, sizeof(found)))
+		return -EFAULT;
+	if (addr == MCOUNT_ADDR) {
+		/* First conversion: original prologue becomes a nop. */
+		ftrace_generate_orig_insn(&expected);
+		ftrace_generate_nop_insn(&patched);
+	} else if (is_kprobe_on_ftrace(&found)) {
+		/*
+		 * A breakpoint instruction here means a kprobe sits at the
+		 * beginning of the function. The remaining four bytes of
+		 * the original instruction are set to the constant
+		 * KPROBE_ON_FTRACE_NOP, so that the kprobes handler can
+		 * execute a nop when it reaches this breakpoint.
+		 */
+		ftrace_generate_kprobe_call_insn(&expected);
+		ftrace_generate_kprobe_nop_insn(&patched);
+	} else {
+		/* Regular case: the ftrace call is turned into a nop. */
+		ftrace_generate_call_insn(&expected, rec->ip);
+		ftrace_generate_nop_insn(&patched);
+	}
+	/* Refuse to patch unless the code found is the code expected. */
+	if (memcmp(&expected, &found, sizeof(found)) != 0)
+		return -EINVAL;
+	s390_kernel_write((void *) rec->ip, &patched, sizeof(patched));
+	return 0;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	struct ftrace_insn expected, patched, found;
+
+	if (probe_kernel_read(&found, (void *) rec->ip, sizeof(found)))
+		return -EFAULT;
+	if (!is_kprobe_on_ftrace(&found)) {
+		/* Regular case: the nop is turned into an ftrace call. */
+		ftrace_generate_nop_insn(&expected);
+		ftrace_generate_call_insn(&patched, rec->ip);
+	} else {
+		/*
+		 * A breakpoint instruction here means a kprobe sits at the
+		 * beginning of the function. The remaining four bytes of
+		 * the original instruction are set to the constant
+		 * KPROBE_ON_FTRACE_CALL, so that the kprobes handler can
+		 * execute a brasl when it reaches this breakpoint.
+		 */
+		ftrace_generate_kprobe_nop_insn(&expected);
+		ftrace_generate_kprobe_call_insn(&patched);
+	}
+	/* Refuse to patch unless the code found is the code expected. */
+	if (memcmp(&expected, &found, sizeof(found)) != 0)
+		return -EINVAL;
+	s390_kernel_write((void *) rec->ip, &patched, sizeof(patched));
+	return 0;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	return 0;	/* stub: func is unused, always reports success */
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+	return 0;	/* stub: nothing to set up, always reports success */
+}
+
+static int __init ftrace_plt_init(void)
+{
+	unsigned int *ip;
+
+	ftrace_plt = (unsigned long) module_alloc(PAGE_SIZE);
+	if (!ftrace_plt)
+		panic("cannot allocate ftrace plt\n");
+	ip = (unsigned int *) ftrace_plt; /* fill the page as 32 bit words */
+	ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
+	ip[1] = 0x100a0004; /* remainder of the lg encoding */
+	ip[2] = 0x07f10000; /* br 1, followed by two zero bytes */
+	ip[3] = FTRACE_ADDR >> 32; /* upper half of the 64 bit branch target */
+	ip[4] = FTRACE_ADDR & 0xffffffff; /* lower half of the branch target */
+	set_memory_ro(ftrace_plt, 1); /* one page; return value is not checked */
+	return 0;
+}
+device_initcall(ftrace_plt_init);
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * Hook the return address and push it in the stack of return addresses
+ * in current thread info.
+ */
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
+{
+	struct ftrace_graph_ent trace;
+
+	/* Keep the return address if graph tracing is dead or paused. */
+	if (unlikely(ftrace_graph_is_dead()))
+		return parent;
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return parent;
+	ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
+	trace.depth = current->curr_ret_stack + 1;
+	trace.func = ip;
+	/* Only trace if the calling function expects to. */
+	if (!ftrace_graph_entry(&trace))
+		return parent;
+	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
+		return parent;
+	/* Hooked: the caller will return through return_to_handler. */
+	return (unsigned long) return_to_handler;
+}
+NOKPROBE_SYMBOL(prepare_ftrace_return);
+
+/*
+ * Patch the kernel code at ftrace_graph_caller location. The instruction
+ * there is branch relative on condition. To enable the ftrace graph code
+ * block, we simply patch the mask field of the instruction to zero and
+ * turn the instruction into a nop.
+ * To disable the ftrace graph code the mask field will be patched to
+ * all ones, which turns the instruction into an unconditional branch.
+ */
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	u8 byte = 0x04; /* mask field zero: the branch becomes a nop */
+
+	s390_kernel_write(__va(ftrace_graph_caller)+1, &byte, sizeof(byte));
+	return 0;
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	u8 byte = 0xf4; /* mask field all ones: unconditional branch */
+
+	s390_kernel_write(__va(ftrace_graph_caller)+1, &byte, sizeof(byte));
+	return 0;
+}
+
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
new file mode 100644
index 000000000..59b7c6470
--- /dev/null
+++ b/arch/s390/kernel/head.S
@@ -0,0 +1,454 @@
+/*
+ * Copyright IBM Corp. 1999, 2010
+ *
+ *    Author(s): Hartmut Penner <hp@de.ibm.com>
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *		 Rob van der Heij <rvdhei@iae.nl>
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>
+ *
+ * There are 5 different IPL methods
+ *  1) load the image directly into ram at address 0 and do a PSW restart
+ *  2) linload will load the image from address 0x10000 to memory 0x10000
+ *     and start the code thru LPSW 0x0008000080010000 (VM only, deprecated)
+ *  3) generate the tape ipl header, store the generated image on a tape
+ *     and ipl from it
+ *     In case of SL tape you need to IPL 5 times to get past VOL1 etc
+ *  4) generate the vm reader ipl header, move the generated image to the
+ *     VM reader (use option NOH!) and do an ipl from reader (VM only)
+ *  5) direct call of start by the SALIPL loader
+ *  We use the cpuid to distinguish between VM and native ipl
+ *  params for kernel are pushed to 0x10400 (see setup.h)
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+
+#define ARCH_OFFSET	4
+
+__HEAD
+
+#define IPL_BS	0x730
+	.org	0
+	.long	0x00080000,0x80000000+iplstart	# The first 24 bytes are loaded
+	.long	0x02000018,0x60000050		# by ipl to addresses 0-23.
+	.long	0x02000068,0x60000050		# (a PSW and two CCWs).
+	.fill	80-24,1,0x40			# bytes 24-79 are discarded !!
+	.long	0x020000f0,0x60000050		# The next 160 bytes are loaded
+	.long	0x02000140,0x60000050		# to addresses 0x18-0xb7
+	.long	0x02000190,0x60000050		# They form the continuation
+	.long	0x020001e0,0x60000050		# of the CCW program started
+	.long	0x02000230,0x60000050		# by ipl and load the range
+	.long	0x02000280,0x60000050		# 0x0f0-0x730 from the image
+	.long	0x020002d0,0x60000050		# to the range 0x0f0-0x730
+	.long	0x02000320,0x60000050		# in memory. At the end of
+	.long	0x02000370,0x60000050		# the channel program the PSW
+	.long	0x020003c0,0x60000050		# at location 0 is loaded.
+	.long	0x02000410,0x60000050		# Initial processing starts
+	.long	0x02000460,0x60000050		# at iplstart.
+	.long	0x020004b0,0x60000050
+	.long	0x02000500,0x60000050
+	.long	0x02000550,0x60000050
+	.long	0x020005a0,0x60000050
+	.long	0x020005f0,0x60000050
+	.long	0x02000640,0x60000050
+	.long	0x02000690,0x60000050
+	.long	0x020006e0,0x20000050		# last CCW, flag byte 0x20 not 0x60
+
+	.org	0x200
+#
+# subroutine to set architecture mode
+#
+.Lsetmode:
+	mvi	__LC_AR_MODE_ID,1	# set esame flag
+	slr	%r0,%r0 		# set cpuid to zero
+	lhi	%r1,2			# mode 2 = esame (dump)
+	sigp	%r1,%r0,0x12		# switch to esame mode
+	bras	%r13,0f			# r13 = address of the zero words below
+	.fill	16,4,0x0		# 16 zero words, source for the lmh
+0:	lmh	%r0,%r15,0(%r13)	# clear high-order half of gprs
+	sam31				# switch to 31 bit addressing mode
+	br	%r14			# return to the caller
+
+#
+# subroutine to wait for end I/O
+#
+.Lirqwait:
+	mvc	0x1f0(16),.Lnewpsw	# set up IO interrupt psw
+	lpsw	.Lwaitpsw		# load the wait psw
+.Lioint:				# both psws below point here
+	br	%r14			# return to the caller
+	.align	8
+.Lnewpsw:
+	.quad	0x0000000080000000,.Lioint
+.Lwaitpsw:
+	.long	0x020a0000,0x80000000+.Lioint
+
+#
+# subroutine for loading cards from the reader
+#
+.Lloader:
+	la	%r4,0(%r14)		# keep return address, r14 is reused below
+	la	%r3,.Lorb		# r3 = address of orb
+	la	%r5,.Lirb		# r5 = address of irb
+	la	%r6,.Lccws		# r6 = first ccw
+	la	%r7,20			# 20 ccws to set up
+.Linit:
+	st	%r2,4(%r6)		# initialize CCW data addresses
+	la	%r2,0x50(%r2)		# advance by one 0x50 byte record
+	la	%r6,8(%r6)		# next ccw
+	bct	7,.Linit
+
+	lctl	%c6,%c6,.Lcr6		# set IO subclass mask
+	slr	%r2,%r2			# r2 = running total of bytes loaded
+.Lldlp:
+	ssch	0(%r3)			# load chunk of 1600 bytes
+	bnz	.Llderr
+.Lwait4irq:
+	bas	%r14,.Lirqwait
+	c	%r1,0xb8		# compare subchannel number
+	bne	.Lwait4irq
+	tsch	0(%r5)
+
+	slr	%r0,%r0
+	ic	%r0,8(%r5)		# get device status
+	chi	%r0,8			# channel end ?
+	be	.Lcont
+	chi	%r0,12			# channel end + device end ?
+	be	.Lcont
+
+	l	%r0,4(%r5)
+	s	%r0,8(%r3)		# r0/8 = number of ccws executed
+	mhi	%r0,10			# *10 = number of bytes in ccws
+	lh	%r3,10(%r5)		# get residual count
+	sr	%r0,%r3 		# #ccws*80-residual=#bytes read
+	ar	%r2,%r0
+
+	br	%r4			# r2 contains the total size
+
+.Lcont:
+	ahi	%r2,0x640		# add 0x640 to total size
+	la	%r6,.Lccws
+	la	%r7,20
+.Lincr:
+	l	%r0,4(%r6)		# update CCW data addresses
+	ahi	%r0,0x640
+	st	%r0,4(%r6)
+	ahi	%r6,8
+	bct	7,.Lincr
+
+	b	.Lldlp			# start the next chunk
+.Llderr:
+	lpsw	.Lcrash			# ssch failed: load the psw at .Lcrash
+
+	.align	8
+.Lorb:	.long	0x00000000,0x0080ff00,.Lccws
+.Lirb:	.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+.Lcr6:	.long	0xff000000
+.Lloadp:.long	0,0
+	.align	8
+.Lcrash:.long	0x000a0000,0x00000000
+
+	.align	8
+.Lccws: .rept	19
+	.long	0x02600050,0x00000000
+	.endr
+	.long	0x02200050,0x00000000
+
+iplstart:
+	bas	%r14,.Lsetmode		# switch to esame mode, 31 bit addressing
+	lh	%r1,0xb8		# test if subchannel number
+	bct	%r1,.Lnoload		#  is valid
+	l	%r1,0xb8		# load ipl subchannel number
+	la	%r2,IPL_BS		# load start address
+	bas	%r14,.Lloader		# load rest of ipl image
+	l	%r12,.Lparm		# pointer to parameter area
+	st	%r1,IPL_DEVICE+ARCH_OFFSET-PARMAREA(%r12) # save ipl device number
+
+#
+# load parameter file from ipl device
+#
+.Lagain1:
+	l	%r2,.Linitrd		# ramdisk loc. is temp
+	bas	%r14,.Lloader		# load parameter file
+	ltr	%r2,%r2 		# got anything ?
+	bz	.Lnopf
+	chi	%r2,895			# cap the length at 895
+	bnh	.Lnotrunc
+	la	%r2,895
+.Lnotrunc:
+	l	%r4,.Linitrd
+	clc	0(3,%r4),.L_hdr		# if it is HDRx
+	bz	.Lagain1		# skip dataset header
+	clc	0(3,%r4),.L_eof		# if it is EOFx
+	bz	.Lagain1		# skip dataset trailer
+	la	%r5,0(%r4,%r2)
+	lr	%r3,%r2			# NOTE(review): r3 is overwritten right below
+	la	%r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line
+	mvc	0(256,%r3),0(%r4)	# copy 890 bytes in four pieces
+	mvc	256(256,%r3),256(%r4)
+	mvc	512(256,%r3),512(%r4)
+	mvc	768(122,%r3),768(%r4)
+	slr	%r0,%r0
+	b	.Lcntlp
+.Ldelspc:
+	ic	%r0,0(%r2,%r3)
+	chi	%r0,0x20		# is it a space ?
+	be	.Lcntlp			# yes: keep stepping backwards
+	ahi	%r2,1			# no: terminate right after this byte
+	b	.Leolp
+.Lcntlp:
+	brct	%r2,.Ldelspc		# r2 = r2 - 1, loop while not zero
+.Leolp:
+	slr	%r0,%r0
+	stc	%r0,0(%r2,%r3)		# terminate buffer
+.Lnopf:
+
+#
+# load ramdisk from ipl device
+#
+.Lagain2:
+	l	%r2,.Linitrd		# addr of ramdisk
+	st	%r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12)
+	bas	%r14,.Lloader		# load ramdisk
+	st	%r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of rd
+	ltr	%r2,%r2
+	bnz	.Lrdcont
+	st	%r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found
+.Lrdcont:
+	l	%r2,.Linitrd		# r2 = start of the loaded data
+
+	clc	0(3,%r2),.L_hdr		# skip HDRx and EOFx
+	bz	.Lagain2		# header record: load again
+	clc	0(3,%r2),.L_eof
+	bz	.Lagain2		# trailer record: load again
+
+#
+# reset files in VM reader
+#
+	stidp	.Lcpuid			# store cpuid
+	tm	.Lcpuid,0xff		# running VM ?
+	bno	.Lnoreset
+	la	%r2,.Lreset		# r2 = address of the command text
+	lhi	%r3,26			# r3 = its length, 26 bytes
+	diag	%r2,%r3,8		# issue the command with diag 8
+	la	%r5,.Lirb
+	stsch	0(%r5)			# check if irq is pending
+	tm	30(%r5),0x0f		# by verifying if any of the
+	bnz	.Lwaitforirq		# activity or status control
+	tm	31(%r5),0xff		# bits is set in the schib
+	bz	.Lnoreset
+.Lwaitforirq:
+	bas	%r14,.Lirqwait		# wait for IO interrupt
+	c	%r1,0xb8		# compare subchannel number
+	bne	.Lwaitforirq
+	la	%r5,.Lirb
+	tsch	0(%r5)
+.Lnoreset:
+	b	.Lnoload
+
+#
+# everything loaded, go for it
+#
+.Lnoload:
+	l	%r1,.Lstartup		# r1 = address of startup
+	br	%r1			# continue there, no return
+
+.Linitrd:.long _end			# default address of initrd
+.Lparm:	.long  PARMAREA
+.Lstartup: .long startup
+.Lreset:.byte	0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40
+	.byte	0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6
+	.byte	0xc8,0xd6,0xd3,0xc4	# "change rdr all keep nohold"
+.L_eof: .long	0xc5d6c600	 /* C'EOF' */
+.L_hdr: .long	0xc8c4d900	 /* C'HDR' */
+	.align	8
+.Lcpuid:.fill	8,1,0
+
+#
+# SALIPL loader support. Based on a patch by Rob van der Heij.
+# This entry point is called directly from the SALIPL loader and
+# doesn't need a builtin ipl record.
+#
+	.org	0x800
+ENTRY(start)
+	stm	%r0,%r15,0x07b0		# store registers
+	bas	%r14,.Lsetmode		# switch to esame mode, 31 bit addressing
+	basr	%r12,%r0
+.base:
+	l	%r11,.parm		# pointer to parameter area
+	l	%r8,.cmd		# pointer to command buffer
+
+	ltr	%r9,%r9			# do we have SALIPL parameters?
+	bp	.sk8x8
+
+	mvc	0(64,%r8),0x00b0	# copy saved registers (NOTE(review): stm above used 0x07b0 - confirm)
+	xc	64(240-64,%r8),0(%r8)	# remainder of buffer
+	tr	0(64,%r8),.lowcase	# translate through the .lowcase table
+	b	.gotr
+.sk8x8:
+	mvc	0(240,%r8),0(%r9)	# copy iplparms into buffer
+.gotr:
+	slr	%r0,%r0
+	st	%r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11)	# no initrd
+	st	%r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11)
+	j	startup 		# continue with startup
+.cmd:	.long	COMMAND_LINE		# address of command line buffer
+.parm:	.long	PARMAREA
+.lowcase:
+	.byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
+	.byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
+	.byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17
+	.byte 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f
+	.byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27
+	.byte 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f
+	.byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37
+	.byte 0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f
+	.byte 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47
+	.byte 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f
+	.byte 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57
+	.byte 0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f
+	.byte 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67
+	.byte 0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f
+	.byte 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77
+	.byte 0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f
+
+	.byte 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87
+	.byte 0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f
+	.byte 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97
+	.byte 0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f
+	.byte 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7
+	.byte 0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf
+	.byte 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7
+	.byte 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf
+	.byte 0xc0,0x81,0x82,0x83,0x84,0x85,0x86,0x87	# .abcdefg
+	.byte 0x88,0x89,0xca,0xcb,0xcc,0xcd,0xce,0xcf	# hi
+	.byte 0xd0,0x91,0x92,0x93,0x94,0x95,0x96,0x97	# .jklmnop
+	.byte 0x98,0x99,0xda,0xdb,0xdc,0xdd,0xde,0xdf	# qr
+	.byte 0xe0,0xe1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7	# ..stuvwx
+	.byte 0xa8,0xa9,0xea,0xeb,0xec,0xed,0xee,0xef	# yz
+	.byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7
+	.byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+
+#
+# startup-code at 0x10000, running in absolute addressing mode
+# this is called either by the ipl loader or directly by PSW restart
+# or linload or SALIPL
+#
+	.org	0x10000
+ENTRY(startup)
+	j	.Lep_startup_normal
+	.org	0x10008
+#
+# This is a list of s390 kernel entry points. At address 0x1000f the number of
+# valid entry points is stored.
+#
+# IMPORTANT: Do not change this table, it is s390 kernel ABI!
+#
+	.ascii	"S390EP"
+	.byte	0x00,0x01
+#
+# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode
+#
+	.org	0x10010
+ENTRY(startup_kdump)
+	j	.Lep_startup_kdump
+.Lep_startup_normal:
+	mvi	__LC_AR_MODE_ID,1	# set esame flag
+	slr	%r0,%r0 		# set cpuid to zero
+	lhi	%r1,2			# mode 2 = esame (dump)
+	sigp	%r1,%r0,0x12		# switch to esame mode
+	bras	%r13,0f			# r13 = address of the zero words below
+	.fill	16,4,0x0
+0:	lmh	%r0,%r15,0(%r13)	# clear high-order half of gprs
+	sam31				# switch to 31 bit addressing mode
+	basr	%r13,0			# get base
+.LPG0:
+	xc	0x200(256),0x200	# partially clear lowcore
+	xc	0x300(256),0x300
+	xc	0xe00(256),0xe00
+	stck	__LC_LAST_UPDATE_CLOCK
+	spt	6f-.LPG0(%r13)
+	mvc	__LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
+	xc	__LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST
+	# check capabilities against MARCH_{Z900,Z990,Z9_109,Z10,Z196,ZEC12,Z13}
+	.insn	s,0xb2b10000,0		# store facilities @ __LC_STFL_FAC_LIST
+	tm	__LC_STFL_FAC_LIST,0x01	# stfle available ?
+	jz	0f
+	la	%r0,1
+	.insn	s,0xb2b00000,__LC_STFL_FAC_LIST	# store facility list extended
+	# verify if all required facilities are supported by the machine
+0:	la	%r1,__LC_STFL_FAC_LIST
+	la	%r2,3f+8-.LPG0(%r13)	# r2 = required facility list (count word)
+	l	%r3,0(%r2)		# r3 = number of words to check
+1:	l	%r0,0(%r1)		# facility word stored by the machine
+	n	%r0,4(%r2)		# keep only the required bits
+	cl	%r0,4(%r2)		# all of them present ?
+	jne	2f			# no: print message and crash
+	la	%r1,4(%r1)
+	la	%r2,4(%r2)
+	ahi	%r3,-1
+	jnz	1b
+	j	4f			# all words ok, continue startup
+2:	l	%r15,.Lstack-.LPG0(%r13)
+	ahi	%r15,-96
+	la	%r2,.Lals_string-.LPG0(%r13)
+	l	%r3,.Lsclp_print-.LPG0(%r13)
+	basr	%r14,%r3		# call _sclp_print_early, r2 = message
+	lpsw	3f-.LPG0(%r13)		# machine type not good enough, crash
+.Lals_string:
+	.asciz	"The Linux kernel requires more recent processor hardware"
+.Lsclp_print:
+	.long	_sclp_print_early
+.Lstack:
+	.long	0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER))
+	.align 16
+3:	.long	0x000a0000,0x8badcccc
+
+# List of facilities that are required. If not all facilities are present
+# the kernel will crash. Format is number of facility words with bits set,
+# followed by the facility words.
+
+#if defined(CONFIG_MARCH_Z13)
+	.long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+#elif defined(CONFIG_MARCH_ZEC12)
+	.long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+#elif defined(CONFIG_MARCH_Z196)
+	.long 2, 0xc100eff2, 0xf46c0000
+#elif defined(CONFIG_MARCH_Z10)
+	.long 2, 0xc100eff2, 0xf0680000
+#elif defined(CONFIG_MARCH_Z9_109)
+	.long 1, 0xc100efc2
+#elif defined(CONFIG_MARCH_Z990)
+	.long 1, 0xc0002000
+#elif defined(CONFIG_MARCH_Z900)
+	.long 1, 0xc0000000
+#endif
+4:
+	/* Continue with 64bit startup code in head64.S */
+	sam64				# switch to 64 bit mode
+	jg	startup_continue
+
+	.align	8
+6:	.long	0x7fffffff,0xffffffff
+
+#include "head_kdump.S"
+
+#
+# params at 10400 (setup.h)
+#
+	.org	PARMAREA
+	.long	0,0			# IPL_DEVICE
+	.long	0,0			# INITRD_START
+	.long	0,0			# INITRD_SIZE
+	.long	0,0			# OLDMEM_BASE
+	.long	0,0			# OLDMEM_SIZE
+
+	.org	COMMAND_LINE
+	.byte	"root=/dev/ram0 ro"
+	.byte	0
+
+	.org	0x11000
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
new file mode 100644
index 000000000..d7c005075
--- /dev/null
+++ b/arch/s390/kernel/head64.S
@@ -0,0 +1,105 @@
+/*
+ * Copyright IBM Corp. 1999, 2010
+ *
+ *   Author(s):	Hartmut Penner <hp@de.ibm.com>
+ *		Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *		Rob van der Heij <rvdhei@iae.nl>
+ *		Heiko Carstens <heiko.carstens@de.ibm.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+
+__HEAD
+ENTRY(startup_continue)
+	larl	%r1,sched_clock_base_cc
+	mvc	0(8,%r1),__LC_LAST_UPDATE_CLOCK	# seed it from the lowcore clock value
+	larl	%r13,.LPG1		# get base
+	lctlg	%c0,%c15,.Lctl-.LPG1(%r13)	# load control registers
+	lg	%r12,.Lparmaddr-.LPG1(%r13)	# pointer to parameter area
+					# move IPL device to lowcore
+	lghi	%r0,__LC_PASTE
+	stg	%r0,__LC_VDSO_PER_CPU
+#
+# Setup stack
+#
+	larl	%r15,init_thread_union
+	stg	%r15,__LC_THREAD_INFO	# cache thread info in lowcore
+	lg	%r14,__TI_task(%r15)	# cache current in lowcore
+	stg	%r14,__LC_CURRENT
+	aghi	%r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_thread_union + THREAD_SIZE
+	stg	%r15,__LC_KERNEL_STACK	# set end of kernel stack
+	aghi	%r15,-160		# leave 160 bytes below the stack end
+#
+# Save ipl parameters, clear bss memory, initialize storage key for kernel pages,
+# and create a kernel NSS if the SAVESYS= parm is defined
+#
+	brasl	%r14,startup_init
+	lpswe	.Lentry-.LPG1(13)	# jump to _stext in primary-space,
+					# virtual and never return ...
+	.align	16
+.LPG1:
+.Lentry:.quad	0x0000000180000000,_stext
+.Lctl:	.quad	0x04040000		# cr0: AFP registers & secondary space
+	.quad	0			# cr1: primary space segment table
+	.quad	.Lduct			# cr2: dispatchable unit control table
+	.quad	0			# cr3: instruction authorization
+	.quad	0			# cr4: instruction authorization
+	.quad	.Lduct			# cr5: primary-aste origin
+	.quad	0			# cr6:	I/O interrupts
+	.quad	0			# cr7:	secondary space segment table
+	.quad	0			# cr8:	access registers translation
+	.quad	0			# cr9:	tracing off
+	.quad	0			# cr10: tracing off
+	.quad	0			# cr11: tracing off
+	.quad	0			# cr12: tracing off
+	.quad	0			# cr13: home space segment table
+	.quad	0xc0000000		# cr14: machine check handling off
+	.quad	.Llinkage_stack		# cr15: linkage stack operations
+.Lpcmsk:.quad	0x0000000180000000
+.L4malign:.quad 0xffffffffffc00000
+.Lscan2g:.quad	0x80000000 + 0x20000 - 8	# 2GB + 128K - 8
+.Lnop:	.long	0x07000700
+.Lparmaddr:
+	.quad	PARMAREA
+	.align	64
+.Lduct: .long	0,.Laste,.Laste,0,.Lduald,0,0,0
+	.long	0,0,0,0,0,0,0,0
+.Laste:	.quad	0,0xffffffffffffffff,0,0,0,0,0,0
+	.align	128
+.Lduald:.rept	8
+	.long	0x80000000,0,0,0	# invalid access-list entries
+	.endr
+.Llinkage_stack:
+	.long	0,0,0x89000000,0,0,0,0x8a000000,0
+
+ENTRY(_ehead)
+
+	.org	0x100000 - 0x11000	# head.o ends at 0x11000
+#
+# startup-code, running in absolute addressing mode
+#
+ENTRY(_stext)
+	basr	%r13,0			# get base
+.LPG3:
+# check control registers
+	stctg	%c0,%c15,0(%r15)	# store cr0-cr15 at 0(r15)
+	oi	6(%r15),0x60		# enable sigp emergency & external call
+	oi	4(%r15),0x10		# switch on low address protection
+	lctlg	%c0,%c15,0(%r15)	# load the modified values back
+
+	lam	0,15,.Laregs-.LPG3(%r13)	# load acrs needed by uaccess
+	brasl	%r14,start_kernel	# go to C code
+#
+# We returned from start_kernel ?!? PANIK
+#
+	basr	%r13,0
+	lpswe	.Ldw-.(%r13)		# load disabled wait psw
+
+	.align	8
+.Ldw:	.quad	0x0002000180000000,0x0000000000000000
+.Laregs:.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S
new file mode 100644
index 000000000..d05950f02
--- /dev/null
+++ b/arch/s390/kernel/head_kdump.S
@@ -0,0 +1,100 @@
+/*
+ * S390 kdump lowlevel functions (new kernel)
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <asm/sigp.h>
+
+#define DATAMOVER_ADDR	0x4000
+#define COPY_PAGE_ADDR	0x6000
+
+#ifdef CONFIG_CRASH_DUMP
+
+#
+# kdump entry (new kernel - not yet relocated)
+#
+# Note: This code has to be position independent
+#
+
+.align 2
+.Lep_startup_kdump:
+	lhi	%r1,2				# mode 2 = esame (dump)
+	sigp	%r1,%r0,SIGP_SET_ARCHITECTURE	# Switch to esame mode
+	sam64					# Switch to 64 bit addressing
+	basr	%r13,0				# r13 = run-time address of .Lbase
+.Lbase:
+	larl	%r2,.Lbase_addr			# Check, if we have been
+	lg	%r2,0(%r2)			# already relocated:
+	clgr	%r2,%r13			# link-time .Lbase == r13 ?
+	jne	.Lrelocate			# No : Start data mover
+	lghi	%r2,0				# Yes: Start kdump kernel
+	brasl	%r14,startup_kdump_relocated
+
+.Lrelocate:
+	larl	%r4,startup
+	lg	%r2,0x418(%r4)			# Get kdump base (presumably OLDMEM_BASE)
+	lg	%r3,0x420(%r4)			# Get kdump size (presumably OLDMEM_SIZE)
+
+	larl	%r10,.Lcopy_start		# Source of data mover
+	lghi	%r8,DATAMOVER_ADDR		# Target of data mover
+	mvc	0(256,%r8),0(%r10)		# Copy data mover code
+
+	agr	%r8,%r2				# Copy data mover to
+	mvc	0(256,%r8),0(%r10)		# reserved mem
+
+	lghi	%r14,DATAMOVER_ADDR		# Jump to copied data mover
+	basr	%r14,%r14
+.Lbase_addr:
+	.quad	.Lbase				# link-time address of .Lbase
+
+#
+# kdump data mover code (runs at address DATAMOVER_ADDR)
+#
+# r2: kdump base address
+# r3: kdump size
+#
+.Lcopy_start:
+	basr	%r13,0				# Base
+0:
+	lgr	%r11,%r2			# Save kdump base address
+	lgr	%r12,%r2
+	agr	%r12,%r3			# Compute kdump end address
+
+	lghi	%r5,0				# r5 walks the old kernel from 0
+	lghi	%r10,COPY_PAGE_ADDR		# Load copy page address
+1:
+	mvc	0(256,%r10),0(%r5)		# Copy old kernel to tmp
+	mvc	0(256,%r5),0(%r11)		# Copy new kernel to old
+	mvc	0(256,%r11),0(%r10)		# Copy tmp to new
+	aghi	%r11,256			# Advance both sides by 256 bytes
+	aghi	%r5,256
+	clgr	%r11,%r12			# Reached the kdump end address ?
+	jl	1b				# No : swap the next 256 bytes
+
+	lg	%r14,.Lstartup_kdump-0b(%r13)
+	basr	%r14,%r14			# Start relocated kernel
+.Lstartup_kdump:
+	.long	0x00000000,0x00000000 + startup_kdump_relocated
+.Lcopy_end:
+
+#
+# Startup of kdump (relocated new kernel)
+#
+.align 2
+startup_kdump_relocated:
+	basr	%r13,0				# Base
+0:	lpswe	.Lrestart_psw-0b(%r13)		# Start new kernel...
+.align	8
+.Lrestart_psw:
+	.quad	0x0000000080000000,0x0000000000000000 + startup	# psw address = startup
+#else
+.align 2
+.Lep_startup_kdump:
+	larl	%r13,startup_kdump_crash	# built without CONFIG_CRASH_DUMP
+	lpswe	0(%r13)				# load the psw defined below
+.align 8
+startup_kdump_crash:
+	.quad	0x0002000080000000,0x0000000000000000 + startup_kdump_crash
+#endif /* CONFIG_CRASH_DUMP */
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
new file mode 100644
index 000000000..7a55c29b0
--- /dev/null
+++ b/arch/s390/kernel/idle.c
@@ -0,0 +1,125 @@
+/*
+ * Idle functions for s390.
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/kprobes.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <asm/cputime.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include "entry.h"
+
+static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
+
+void enabled_wait(void)
+{
+	struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+	unsigned long long idle_time;
+	unsigned long psw_mask;
+
+	trace_hardirqs_on();
+
+	/* Wait for external, I/O or machine check interrupt. */
+	psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
+		PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+	clear_cpu_flag(CIF_NOHZ_DELAY);
+
+	/* Call the assembler magic in entry.S; it gets the mask built above. */
+	psw_idle(idle, psw_mask);
+
+	trace_hardirqs_off();
+
+	/* Account time spent with enabled wait psw loaded as idle time. */
+	write_seqcount_begin(&idle->seqcount);	/* readers retry on overlap */
+	idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
+	idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
+	idle->idle_time += idle_time;
+	idle->idle_count++;
+	account_idle_time(idle_time);
+	write_seqcount_end(&idle->seqcount);
+}
+NOKPROBE_SYMBOL(enabled_wait);
+
+static ssize_t show_idle_count(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct s390_idle_data *data = &per_cpu(s390_idle, dev->id);
+	unsigned long long count;
+	unsigned int start;
+
+	do {	/* retry until the snapshot did not overlap a writer */
+		start = read_seqcount_begin(&data->seqcount);
+		count = ACCESS_ONCE(data->idle_count);
+		/* a wait that is in progress counts as one more */
+		count += ACCESS_ONCE(data->clock_idle_enter) ? 1 : 0;
+	} while (read_seqcount_retry(&data->seqcount, start));
+	return sprintf(buf, "%llu\n", count);
+}
+DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
+
+static ssize_t show_idle_time(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct s390_idle_data *data = &per_cpu(s390_idle, dev->id);
+	unsigned long long now, total, enter, leave;
+	unsigned int start;
+
+	do {	/* retry until the snapshot did not overlap a writer */
+		now = get_tod_clock();
+		start = read_seqcount_begin(&data->seqcount);
+		total = ACCESS_ONCE(data->idle_time);
+		enter = ACCESS_ONCE(data->clock_idle_enter);
+		leave = ACCESS_ONCE(data->clock_idle_exit);
+	} while (read_seqcount_retry(&data->seqcount, start));
+	/* add a wait that is in progress; without an exit stamp use now */
+	total += enter ? ((leave ? leave : now) - enter) : 0;
+	return sprintf(buf, "%llu\n", total >> 12);
+}
+DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
+
+cputime64_t arch_cpu_idle_time(int cpu)
+{
+	struct s390_idle_data *data = &per_cpu(s390_idle, cpu);
+	unsigned long long now, enter, leave;
+	unsigned int start;
+
+	do {	/* retry until the snapshot did not overlap a writer */
+		now = get_tod_clock();
+		start = read_seqcount_begin(&data->seqcount);
+		enter = ACCESS_ONCE(data->clock_idle_enter);
+		leave = ACCESS_ONCE(data->clock_idle_exit);
+	} while (read_seqcount_retry(&data->seqcount, start));
+	return enter ? ((leave ? leave : now) - enter) : 0;
+}
+
+void arch_cpu_idle_enter(void)
+{
+	local_mcck_disable();	/* re-enabled in arch_cpu_idle_exit() */
+}
+
+void arch_cpu_idle(void)
+{
+	/* Halt the cpu and keep track of cpu time accounting. */
+	if (!test_cpu_flag(CIF_MCCK_PENDING))
+		enabled_wait();
+	local_irq_enable();
+}
+
+void arch_cpu_idle_exit(void)
+{
+	local_mcck_enable();	/* pairs with arch_cpu_idle_enter() */
+	if (test_cpu_flag(CIF_MCCK_PENDING))
+		s390_handle_mcck();	/* flag is the one arch_cpu_idle() tests */
+}
+
+void arch_cpu_idle_dead(void)
+{
+	cpu_die();
+}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
new file mode 100644
index 000000000..52fbef91d
--- /dev/null
+++ b/arch/s390/kernel/ipl.c
@@ -0,0 +1,2064 @@
+/*
+ *    ipl/reipl/dump support for Linux on s390.
+ *
+ *    Copyright IBM Corp. 2005, 2012
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>
+ *		 Volker Sameske <sameske@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/crash_dump.h>
+#include <linux/debug_locks.h>
+#include <asm/ipl.h>
+#include <asm/smp.h>
+#include <asm/setup.h>
+#include <asm/cpcmd.h>
+#include <asm/cio.h>
+#include <asm/ebcdic.h>
+#include <asm/reset.h>
+#include <asm/sclp.h>
+#include <asm/checksum.h>
+#include <asm/debug.h>
+#include <asm/os_info.h>
+#include "entry.h"
+
+#define IPL_PARM_BLOCK_VERSION 0
+
+#define IPL_UNKNOWN_STR		"unknown"
+#define IPL_CCW_STR		"ccw"
+#define IPL_FCP_STR		"fcp"
+#define IPL_FCP_DUMP_STR	"fcp_dump"
+#define IPL_NSS_STR		"nss"
+
+#define DUMP_CCW_STR		"ccw"
+#define DUMP_FCP_STR		"fcp"
+#define DUMP_NONE_STR		"none"
+
+/*
+ * Five shutdown trigger types are supported:
+ * - panic
+ * - halt
+ * - power off
+ * - reipl
+ * - restart
+ */
+#define ON_PANIC_STR		"on_panic"
+#define ON_HALT_STR		"on_halt"
+#define ON_POFF_STR		"on_poff"
+#define ON_REIPL_STR		"on_reboot"
+#define ON_RESTART_STR		"on_restart"
+
+struct shutdown_action;
+struct shutdown_trigger {
+	char *name;	/* presumably one of the ON_*_STR names - confirm */
+	struct shutdown_action *action;	/* action tied to this trigger */
+};
+
+/*
+ * The following shutdown action types are supported:
+ */
+#define SHUTDOWN_ACTION_IPL_STR		"ipl"
+#define SHUTDOWN_ACTION_REIPL_STR	"reipl"
+#define SHUTDOWN_ACTION_DUMP_STR	"dump"
+#define SHUTDOWN_ACTION_VMCMD_STR	"vmcmd"
+#define SHUTDOWN_ACTION_STOP_STR	"stop"
+#define SHUTDOWN_ACTION_DUMP_REIPL_STR	"dump_reipl"
+
+struct shutdown_action {
+	char *name;	/* presumably a SHUTDOWN_ACTION_*_STR name - confirm */
+	void (*fn) (struct shutdown_trigger *trigger);	/* gets the trigger */
+	int (*init) (void);	/* optional setup hook - TODO confirm */
+	int init_rc;	/* presumably the result of init() - confirm */
+};
+
+static char *ipl_type_str(enum ipl_type type)
+{
+	/*
+	 * Every value without a name of its own, IPL_TYPE_UNKNOWN included,
+	 * is reported with the "unknown" string.
+	 */
+	if (type == IPL_TYPE_CCW)
+		return IPL_CCW_STR;
+	if (type == IPL_TYPE_FCP)
+		return IPL_FCP_STR;
+	if (type == IPL_TYPE_FCP_DUMP)
+		return IPL_FCP_DUMP_STR;
+	if (type == IPL_TYPE_NSS)
+		return IPL_NSS_STR;
+	return IPL_UNKNOWN_STR;
+}
+
+enum dump_type {
+	DUMP_TYPE_NONE	= 1,
+	DUMP_TYPE_CCW	= 2,
+	DUMP_TYPE_FCP	= 4,
+};
+
+static char *dump_type_str(enum dump_type type)
+{
+	/*
+	 * Values other than the three dump types have no name: NULL.
+	 */
+	if (type == DUMP_TYPE_NONE)
+		return DUMP_NONE_STR;
+	if (type == DUMP_TYPE_CCW)
+		return DUMP_CCW_STR;
+	if (type == DUMP_TYPE_FCP)
+		return DUMP_FCP_STR;
+	return NULL;
+}
+
+/*
+ * Must be in data section since the bss section
+ * is not cleared when these are accessed.
+ */
+static u16 ipl_devno __attribute__((__section__(".data"))) = 0;
+u32 ipl_flags __attribute__((__section__(".data"))) = 0;
+
+enum ipl_method {
+	REIPL_METHOD_CCW_CIO,
+	REIPL_METHOD_CCW_DIAG,
+	REIPL_METHOD_CCW_VM,
+	REIPL_METHOD_FCP_RO_DIAG,
+	REIPL_METHOD_FCP_RW_DIAG,
+	REIPL_METHOD_FCP_RO_VM,
+	REIPL_METHOD_FCP_DUMP,
+	REIPL_METHOD_NSS,
+	REIPL_METHOD_NSS_DIAG,
+	REIPL_METHOD_DEFAULT,
+};
+
+enum dump_method {
+	DUMP_METHOD_NONE,
+	DUMP_METHOD_CCW_CIO,
+	DUMP_METHOD_CCW_DIAG,
+	DUMP_METHOD_CCW_VM,
+	DUMP_METHOD_FCP_DIAG,
+};
+
+static int diag308_set_works = 0;
+
+static struct ipl_parameter_block ipl_block;
+
+static int reipl_capabilities = IPL_TYPE_UNKNOWN;
+
+static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
+static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT;
+static struct ipl_parameter_block *reipl_block_fcp;
+static struct ipl_parameter_block *reipl_block_ccw;
+static struct ipl_parameter_block *reipl_block_nss;
+static struct ipl_parameter_block *reipl_block_actual;
+
+static int dump_capabilities = DUMP_TYPE_NONE;
+static enum dump_type dump_type = DUMP_TYPE_NONE;
+static enum dump_method dump_method = DUMP_METHOD_NONE;
+static struct ipl_parameter_block *dump_block_fcp;
+static struct ipl_parameter_block *dump_block_ccw;
+
+static struct sclp_ipl_info sclp_ipl_info;
+
+int diag308(unsigned long subcode, void *addr)
+{
+	register unsigned long _addr asm("0") = (unsigned long) addr;
+	register unsigned long _rc asm("1") = 0;	/* value handed back */
+
+	asm volatile(
+		"	diag	%0,%2,0x308\n"
+		"0:\n"
+		EX_TABLE(0b,0b)	/* exception table entry for label 0 */
+		: "+d" (_addr), "+d" (_rc)	/* both read and written */
+		: "d" (subcode) : "cc", "memory");
+	return _rc;
+}
+EXPORT_SYMBOL_GPL(diag308);
+
+/* SYSFS */
+
+#define IPL_ATTR_SHOW_FN(_prefix, _name, _format, args...)		\
+static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj,	\
+		struct kobj_attribute *attr,				\
+		char *page)						\
+{									\
+	return snprintf(page, PAGE_SIZE, _format, ##args);		\
+}
+
+#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value)		\
+IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value)			\
+static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
+	__ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL)
+
+#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)	\
+IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value)	\
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
+		struct kobj_attribute *attr,				\
+		const char *buf, size_t len)				\
+{									\
+	unsigned long long value;					\
+	if (sscanf(buf, _fmt_in, &value) != 1)				\
+		return -EINVAL;						\
+	_value = value;							\
+	return len;							\
+}									\
+static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
+	__ATTR(_name,(S_IRUGO | S_IWUSR),				\
+			sys_##_prefix##_##_name##_show,			\
+			sys_##_prefix##_##_name##_store)
+
+#define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\
+IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value)			\
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,	\
+		struct kobj_attribute *attr,				\
+		const char *buf, size_t len)				\
+{									\
+	strncpy(_value, buf, sizeof(_value) - 1);			\
+	strim(_value);							\
+	return len;							\
+}									\
+static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
+	__ATTR(_name,(S_IRUGO | S_IWUSR),				\
+			sys_##_prefix##_##_name##_show,			\
+			sys_##_prefix##_##_name##_store)
+
+static void make_attrs_ro(struct attribute **attrs)
+{
+	struct attribute **cur;
+
+	for (cur = attrs; *cur; cur++)	/* list ends with a NULL entry */
+		(*cur)->mode = S_IRUGO;
+}
+
+/*
+ * ipl section
+ */
+
+static __init enum ipl_type get_ipl_type(void)
+{
+	struct ipl_parameter_block *pb = IPL_PARMBLOCK_START;
+
+	if (ipl_flags & IPL_NSS_VALID)
+		return IPL_TYPE_NSS;
+	if (!(ipl_flags & IPL_DEVNO_VALID))
+		return IPL_TYPE_UNKNOWN;
+	if (!(ipl_flags & IPL_PARMBLOCK_VALID))
+		return IPL_TYPE_CCW;
+	/* With a parameter block only supported-version FCP blocks count. */
+	if (pb->hdr.version > IPL_MAX_SUPPORTED_VERSION ||
+	    pb->hdr.pbt != DIAG308_IPL_TYPE_FCP)
+		return IPL_TYPE_UNKNOWN;
+	if (pb->ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP)
+		return IPL_TYPE_FCP_DUMP;
+	return IPL_TYPE_FCP;
+}
+
+struct ipl_info ipl_info;
+EXPORT_SYMBOL_GPL(ipl_info);
+
+static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
+			     char *page)
+{
+	return sprintf(page, "%s\n", ipl_type_str(ipl_info.type));
+}
+
+static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
+
+/* VM IPL PARM routines */
+static size_t reipl_get_ascii_vmparm(char *dest, size_t size,
+				     const struct ipl_parameter_block *ipb)
+{
+	size_t i;	/* same type as len: no signed/unsigned comparison */
+	size_t len;
+	char has_lowercase = 0;
+
+	len = 0;
+	if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) &&
+	    (ipb->ipl_info.ccw.vm_parm_len > 0)) {
+		/* copy at most size - 1 bytes, the NUL is stored below */
+		len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len);
+		memcpy(dest, ipb->ipl_info.ccw.vm_parm, len);
+		/* If at least one character is lowercase, we assume mixed
+		 * case; otherwise we convert everything to lowercase.
+		 */
+		for (i = 0; i < len; i++)
+			if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */
+			    (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */
+			    (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */
+				has_lowercase = 1;
+				break;
+			}
+		if (!has_lowercase)
+			EBC_TOLOWER(dest, len);
+		EBCASC(dest, len);
+	}
+	dest[len] = 0;	/* terminate, also when nothing was copied */
+
+	return len;	/* length without the terminating NUL */
+}
+
+size_t append_ipl_vmparm(char *dest, size_t size)
+{
+	/* Anything but a CCW ipl block with working diag308: empty string. */
+	if (!diag308_set_works ||
+	    ipl_block.hdr.pbt != DIAG308_IPL_TYPE_CCW) {
+		dest[0] = 0;
+		return 0;
+	}
+
+	return reipl_get_ascii_vmparm(dest, size, &ipl_block);
+}
+
+static ssize_t ipl_vm_parm_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *page)
+{
+	char parm[DIAG308_VMPARM_SIZE + 1] = {};
+
+	append_ipl_vmparm(parm, sizeof(parm));
+	return sprintf(page, "%s\n", parm);
+}
+
+static size_t scpdata_length(const char *buf, size_t count)
+{
+	/* Drop trailing NUL and blank bytes; what is left is the length. */
+	for (; count; count--) {
+		if (buf[count - 1] != '\0' && buf[count - 1] != ' ')
+			break;
+	}
+	return count;
+}
+
+static size_t reipl_append_ascii_scpdata(char *dest, size_t size,
+					 const struct ipl_parameter_block *ipb)
+{
+	size_t count, i;
+	int seen_lower = 0;
+
+	/*
+	 * Usable length: the SCP data without trailing NUL/blank bytes,
+	 * limited to size - 1 because a terminating NUL is stored as well.
+	 */
+	count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data,
+					     ipb->ipl_info.fcp.scp_data_len));
+
+	/* A single non-ASCII byte makes the result the empty string. */
+	for (i = 0; i < count; i++) {
+		if (!isascii(ipb->ipl_info.fcp.scp_data[i])) {
+			count = 0;
+			break;
+		}
+		if (!seen_lower && islower(ipb->ipl_info.fcp.scp_data[i]))
+			seen_lower = 1;
+	}
+
+	/* Data with a lowercase letter is copied as is, else lowercased. */
+	for (i = 0; i < count; i++)
+		dest[i] = seen_lower ? ipb->ipl_info.fcp.scp_data[i] :
+				tolower(ipb->ipl_info.fcp.scp_data[i]);
+	dest[count] = '\0';
+	return count;
+}
+
+
+/*
+ * Write the SCP data of the current IPL block to @dest as a string.
+ *
+ * Only done for an FCP-type block; otherwise @dest becomes the empty string.
+ * @len is the capacity of @dest. Returns the string length.
+ */
+size_t append_ipl_scpdata(char *dest, size_t len)
+{
+	if (ipl_block.hdr.pbt != DIAG308_IPL_TYPE_FCP) {
+		dest[0] = 0;
+		return 0;
+	}
+	return reipl_append_ascii_scpdata(dest, len, &ipl_block);
+}
+
+
+/* Read-only attribute "parm"; reads are served by ipl_vm_parm_show(). */
+static struct kobj_attribute sys_ipl_vm_parm_attr =
+	__ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL);
+
+/*
+ * sysfs show handler: print the IPL device as "0.0.<devno>".
+ *
+ * For CCW the device number is ipl_devno; for FCP and FCP dump it is taken
+ * from the IPL parameter block. Any other IPL type prints nothing.
+ */
+static ssize_t sys_ipl_device_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *page)
+{
+	if (ipl_info.type == IPL_TYPE_CCW)
+		return sprintf(page, "0.0.%04x\n", ipl_devno);
+
+	if (ipl_info.type == IPL_TYPE_FCP ||
+	    ipl_info.type == IPL_TYPE_FCP_DUMP)
+		return sprintf(page, "0.0.%04x\n",
+			       IPL_PARMBLOCK_START->ipl_info.fcp.devno);
+
+	return 0;
+}
+
+/* Read-only attribute "device"; reads are served by sys_ipl_device_show(). */
+static struct kobj_attribute sys_ipl_device_attr =
+	__ATTR(device, S_IRUGO, sys_ipl_device_show, NULL);
+
+/*
+ * Binary sysfs read handler: hand out up to @count bytes, starting at @off,
+ * of the IPL_PARMBLOCK_SIZE bytes found at IPL_PARMBLOCK_START.
+ */
+static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj,
+				  struct bin_attribute *attr, char *buf,
+				  loff_t off, size_t count)
+{
+	return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START,
+					IPL_PARMBLOCK_SIZE);
+}
+/* Read-only binary attribute "binary_parameter", declared size PAGE_SIZE. */
+static struct bin_attribute ipl_parameter_attr =
+	__BIN_ATTR(binary_parameter, S_IRUGO, ipl_parameter_read, NULL,
+		   PAGE_SIZE);
+
+/*
+ * Binary sysfs read handler: hand out up to @count bytes, starting at @off,
+ * of the FCP SCP data stored in the IPL parameter block.
+ */
+static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
+				 struct bin_attribute *attr, char *buf,
+				 loff_t off, size_t count)
+{
+	void *src = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data;
+	unsigned int avail = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len;
+
+	return memory_read_from_buffer(buf, count, &off, src, avail);
+}
+/* Read-only binary attribute "scp_data", declared size PAGE_SIZE. */
+static struct bin_attribute ipl_scp_data_attr =
+	__BIN_ATTR(scp_data, S_IRUGO, ipl_scp_data_read, NULL, PAGE_SIZE);
+
+/* NULL-terminated list of the binary attributes of the FCP ipl group. */
+static struct bin_attribute *ipl_fcp_bin_attrs[] = {
+	&ipl_parameter_attr,
+	&ipl_scp_data_attr,
+	NULL,
+};
+
+/* FCP ipl device attributes */
+
+/*
+ * Read-only views of FCP fields in the IPL parameter block.
+ * NOTE(review): DEFINE_IPL_ATTR_RO is defined outside this view; judging by
+ * the names used in ipl_fcp_attrs it emits sys_ipl_fcp_<name>_attr --
+ * confirm against the macro definition.
+ */
+DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", (unsigned long long)
+		   IPL_PARMBLOCK_START->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n", (unsigned long long)
+		   IPL_PARMBLOCK_START->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", (unsigned long long)
+		   IPL_PARMBLOCK_START->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long)
+		   IPL_PARMBLOCK_START->ipl_info.fcp.br_lba);
+
+/*
+ * sysfs show handler: print the load parameter from sclp_ipl_info as ASCII,
+ * or "#unknown#" when sclp_ipl_info is not valid.
+ */
+static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *page)
+{
+	/* Zero-filled, so the converted text is always NUL-terminated. */
+	char ascii[LOADPARM_LEN + 1] = {};
+
+	if (sclp_ipl_info.is_valid) {
+		memcpy(ascii, &sclp_ipl_info.loadparm, LOADPARM_LEN);
+		EBCASC(ascii, LOADPARM_LEN);
+		strim(ascii);
+		return sprintf(page, "%s\n", ascii);
+	}
+	return sprintf(page, "#unknown#\n");
+}
+
+/* Read-only attribute "loadparm"; shared by the CCW, FCP and NSS lists. */
+static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
+	__ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL);
+
+/* NULL-terminated list of the plain attributes of the FCP ipl group. */
+static struct attribute *ipl_fcp_attrs[] = {
+	&sys_ipl_type_attr.attr,
+	&sys_ipl_device_attr.attr,
+	&sys_ipl_fcp_wwpn_attr.attr,
+	&sys_ipl_fcp_lun_attr.attr,
+	&sys_ipl_fcp_bootprog_attr.attr,
+	&sys_ipl_fcp_br_lba_attr.attr,
+	&sys_ipl_ccw_loadparm_attr.attr,
+	NULL,
+};
+
+/* Attribute group registered by ipl_init() for FCP and FCP dump IPLs. */
+static struct attribute_group ipl_fcp_attr_group = {
+	.attrs = ipl_fcp_attrs,
+	.bin_attrs = ipl_fcp_bin_attrs,
+};
+
+/* CCW ipl device attributes */
+
+/* CCW attributes when MACHINE_IS_VM is set: includes the VM "parm". */
+static struct attribute *ipl_ccw_attrs_vm[] = {
+	&sys_ipl_type_attr.attr,
+	&sys_ipl_device_attr.attr,
+	&sys_ipl_ccw_loadparm_attr.attr,
+	&sys_ipl_vm_parm_attr.attr,
+	NULL,
+};
+
+/* CCW attributes otherwise: same list without the VM "parm". */
+static struct attribute *ipl_ccw_attrs_lpar[] = {
+	&sys_ipl_type_attr.attr,
+	&sys_ipl_device_attr.attr,
+	&sys_ipl_ccw_loadparm_attr.attr,
+	NULL,
+};
+
+/* Group chosen by ipl_init() for a CCW IPL when MACHINE_IS_VM is set. */
+static struct attribute_group ipl_ccw_attr_group_vm = {
+	.attrs = ipl_ccw_attrs_vm,
+};
+
+/* Group chosen by ipl_init() for a CCW IPL when MACHINE_IS_VM is not set. */
+static struct attribute_group ipl_ccw_attr_group_lpar = {
+	.attrs = ipl_ccw_attrs_lpar
+};
+
+/* NSS ipl device attributes */
+
+/*
+ * Read-only "name" attribute printing kernel_nss_name.
+ * NOTE(review): DEFINE_IPL_ATTR_RO is defined outside this view.
+ */
+DEFINE_IPL_ATTR_RO(ipl_nss, name, "%s\n", kernel_nss_name);
+
+/* NULL-terminated attribute list for an NSS IPL. */
+static struct attribute *ipl_nss_attrs[] = {
+	&sys_ipl_type_attr.attr,
+	&sys_ipl_nss_name_attr.attr,
+	&sys_ipl_ccw_loadparm_attr.attr,
+	&sys_ipl_vm_parm_attr.attr,
+	NULL,
+};
+
+/* Group registered by ipl_init() for an NSS IPL. */
+static struct attribute_group ipl_nss_attr_group = {
+	.attrs = ipl_nss_attrs,
+};
+
+/* UNKNOWN ipl device attributes */
+
+/* Fallback attribute list: only the IPL type is exposed. */
+static struct attribute *ipl_unknown_attrs[] = {
+	&sys_ipl_type_attr.attr,
+	NULL,
+};
+
+/* Group registered by ipl_init() for every IPL type not handled above. */
+static struct attribute_group ipl_unknown_attr_group = {
+	.attrs = ipl_unknown_attrs,
+};
+
+/* kset "ipl" below firmware_kobj; created in ipl_init(). */
+static struct kset *ipl_kset;
+
+/*
+ * Perform the "ipl" shutdown action; invoked through smp_call_ipl_cpu().
+ * @unused: ignored.
+ */
+static void __ipl_run(void *unused)
+{
+	diag308(DIAG308_IPL, NULL);
+	/*
+	 * Only reached if the diag308 call above returned -- presumably
+	 * these are fallbacks for that case (verify).
+	 */
+	if (MACHINE_IS_VM)
+		__cpcmd("IPL", NULL, 0, NULL);
+	else if (ipl_info.type == IPL_TYPE_CCW)
+		reipl_ccw_dev(&ipl_info.data.ccw.dev_id);
+}
+
+/* "ipl" action entry point: runs __ipl_run() via smp_call_ipl_cpu(). */
+static void ipl_run(struct shutdown_trigger *trigger)
+{
+	smp_call_ipl_cpu(__ipl_run, NULL);
+}
+
+/*
+ * Create the "ipl" kset below firmware_kobj and register the attribute
+ * group that matches the current IPL type.
+ *
+ * Any failure ends in panic(); the function therefore only ever returns 0.
+ */
+static int __init ipl_init(void)
+{
+	struct attribute_group *group;
+	int rc = -ENOMEM;
+
+	ipl_kset = kset_create_and_add("ipl", NULL, firmware_kobj);
+	if (ipl_kset) {
+		/* Pick the group first, then register it in one place. */
+		switch (ipl_info.type) {
+		case IPL_TYPE_CCW:
+			group = MACHINE_IS_VM ? &ipl_ccw_attr_group_vm :
+						&ipl_ccw_attr_group_lpar;
+			break;
+		case IPL_TYPE_FCP:
+		case IPL_TYPE_FCP_DUMP:
+			group = &ipl_fcp_attr_group;
+			break;
+		case IPL_TYPE_NSS:
+			group = &ipl_nss_attr_group;
+			break;
+		default:
+			group = &ipl_unknown_attr_group;
+			break;
+		}
+		rc = sysfs_create_group(&ipl_kset->kobj, group);
+	}
+	if (rc)
+		panic("ipl_init failed: rc = %i\n", rc);
+
+	return 0;
+}
+
+/* Shutdown action "ipl": set up by ipl_init(), executed by ipl_run(). */
+static struct shutdown_action __refdata ipl_action = {
+	.name	= SHUTDOWN_ACTION_IPL_STR,
+	.fn	= ipl_run,
+	.init	= ipl_init,
+};
+
+/*
+ * reipl shutdown action: Reboot Linux on shutdown.
+ */
+
+/* VM IPL PARM attributes */
+/* Print the ASCII VM parameter string of @ipb, plus a newline, to @page. */
+static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb,
+					  char *page)
+{
+	/* One spare byte for the NUL terminator; starts out zero-filled. */
+	char ascii_parm[DIAG308_VMPARM_SIZE + 1] = {};
+
+	reipl_get_ascii_vmparm(ascii_parm, sizeof(ascii_parm), ipb);
+
+	return sprintf(page, "%s\n", ascii_parm);
+}
+
+/*
+ * Store a new VM parameter string in @ipb.
+ *
+ * @ipb:        parameter block to update
+ * @vmparm_max: longest accepted string, not counting a trailing newline
+ * @buf, @len:  user input; one trailing newline is ignored
+ *
+ * The text is converted to EBCDIC. An empty string clears the "VM parameter
+ * valid" flag, a non-empty one sets it. Returns @len on success and -EINVAL
+ * if the input is too long or fails the character check.
+ */
+static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
+					  size_t vmparm_max,
+					  const char *buf, size_t len)
+{
+	int ip_len = len;
+	int i;
+
+	/* ignore trailing newline */
+	if ((len > 0) && (buf[len - 1] == '\n'))
+		ip_len--;
+
+	if (ip_len > vmparm_max)
+		return -EINVAL;
+
+	/* parm is used to store kernel options, check for common chars */
+	for (i = 0; i < ip_len; i++) {
+		if (isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i]))
+			continue;
+		return -EINVAL;
+	}
+
+	memset(ipb->ipl_info.ccw.vm_parm, 0, DIAG308_VMPARM_SIZE);
+	ipb->ipl_info.ccw.vm_parm_len = ip_len;
+
+	if (ip_len <= 0) {
+		ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID;
+		return len;
+	}
+
+	ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
+	memcpy(ipb->ipl_info.ccw.vm_parm, buf, ip_len);
+	ASCEBC(ipb->ipl_info.ccw.vm_parm, ip_len);
+
+	return len;
+}
+
+/* NSS wrapper */
+/* sysfs show handler: VM parameter string of the NSS re-IPL block. */
+static ssize_t reipl_nss_vmparm_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *page)
+{
+	return reipl_generic_vmparm_show(reipl_block_nss, page);
+}
+
+/*
+ * sysfs store handler: VM parameter string of the NSS re-IPL block.
+ * At most 56 characters are accepted.
+ * NOTE(review): the CCW variant accepts 64 -- the reason for the smaller
+ * NSS limit is not visible here; confirm.
+ */
+static ssize_t reipl_nss_vmparm_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t len)
+{
+	return reipl_generic_vmparm_store(reipl_block_nss, 56, buf, len);
+}
+
+/* CCW wrapper */
+/* sysfs show handler: VM parameter string of the CCW re-IPL block. */
+static ssize_t reipl_ccw_vmparm_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *page)
+{
+	return reipl_generic_vmparm_show(reipl_block_ccw, page);
+}
+
+/*
+ * sysfs store handler: VM parameter string of the CCW re-IPL block.
+ * At most 64 characters are accepted.
+ * NOTE(review): presumably this mirrors DIAG308_VMPARM_SIZE -- confirm.
+ */
+static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t len)
+{
+	return reipl_generic_vmparm_store(reipl_block_ccw, 64, buf, len);
+}
+
+/*
+ * Attribute "parm" for the NSS and CCW re-IPL groups: world-readable and
+ * owner-writable. reipl_nss_init() and reipl_ccw_init() reduce the mode to
+ * read-only when diag308_set_works is not set.
+ */
+static struct kobj_attribute sys_reipl_nss_vmparm_attr =
+	__ATTR(parm, S_IRUGO | S_IWUSR, reipl_nss_vmparm_show,
+					reipl_nss_vmparm_store);
+static struct kobj_attribute sys_reipl_ccw_vmparm_attr =
+	__ATTR(parm, S_IRUGO | S_IWUSR, reipl_ccw_vmparm_show,
+					reipl_ccw_vmparm_store);
+
+/* FCP reipl device attributes */
+
+/*
+ * Binary sysfs read handler: hand out up to @count bytes, starting at @off,
+ * of the SCP data stored in the FCP re-IPL block.
+ */
+static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj,
+				      struct bin_attribute *attr,
+				      char *buf, loff_t off, size_t count)
+{
+	void *src = reipl_block_fcp->ipl_info.fcp.scp_data;
+	size_t avail = reipl_block_fcp->ipl_info.fcp.scp_data_len;
+
+	return memory_read_from_buffer(buf, count, &off, src, avail);
+}
+
+/*
+ * Binary sysfs write handler for the SCP data of the FCP re-IPL block.
+ *
+ * @buf holds @count bytes that belong at byte offset @off of the SCP data
+ * area. The write is clipped to DIAG308_SCPDATA_SIZE, the stored length is
+ * rounded up to a multiple of 8 with zero padding, and the length fields in
+ * the parameter block header are updated to match.
+ *
+ * Returns the number of bytes consumed, -EINVAL for a negative offset, or
+ * -ENOSPC when @off lies at or beyond the end of the SCP data area.
+ */
+static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
+				       struct bin_attribute *attr,
+				       char *buf, loff_t off, size_t count)
+{
+	size_t padding;
+	size_t scpdata_len;
+
+	if (off < 0)
+		return -EINVAL;
+
+	if (off >= DIAG308_SCPDATA_SIZE)
+		return -ENOSPC;
+
+	if (count > DIAG308_SCPDATA_SIZE - off)
+		count = DIAG308_SCPDATA_SIZE - off;
+
+	/*
+	 * @buf already starts at the data for @off, so the offset applies to
+	 * the destination only. Indexing @buf by @off would read beyond the
+	 * bytes handed to this handler and always overwrite the start of the
+	 * SCP data area.
+	 */
+	memcpy(reipl_block_fcp->ipl_info.fcp.scp_data + off, buf, count);
+	scpdata_len = off + count;
+
+	/* Pad with zeros up to the next multiple of 8 bytes. */
+	if (scpdata_len % 8) {
+		padding = 8 - (scpdata_len % 8);
+		memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len,
+		       0, padding);
+		scpdata_len += padding;
+	}
+
+	reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len;
+	reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len;
+	reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len;
+
+	return count;
+}
+/*
+ * Binary attribute "scp_data" of the FCP re-IPL group: world-readable and
+ * owner-writable, declared size PAGE_SIZE.
+ */
+static struct bin_attribute sys_reipl_fcp_scp_data_attr =
+	__BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_fcp_scpdata_read,
+		   reipl_fcp_scpdata_write, PAGE_SIZE);
+
+/* NULL-terminated list of the binary attributes of the FCP re-IPL group. */
+static struct bin_attribute *reipl_fcp_bin_attrs[] = {
+	&sys_reipl_fcp_scp_data_attr,
+	NULL,
+};
+
+/*
+ * Read/write views of FCP fields in the FCP re-IPL block.
+ * NOTE(review): DEFINE_IPL_ATTR_RW is defined outside this view; judging by
+ * the names used in reipl_fcp_attrs it emits sys_reipl_fcp_<name>_attr, and
+ * the two format strings look like the output and input formats -- confirm
+ * against the macro definition.
+ */
+DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
+		   reipl_block_fcp->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
+		   reipl_block_fcp->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
+		   reipl_block_fcp->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
+		   reipl_block_fcp->ipl_info.fcp.br_lba);
+DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
+		   reipl_block_fcp->ipl_info.fcp.devno);
+
+/*
+ * Convert the load parameter in the header of @ibp to a trimmed,
+ * NUL-terminated ASCII string.
+ *
+ * @loadparm must provide room for LOADPARM_LEN + 1 bytes.
+ */
+static void reipl_get_ascii_loadparm(char *loadparm,
+				     struct ipl_parameter_block *ibp)
+{
+	/* Terminate first; the steps below touch LOADPARM_LEN bytes only. */
+	loadparm[LOADPARM_LEN] = 0;
+
+	memcpy(loadparm, ibp->hdr.loadparm, LOADPARM_LEN);
+	EBCASC(loadparm, LOADPARM_LEN);
+	strim(loadparm);
+}
+
+/* Print the ASCII load parameter of @ipb, plus a newline, to @page. */
+static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb,
+					   char *page)
+{
+	char ascii[LOADPARM_LEN + 1];
+
+	reipl_get_ascii_loadparm(ascii, ipb);
+
+	return sprintf(page, "%s\n", ascii);
+}
+
+/*
+ * Store a new load parameter in the header of @ipb.
+ *
+ * @buf, @len: user input; one trailing newline is ignored
+ *
+ * The value may be at most LOADPARM_LEN characters long, must not start
+ * with a blank and may only contain letters, digits, blanks and dots. It is
+ * blank-padded to LOADPARM_LEN and converted to EBCDIC.
+ * Returns @len on success, -EINVAL for invalid input.
+ */
+static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
+					    const char *buf, size_t len)
+{
+	int lp_len = len;
+	int i;
+
+	/* ignore trailing newline */
+	if ((len > 0) && (buf[len - 1] == '\n'))
+		lp_len--;
+
+	/* loadparm can have max 8 characters and must not start with a blank */
+	if (lp_len > LOADPARM_LEN)
+		return -EINVAL;
+	if ((lp_len > 0) && (buf[0] == ' '))
+		return -EINVAL;
+
+	/* loadparm can only contain "a-z,A-Z,0-9,SP,." */
+	for (i = 0; i < lp_len; i++) {
+		if (!(isalpha(buf[i]) || isdigit(buf[i]) || (buf[i] == ' ') ||
+		      (buf[i] == '.')))
+			return -EINVAL;
+	}
+
+	/* blank-fill, copy the new value over it, then convert to ebcdic */
+	memset(ipb->hdr.loadparm, ' ', LOADPARM_LEN);
+	memcpy(ipb->hdr.loadparm, buf, lp_len);
+	ASCEBC(ipb->hdr.loadparm, LOADPARM_LEN);
+
+	return len;
+}
+
+/* FCP wrapper */
+/* sysfs show handler: load parameter of the FCP re-IPL block. */
+static ssize_t reipl_fcp_loadparm_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *page)
+{
+	return reipl_generic_loadparm_show(reipl_block_fcp, page);
+}
+
+/* sysfs store handler: load parameter of the FCP re-IPL block. */
+static ssize_t reipl_fcp_loadparm_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t len)
+{
+	return reipl_generic_loadparm_store(reipl_block_fcp, buf, len);
+}
+
+/* Attribute "loadparm" of the FCP re-IPL group (read/write). */
+static struct kobj_attribute sys_reipl_fcp_loadparm_attr =
+	__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_fcp_loadparm_show,
+					    reipl_fcp_loadparm_store);
+
+/*
+ * NULL-terminated list of the plain attributes of the FCP re-IPL group.
+ * reipl_fcp_init() passes this list to make_attrs_ro() when
+ * diag308_set_works is not set.
+ */
+static struct attribute *reipl_fcp_attrs[] = {
+	&sys_reipl_fcp_device_attr.attr,
+	&sys_reipl_fcp_wwpn_attr.attr,
+	&sys_reipl_fcp_lun_attr.attr,
+	&sys_reipl_fcp_bootprog_attr.attr,
+	&sys_reipl_fcp_br_lba_attr.attr,
+	&sys_reipl_fcp_loadparm_attr.attr,
+	NULL,
+};
+
+/* Group registered on reipl_fcp_kset by reipl_fcp_init(). */
+static struct attribute_group reipl_fcp_attr_group = {
+	.attrs = reipl_fcp_attrs,
+	.bin_attrs = reipl_fcp_bin_attrs,
+};
+
+/* CCW reipl device attributes */
+
+/*
+ * Read/write "device" attribute backed by the devno field of the CCW
+ * re-IPL block.
+ * NOTE(review): DEFINE_IPL_ATTR_RW is defined outside this view.
+ */
+DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
+	reipl_block_ccw->ipl_info.ccw.devno);
+
+/* NSS wrapper */
+/* sysfs show handler: load parameter of the NSS re-IPL block. */
+static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *page)
+{
+	return reipl_generic_loadparm_show(reipl_block_nss, page);
+}
+
+/* sysfs store handler: load parameter of the NSS re-IPL block. */
+static ssize_t reipl_nss_loadparm_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t len)
+{
+	return reipl_generic_loadparm_store(reipl_block_nss, buf, len);
+}
+
+/* CCW wrapper */
+/* sysfs show handler: load parameter of the CCW re-IPL block. */
+static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *page)
+{
+	return reipl_generic_loadparm_show(reipl_block_ccw, page);
+}
+
+/* sysfs store handler: load parameter of the CCW re-IPL block. */
+static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t len)
+{
+	return reipl_generic_loadparm_store(reipl_block_ccw, buf, len);
+}
+
+/*
+ * Attribute "loadparm" of the CCW re-IPL group (read/write).
+ * reipl_ccw_init() reduces the mode to read-only when neither MACHINE_IS_VM
+ * nor diag308_set_works is set.
+ */
+static struct kobj_attribute sys_reipl_ccw_loadparm_attr =
+	__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show,
+					    reipl_ccw_loadparm_store);
+
+/* CCW re-IPL attributes when MACHINE_IS_VM is set: includes "parm". */
+static struct attribute *reipl_ccw_attrs_vm[] = {
+	&sys_reipl_ccw_device_attr.attr,
+	&sys_reipl_ccw_loadparm_attr.attr,
+	&sys_reipl_ccw_vmparm_attr.attr,
+	NULL,
+};
+
+/* CCW re-IPL attributes otherwise: same list without "parm". */
+static struct attribute *reipl_ccw_attrs_lpar[] = {
+	&sys_reipl_ccw_device_attr.attr,
+	&sys_reipl_ccw_loadparm_attr.attr,
+	NULL,
+};
+
+/* Both groups use the name IPL_CCW_STR; reipl_ccw_init() registers one. */
+static struct attribute_group reipl_ccw_attr_group_vm = {
+	.name  = IPL_CCW_STR,
+	.attrs = reipl_ccw_attrs_vm,
+};
+
+static struct attribute_group reipl_ccw_attr_group_lpar = {
+	.name  = IPL_CCW_STR,
+	.attrs = reipl_ccw_attrs_lpar,
+};
+
+
+/* NSS reipl device attributes */
+/*
+ * Convert the NSS name stored in @ipb to a NUL-terminated ASCII string.
+ *
+ * @dst must provide room for NSS_NAME_SIZE + 1 bytes.
+ */
+static void reipl_get_ascii_nss_name(char *dst,
+				     struct ipl_parameter_block *ipb)
+{
+	/* Terminate first; the steps below touch NSS_NAME_SIZE bytes only. */
+	dst[NSS_NAME_SIZE] = 0;
+
+	memcpy(dst, ipb->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
+	EBCASC(dst, NSS_NAME_SIZE);
+}
+
+/* sysfs show handler: print the NSS name of the NSS re-IPL block. */
+static ssize_t reipl_nss_name_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *page)
+{
+	char ascii_name[NSS_NAME_SIZE + 1] = {};
+
+	reipl_get_ascii_nss_name(ascii_name, reipl_block_nss);
+
+	return sprintf(page, "%s\n", ascii_name);
+}
+
+/*
+ * sysfs store handler: set the NSS name of the NSS re-IPL block.
+ *
+ * One trailing newline is ignored. The name is stored in uppercase EBCDIC,
+ * padded with 0x40 bytes to NSS_NAME_SIZE. An empty name clears the "NSS
+ * valid" flag, a non-empty one sets it.
+ * Returns @len on success, -EINVAL if the name exceeds NSS_NAME_SIZE.
+ */
+static ssize_t reipl_nss_name_store(struct kobject *kobj,
+				    struct kobj_attribute *attr,
+				    const char *buf, size_t len)
+{
+	int nss_len = len;
+
+	/* ignore trailing newline */
+	if ((len > 0) && (buf[len - 1] == '\n'))
+		nss_len--;
+
+	if (nss_len > NSS_NAME_SIZE)
+		return -EINVAL;
+
+	memset(reipl_block_nss->ipl_info.ccw.nss_name, 0x40, NSS_NAME_SIZE);
+
+	if (nss_len <= 0) {
+		reipl_block_nss->ipl_info.ccw.vm_flags &=
+			~DIAG308_VM_FLAGS_NSS_VALID;
+		return len;
+	}
+
+	reipl_block_nss->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_NSS_VALID;
+	memcpy(reipl_block_nss->ipl_info.ccw.nss_name, buf, nss_len);
+	ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
+	EBC_TOUPPER(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
+
+	return len;
+}
+
+/* Attribute "name" of the NSS re-IPL group (read/write). */
+static struct kobj_attribute sys_reipl_nss_name_attr =
+	__ATTR(name, S_IRUGO | S_IWUSR, reipl_nss_name_show,
+					reipl_nss_name_store);
+
+/* Attribute "loadparm" of the NSS re-IPL group (read/write). */
+static struct kobj_attribute sys_reipl_nss_loadparm_attr =
+	__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nss_loadparm_show,
+					    reipl_nss_loadparm_store);
+
+/* NULL-terminated attribute list of the NSS re-IPL group. */
+static struct attribute *reipl_nss_attrs[] = {
+	&sys_reipl_nss_name_attr.attr,
+	&sys_reipl_nss_loadparm_attr.attr,
+	&sys_reipl_nss_vmparm_attr.attr,
+	NULL,
+};
+
+/* Group named IPL_NSS_STR; registered by reipl_nss_init(). */
+static struct attribute_group reipl_nss_attr_group = {
+	.name  = IPL_NSS_STR,
+	.attrs = reipl_nss_attrs,
+};
+
+/*
+ * Make @reipl_block the active re-IPL block and record it, with the length
+ * from its header, as the OS_INFO_REIPL_BLOCK os_info entry.
+ */
+static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block)
+{
+	reipl_block_actual = reipl_block;
+	os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block,
+			  reipl_block->hdr.len);
+}
+
+/* reipl type */
+
+/*
+ * Select the re-IPL type and derive the matching re-IPL method.
+ *
+ * For CCW, FCP and NSS the corresponding re-IPL block also becomes the
+ * active one. Returns 0 on success, or -EINVAL when @type is not contained
+ * in reipl_capabilities. A type outside the handled set hits BUG().
+ */
+static int reipl_set_type(enum ipl_type type)
+{
+	if ((reipl_capabilities & type) == 0)
+		return -EINVAL;
+
+	switch (type) {
+	case IPL_TYPE_CCW:
+		reipl_method = diag308_set_works ? REIPL_METHOD_CCW_DIAG :
+			       (MACHINE_IS_VM)	  ? REIPL_METHOD_CCW_VM :
+						    REIPL_METHOD_CCW_CIO;
+		set_reipl_block_actual(reipl_block_ccw);
+		break;
+	case IPL_TYPE_FCP:
+		reipl_method = diag308_set_works ? REIPL_METHOD_FCP_RW_DIAG :
+			       (MACHINE_IS_VM)	  ? REIPL_METHOD_FCP_RO_VM :
+						    REIPL_METHOD_FCP_RO_DIAG;
+		set_reipl_block_actual(reipl_block_fcp);
+		break;
+	case IPL_TYPE_FCP_DUMP:
+		reipl_method = REIPL_METHOD_FCP_DUMP;
+		break;
+	case IPL_TYPE_NSS:
+		reipl_method = diag308_set_works ? REIPL_METHOD_NSS_DIAG :
+						    REIPL_METHOD_NSS;
+		set_reipl_block_actual(reipl_block_nss);
+		break;
+	case IPL_TYPE_UNKNOWN:
+		reipl_method = REIPL_METHOD_DEFAULT;
+		break;
+	default:
+		BUG();
+	}
+
+	reipl_type = type;
+	return 0;
+}
+
+/* sysfs show handler: print the current re-IPL type as a string. */
+static ssize_t reipl_type_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%s\n", ipl_type_str(reipl_type));
+}
+
+/*
+ * sysfs store handler: select the re-IPL type.
+ *
+ * @buf is matched by prefix against IPL_CCW_STR, IPL_FCP_STR and
+ * IPL_NSS_STR, in that order. Returns @len on success, -EINVAL for an
+ * unrecognized string, or the error from reipl_set_type().
+ */
+static ssize_t reipl_type_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t len)
+{
+	enum ipl_type type;
+	int rc;
+
+	if (!strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)))
+		type = IPL_TYPE_CCW;
+	else if (!strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)))
+		type = IPL_TYPE_FCP;
+	else if (!strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)))
+		type = IPL_TYPE_NSS;
+	else
+		return -EINVAL;
+
+	rc = reipl_set_type(type);
+	if (rc)
+		return rc;
+	return len;
+}
+
+/* Attribute "reipl_type" (mode 0644), created by reipl_init(). */
+static struct kobj_attribute reipl_type_attr =
+	__ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store);
+
+/* kset "reipl" below firmware_kobj; created in reipl_init(). */
+static struct kset *reipl_kset;
+/* kset named IPL_FCP_STR below reipl_kset; created in reipl_fcp_init(). */
+static struct kset *reipl_fcp_kset;
+
+/*
+ * Build the command string for a re-IPL of @ipb using method @m in @dst.
+ *
+ * REIPL_METHOD_CCW_VM starts the string with "IPL <devno> CLEAR",
+ * REIPL_METHOD_NSS with "IPL <nss name>"; other methods add no prefix.
+ * A non-empty load parameter and a non-empty VM parameter are appended as
+ * " LOADPARM '...'" and " PARM ...". The caller provides a large enough
+ * @dst; no bounds are checked here.
+ */
+static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb,
+			   const enum ipl_method m)
+{
+	char loadparm[LOADPARM_LEN + 1] = {};
+	char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
+	char nss_name[NSS_NAME_SIZE + 1] = {};
+	size_t used = 0;
+
+	reipl_get_ascii_loadparm(loadparm, ipb);
+	reipl_get_ascii_nss_name(nss_name, ipb);
+	reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
+
+	if (m == REIPL_METHOD_CCW_VM)
+		used = sprintf(dst, "IPL %X CLEAR", ipb->ipl_info.ccw.devno);
+	else if (m == REIPL_METHOD_NSS)
+		used = sprintf(dst, "IPL %s", nss_name);
+
+	if (loadparm[0] != '\0')
+		used += sprintf(dst + used, " LOADPARM '%s'", loadparm);
+	if (vmparm[0] != '\0')
+		sprintf(dst + used, " PARM %s", vmparm);
+}
+
+/*
+ * Perform the "reipl" shutdown action according to reipl_method; invoked
+ * through smp_call_ipl_cpu(). @unused is ignored.
+ *
+ * If the selected method returns, the function ends in disabled_wait().
+ */
+static void __reipl_run(void *unused)
+{
+	struct ccw_dev_id devid;
+	/* Command buffer for the methods that issue a command via __cpcmd(). */
+	static char buf[128];
+
+	switch (reipl_method) {
+	case REIPL_METHOD_CCW_CIO:
+		devid.devno = reipl_block_ccw->ipl_info.ccw.devno;
+		devid.ssid  = 0;
+		reipl_ccw_dev(&devid);
+		break;
+	case REIPL_METHOD_CCW_VM:
+		get_ipl_string(buf, reipl_block_ccw, REIPL_METHOD_CCW_VM);
+		__cpcmd(buf, NULL, 0, NULL);
+		break;
+	case REIPL_METHOD_CCW_DIAG:
+		/* Hand over the parameter block first, then trigger the IPL. */
+		diag308(DIAG308_SET, reipl_block_ccw);
+		diag308(DIAG308_IPL, NULL);
+		break;
+	case REIPL_METHOD_FCP_RW_DIAG:
+		diag308(DIAG308_SET, reipl_block_fcp);
+		diag308(DIAG308_IPL, NULL);
+		break;
+	case REIPL_METHOD_FCP_RO_DIAG:
+		/* No DIAG308_SET here: the IPL is triggered without a new block. */
+		diag308(DIAG308_IPL, NULL);
+		break;
+	case REIPL_METHOD_FCP_RO_VM:
+		__cpcmd("IPL", NULL, 0, NULL);
+		break;
+	case REIPL_METHOD_NSS_DIAG:
+		diag308(DIAG308_SET, reipl_block_nss);
+		diag308(DIAG308_IPL, NULL);
+		break;
+	case REIPL_METHOD_NSS:
+		get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS);
+		__cpcmd(buf, NULL, 0, NULL);
+		break;
+	case REIPL_METHOD_DEFAULT:
+		if (MACHINE_IS_VM)
+			__cpcmd("IPL", NULL, 0, NULL);
+		diag308(DIAG308_IPL, NULL);
+		break;
+	case REIPL_METHOD_FCP_DUMP:
+		/* Nothing is attempted for this method. */
+		break;
+	}
+	disabled_wait((unsigned long) __builtin_return_address(0));
+}
+
+/* "reipl" action entry point: runs __reipl_run() via smp_call_ipl_cpu(). */
+static void reipl_run(struct shutdown_trigger *trigger)
+{
+	smp_call_ipl_cpu(__reipl_run, NULL);
+}
+
+/* Fill the header of @ipb with the values of a CCW-type parameter block. */
+static void reipl_block_ccw_init(struct ipl_parameter_block *ipb)
+{
+	/* What kind of block this is ... */
+	ipb->hdr.version = IPL_PARM_BLOCK_VERSION;
+	ipb->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+
+	/* ... and how long it is. */
+	ipb->hdr.len = IPL_PARM_BLK_CCW_LEN;
+	ipb->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
+}
+
+/*
+ * Seed @ipb with the parameters of the current IPL.
+ *
+ * The load parameter comes from sclp_ipl_info when that is valid and is set
+ * to EBCDIC blanks otherwise. The VM parameter of ipl_block is carried over
+ * only when MACHINE_IS_VM and diag308_set_works are set and ipl_block flags
+ * its VM parameter as valid.
+ */
+static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
+{
+	/* LOADPARM */
+	if (!sclp_ipl_info.is_valid)
+		/* read scp info failed: set empty loadparm (EBCDIC blanks) */
+		memset(ipb->hdr.loadparm, 0x40, LOADPARM_LEN);
+	else
+		memcpy(ipb->hdr.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
+	ipb->hdr.flags = DIAG308_FLAGS_LP_VALID;
+
+	/* VM PARM */
+	if (!MACHINE_IS_VM || !diag308_set_works ||
+	    !(ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID))
+		return;
+
+	ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
+	ipb->ipl_info.ccw.vm_parm_len = ipl_block.ipl_info.ccw.vm_parm_len;
+	memcpy(ipb->ipl_info.ccw.vm_parm,
+	       ipl_block.ipl_info.ccw.vm_parm, DIAG308_VMPARM_SIZE);
+}
+
+/*
+ * Set up NSS re-IPL support: allocate the NSS re-IPL block, register its
+ * sysfs group and, for an NSS IPL, seed the block from the current IPL.
+ *
+ * Does nothing and returns 0 when MACHINE_IS_VM is not set. Returns 0 on
+ * success, -ENOMEM if the block cannot be allocated, or the error from
+ * sysfs_create_group(). On failure the block is released again.
+ */
+static int __init reipl_nss_init(void)
+{
+	int rc;
+
+	if (!MACHINE_IS_VM)
+		return 0;
+
+	reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_nss)
+		return -ENOMEM;
+
+	/* Without a working diag308 "set" the VM parameter is read-only. */
+	if (!diag308_set_works)
+		sys_reipl_nss_vmparm_attr.attr.mode = S_IRUGO;
+
+	rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group);
+	if (rc) {
+		/* Do not leak the page, and leave no dangling pointer. */
+		free_page((unsigned long) reipl_block_nss);
+		reipl_block_nss = NULL;
+		return rc;
+	}
+
+	reipl_block_ccw_init(reipl_block_nss);
+	if (ipl_info.type == IPL_TYPE_NSS) {
+		/* Blank-pad the name, then convert the field to EBCDIC. */
+		memset(reipl_block_nss->ipl_info.ccw.nss_name,
+			' ', NSS_NAME_SIZE);
+		memcpy(reipl_block_nss->ipl_info.ccw.nss_name,
+			kernel_nss_name, strlen(kernel_nss_name));
+		ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
+		reipl_block_nss->ipl_info.ccw.vm_flags |=
+			DIAG308_VM_FLAGS_NSS_VALID;
+
+		reipl_block_ccw_fill_parms(reipl_block_nss);
+	}
+
+	reipl_capabilities |= IPL_TYPE_NSS;
+	return 0;
+}
+
+/*
+ * Set up CCW re-IPL support: allocate the CCW re-IPL block, register the
+ * sysfs group that fits the machine and, for a CCW IPL, seed the block from
+ * the current IPL.
+ *
+ * Returns 0 on success, -ENOMEM if the block cannot be allocated, or the
+ * error from sysfs_create_group(). On failure the block is released again.
+ */
+static int __init reipl_ccw_init(void)
+{
+	int rc;
+
+	reipl_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_ccw)
+		return -ENOMEM;
+
+	/* Without a working diag308 "set" one attribute becomes read-only. */
+	if (MACHINE_IS_VM) {
+		if (!diag308_set_works)
+			sys_reipl_ccw_vmparm_attr.attr.mode = S_IRUGO;
+		rc = sysfs_create_group(&reipl_kset->kobj,
+					&reipl_ccw_attr_group_vm);
+	} else {
+		if (!diag308_set_works)
+			sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO;
+		rc = sysfs_create_group(&reipl_kset->kobj,
+					&reipl_ccw_attr_group_lpar);
+	}
+	if (rc) {
+		/* Do not leak the page, and leave no dangling pointer. */
+		free_page((unsigned long) reipl_block_ccw);
+		reipl_block_ccw = NULL;
+		return rc;
+	}
+
+	reipl_block_ccw_init(reipl_block_ccw);
+	if (ipl_info.type == IPL_TYPE_CCW) {
+		reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
+		reipl_block_ccw_fill_parms(reipl_block_ccw);
+	}
+
+	reipl_capabilities |= IPL_TYPE_CCW;
+	return 0;
+}
+
+/*
+ * Set up FCP re-IPL support: allocate the FCP re-IPL block, create its kset
+ * and attribute group, and initialize the block.
+ *
+ * Without a working diag308 "set", FCP re-IPL is offered only after an FCP
+ * IPL, and then with read-only attributes; otherwise the function returns 0
+ * without setting anything up.
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error from
+ * sysfs_create_group().
+ */
+static int __init reipl_fcp_init(void)
+{
+	int rc;
+
+	if (!diag308_set_works) {
+		if (ipl_info.type != IPL_TYPE_FCP)
+			return 0;
+		make_attrs_ro(reipl_fcp_attrs);
+		sys_reipl_fcp_scp_data_attr.attr.mode = S_IRUGO;
+	}
+
+	reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_fcp)
+		return -ENOMEM;
+
+	/* sysfs: create fcp kset for mixing attr group and bin attrs */
+	reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL,
+					     &reipl_kset->kobj);
+	if (!reipl_fcp_kset) {
+		rc = -ENOMEM;
+		goto out_free_block;
+	}
+
+	rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
+	if (rc)
+		goto out_unregister;
+
+	if (ipl_info.type != IPL_TYPE_FCP) {
+		/* No FCP IPL to copy from: start with an empty FCP block. */
+		reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+		reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
+		reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
+		reipl_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
+		reipl_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_IPL;
+	} else {
+		memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE);
+		/*
+		 * Fix loadparm: There are systems where the (SCSI) LOADPARM
+		 * is invalid in the SCSI IPL parameter block, so take it
+		 * always from sclp_ipl_info.
+		 */
+		memcpy(reipl_block_fcp->hdr.loadparm, sclp_ipl_info.loadparm,
+		       LOADPARM_LEN);
+	}
+	reipl_capabilities |= IPL_TYPE_FCP;
+	return 0;
+
+out_unregister:
+	kset_unregister(reipl_fcp_kset);
+out_free_block:
+	free_page((unsigned long) reipl_block_fcp);
+	return rc;
+}
+
+/*
+ * Choose the initial re-IPL type.
+ *
+ * The default is the type of the current IPL. If os_info_old_entry()
+ * provides an OS_INFO_REIPL_BLOCK entry of FCP or CCW type, that block is
+ * copied into the matching re-IPL block and its type is used instead.
+ *
+ * Returns the result of reipl_set_type().
+ */
+static int __init reipl_type_init(void)
+{
+	enum ipl_type reipl_type = ipl_info.type;
+	struct ipl_parameter_block *reipl_block;
+	unsigned long size;
+
+	reipl_block = os_info_old_entry(OS_INFO_REIPL_BLOCK, &size);
+	if (!reipl_block)
+		goto out;
+	/*
+	 * The re-IPL blocks are single pages (get_zeroed_page() in the
+	 * reipl_*_init() functions), so an entry larger than that cannot be
+	 * taken over.
+	 */
+	if (size > PAGE_SIZE)
+		goto out;
+	/*
+	 * If we have an OS info reipl block, this will be used. The target
+	 * block may be missing: reipl_fcp_init() can succeed without
+	 * allocating reipl_block_fcp.
+	 */
+	if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) {
+		if (reipl_block_fcp) {
+			memcpy(reipl_block_fcp, reipl_block, size);
+			reipl_type = IPL_TYPE_FCP;
+		}
+	} else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) {
+		if (reipl_block_ccw) {
+			memcpy(reipl_block_ccw, reipl_block, size);
+			reipl_type = IPL_TYPE_CCW;
+		}
+	}
+out:
+	return reipl_set_type(reipl_type);
+}
+
+/*
+ * Initialize the "reipl" shutdown action: create the "reipl" kset and its
+ * "reipl_type" attribute, then set up CCW, FCP and NSS support and pick the
+ * initial re-IPL type.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int __init reipl_init(void)
+{
+	int rc;
+
+	reipl_kset = kset_create_and_add("reipl", NULL, firmware_kobj);
+	if (!reipl_kset)
+		return -ENOMEM;
+
+	rc = sysfs_create_file(&reipl_kset->kobj, &reipl_type_attr.attr);
+	if (rc) {
+		kset_unregister(reipl_kset);
+		return rc;
+	}
+
+	/* Each step runs only if every step before it succeeded. */
+	rc = reipl_ccw_init();
+	if (!rc)
+		rc = reipl_fcp_init();
+	if (!rc)
+		rc = reipl_nss_init();
+	if (!rc)
+		rc = reipl_type_init();
+	return rc;
+}
+
+/* Shutdown action "reipl": set up by reipl_init(), executed by reipl_run(). */
+static struct shutdown_action __refdata reipl_action = {
+	.name	= SHUTDOWN_ACTION_REIPL_STR,
+	.fn	= reipl_run,
+	.init	= reipl_init,
+};
+
+/*
+ * dump shutdown action: Dump Linux on shutdown.
+ */
+
+/* FCP dump device attributes */
+
+/*
+ * Read/write views of FCP fields in the FCP dump block.
+ * NOTE(review): DEFINE_IPL_ATTR_RW is defined outside this view; judging by
+ * the names used in dump_fcp_attrs it emits sys_dump_fcp_<name>_attr --
+ * confirm against the macro definition.
+ */
+DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
+		   dump_block_fcp->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
+		   dump_block_fcp->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
+		   dump_block_fcp->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
+		   dump_block_fcp->ipl_info.fcp.br_lba);
+DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
+		   dump_block_fcp->ipl_info.fcp.devno);
+
+/* NULL-terminated attribute list of the FCP dump group. */
+static struct attribute *dump_fcp_attrs[] = {
+	&sys_dump_fcp_device_attr.attr,
+	&sys_dump_fcp_wwpn_attr.attr,
+	&sys_dump_fcp_lun_attr.attr,
+	&sys_dump_fcp_bootprog_attr.attr,
+	&sys_dump_fcp_br_lba_attr.attr,
+	NULL,
+};
+
+/* Group named IPL_FCP_STR; registered on dump_kset by dump_fcp_init(). */
+static struct attribute_group dump_fcp_attr_group = {
+	.name  = IPL_FCP_STR,
+	.attrs = dump_fcp_attrs,
+};
+
+/* CCW dump device attributes */
+
+/*
+ * Read/write "device" attribute backed by the devno field of the CCW dump
+ * block.
+ * NOTE(review): DEFINE_IPL_ATTR_RW is defined outside this view.
+ */
+DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
+		   dump_block_ccw->ipl_info.ccw.devno);
+
+/* NULL-terminated attribute list of the CCW dump group. */
+static struct attribute *dump_ccw_attrs[] = {
+	&sys_dump_ccw_device_attr.attr,
+	NULL,
+};
+
+/* Group named IPL_CCW_STR; registered on dump_kset by dump_ccw_init(). */
+static struct attribute_group dump_ccw_attr_group = {
+	.name  = IPL_CCW_STR,
+	.attrs = dump_ccw_attrs,
+};
+
+/* dump type */
+
+/*
+ * Select the dump type and derive the matching dump method.
+ *
+ * Returns 0 on success, or -EINVAL when @type is not contained in
+ * dump_capabilities. Types other than CCW and FCP select DUMP_METHOD_NONE.
+ */
+static int dump_set_type(enum dump_type type)
+{
+	if ((dump_capabilities & type) == 0)
+		return -EINVAL;
+
+	switch (type) {
+	case DUMP_TYPE_CCW:
+		dump_method = diag308_set_works ? DUMP_METHOD_CCW_DIAG :
+			      (MACHINE_IS_VM)	 ? DUMP_METHOD_CCW_VM :
+						   DUMP_METHOD_CCW_CIO;
+		break;
+	case DUMP_TYPE_FCP:
+		dump_method = DUMP_METHOD_FCP_DIAG;
+		break;
+	default:
+		dump_method = DUMP_METHOD_NONE;
+	}
+
+	dump_type = type;
+	return 0;
+}
+
+/* sysfs show handler: print the current dump type as a string. */
+static ssize_t dump_type_show(struct kobject *kobj,
+			      struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%s\n", dump_type_str(dump_type));
+}
+
+/*
+ * sysfs store handler: select the dump type.
+ *
+ * @buf is matched by prefix against DUMP_NONE_STR, DUMP_CCW_STR and
+ * DUMP_FCP_STR, in that order. Returns @len on success, -EINVAL for an
+ * unrecognized string, or the error from dump_set_type().
+ */
+static ssize_t dump_type_store(struct kobject *kobj,
+			       struct kobj_attribute *attr,
+			       const char *buf, size_t len)
+{
+	enum dump_type type;
+	int rc;
+
+	if (!strncmp(buf, DUMP_NONE_STR, strlen(DUMP_NONE_STR)))
+		type = DUMP_TYPE_NONE;
+	else if (!strncmp(buf, DUMP_CCW_STR, strlen(DUMP_CCW_STR)))
+		type = DUMP_TYPE_CCW;
+	else if (!strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)))
+		type = DUMP_TYPE_FCP;
+	else
+		return -EINVAL;
+
+	rc = dump_set_type(type);
+	if (rc)
+		return rc;
+	return len;
+}
+
+/* Attribute "dump_type" (mode 0644), created by dump_init(). */
+static struct kobj_attribute dump_type_attr =
+	__ATTR(dump_type, 0644, dump_type_show, dump_type_store);
+
+/* kset "dump" below firmware_kobj; created in dump_init(). */
+static struct kset *dump_kset;
+
+/*
+ * Hand @dump_block to diag308 and request the dump.
+ *
+ * The dump request is repeated, with a pause of USEC_PER_SEC in between, for
+ * as long as diag308 answers 0x302.
+ * NOTE(review): 0x302 presumably means "busy, try again" -- confirm.
+ */
+static void diag308_dump(void *dump_block)
+{
+	diag308(DIAG308_SET, dump_block);
+
+	while (diag308(DIAG308_DUMP, NULL) == 0x302)
+		udelay_simple(USEC_PER_SEC);
+}
+
+/*
+ * Perform the "dump" shutdown action according to dump_method; invoked
+ * through smp_call_ipl_cpu(). @unused is ignored.
+ */
+static void __dump_run(void *unused)
+{
+	struct ccw_dev_id devid;
+	/* Command buffer for the method that issues commands via __cpcmd(). */
+	static char buf[100];
+
+	switch (dump_method) {
+	case DUMP_METHOD_CCW_CIO:
+		devid.devno = dump_block_ccw->ipl_info.ccw.devno;
+		devid.ssid  = 0;
+		reipl_ccw_dev(&devid);
+		break;
+	case DUMP_METHOD_CCW_VM:
+		/* Two commands in sequence: store status, then IPL the device. */
+		sprintf(buf, "STORE STATUS");
+		__cpcmd(buf, NULL, 0, NULL);
+		sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno);
+		__cpcmd(buf, NULL, 0, NULL);
+		break;
+	case DUMP_METHOD_CCW_DIAG:
+		diag308_dump(dump_block_ccw);
+		break;
+	case DUMP_METHOD_FCP_DIAG:
+		diag308_dump(dump_block_fcp);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * "dump" action entry point. Does nothing when no dump method is selected;
+ * otherwise calls smp_send_stop() and runs __dump_run() via
+ * smp_call_ipl_cpu().
+ */
+static void dump_run(struct shutdown_trigger *trigger)
+{
+	if (dump_method != DUMP_METHOD_NONE) {
+		smp_send_stop();
+		smp_call_ipl_cpu(__dump_run, NULL);
+	}
+}
+
+/*
+ * Set up CCW dump support: allocate the CCW dump block, register its sysfs
+ * group and fill in the block header.
+ *
+ * Returns 0 on success, -ENOMEM if the block cannot be allocated, or the
+ * error from sysfs_create_group(), in which case the block is freed again.
+ */
+static int __init dump_ccw_init(void)
+{
+	int rc;
+
+	dump_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!dump_block_ccw)
+		return -ENOMEM;
+
+	rc = sysfs_create_group(&dump_kset->kobj, &dump_ccw_attr_group);
+	if (!rc) {
+		dump_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN;
+		dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
+		dump_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
+		dump_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+		dump_capabilities |= DUMP_TYPE_CCW;
+		return 0;
+	}
+
+	free_page((unsigned long)dump_block_ccw);
+	return rc;
+}
+
+/*
+ * Set up FCP dump support: allocate the FCP dump block, register its sysfs
+ * group and fill in the block header.
+ *
+ * Returns 0 without doing anything when sclp_ipl_info reports no dump
+ * support or diag308 "set" does not work. Otherwise returns 0 on success,
+ * -ENOMEM if the block cannot be allocated, or the error from
+ * sysfs_create_group(), in which case the block is freed again.
+ */
+static int __init dump_fcp_init(void)
+{
+	int rc;
+
+	/* LDIPL DUMP is not installed, or the dump block cannot be set */
+	if (!sclp_ipl_info.has_dump || !diag308_set_works)
+		return 0;
+
+	dump_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!dump_block_fcp)
+		return -ENOMEM;
+
+	rc = sysfs_create_group(&dump_kset->kobj, &dump_fcp_attr_group);
+	if (!rc) {
+		dump_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+		dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
+		dump_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
+		dump_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
+		dump_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_DUMP;
+		dump_capabilities |= DUMP_TYPE_FCP;
+		return 0;
+	}
+
+	free_page((unsigned long)dump_block_fcp);
+	return rc;
+}
+
+/*
+ * Initialize the "dump" shutdown action: create the "dump" kset and its
+ * "dump_type" attribute, set up CCW and FCP dump support and start with
+ * dump type "none".
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int __init dump_init(void)
+{
+	int rc;
+
+	dump_kset = kset_create_and_add("dump", NULL, firmware_kobj);
+	if (!dump_kset)
+		return -ENOMEM;
+
+	rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr);
+	if (rc) {
+		kset_unregister(dump_kset);
+		return rc;
+	}
+
+	/* FCP setup runs only if CCW setup succeeded. */
+	rc = dump_ccw_init();
+	if (!rc)
+		rc = dump_fcp_init();
+	if (rc)
+		return rc;
+
+	dump_set_type(DUMP_TYPE_NONE);
+	return 0;
+}
+
+/* Shutdown action "dump": set up by dump_init(), executed by dump_run(). */
+static struct shutdown_action __refdata dump_action = {
+	.name	= SHUTDOWN_ACTION_DUMP_STR,
+	.fn	= dump_run,
+	.init	= dump_init,
+};
+
+/*
+ * "dump_reipl" action entry point: store the address of the active re-IPL
+ * block and a checksum over it in the lowcore fields ipib and
+ * ipib_checksum, then run the dump action.
+ * NOTE(review): presumably whatever runs after the dump uses these fields
+ * to re-IPL -- the consumer is not visible here; confirm.
+ */
+static void dump_reipl_run(struct shutdown_trigger *trigger)
+{
+	unsigned long ipib = (unsigned long) reipl_block_actual;
+	unsigned int csum;
+
+	/* Checksum covers hdr.len bytes of the block. */
+	csum = csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
+	mem_assign_absolute(S390_lowcore.ipib, ipib);
+	mem_assign_absolute(S390_lowcore.ipib_checksum, csum);
+	dump_run(trigger);
+}
+
+/*
+ * The "dump_reipl" action is only available when diag308 "set" works.
+ * Returns 0 in that case and -EOPNOTSUPP otherwise.
+ */
+static int __init dump_reipl_init(void)
+{
+	return diag308_set_works ? 0 : -EOPNOTSUPP;
+}
+
+/* Shutdown action "dump_reipl": dump first, with re-IPL data left in lowcore. */
+static struct shutdown_action __refdata dump_reipl_action = {
+	.name	= SHUTDOWN_ACTION_DUMP_REIPL_STR,
+	.fn	= dump_reipl_run,
+	.init	= dump_reipl_init,
+};
+
+/*
+ * vmcmd shutdown action: Trigger vm command on shutdown.
+ */
+
+static char vmcmd_on_reboot[128];	/* used for ON_REIPL_STR */
+static char vmcmd_on_panic[128];	/* used for ON_PANIC_STR */
+static char vmcmd_on_halt[128];		/* used for ON_HALT_STR */
+static char vmcmd_on_poff[128];		/* used for ON_POFF_STR */
+static char vmcmd_on_restart[128];	/* used for ON_RESTART_STR */
+
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_restart, "%s\n", "%s\n", vmcmd_on_restart);
+
+static struct attribute *vmcmd_attrs[] = {
+	&sys_vmcmd_on_reboot_attr.attr,
+	&sys_vmcmd_on_panic_attr.attr,
+	&sys_vmcmd_on_halt_attr.attr,
+	&sys_vmcmd_on_poff_attr.attr,
+	&sys_vmcmd_on_restart_attr.attr,
+	NULL,	/* end of list */
+};
+
+static struct attribute_group vmcmd_attr_group = {
+	.attrs = vmcmd_attrs,	/* registered by vmcmd_init() */
+};
+
+static struct kset *vmcmd_kset;	/* created in vmcmd_init() */
+
+/* Issue the VM command configured for the trigger that fired, if any. */
+static void vmcmd_run(struct shutdown_trigger *trigger)
+{
+	const char *name = trigger->name;
+	char *cmd;
+
+	if (!strcmp(name, ON_REIPL_STR))
+		cmd = vmcmd_on_reboot;
+	else if (!strcmp(name, ON_PANIC_STR))
+		cmd = vmcmd_on_panic;
+	else if (!strcmp(name, ON_HALT_STR))
+		cmd = vmcmd_on_halt;
+	else if (!strcmp(name, ON_POFF_STR))
+		cmd = vmcmd_on_poff;
+	else if (!strcmp(name, ON_RESTART_STR))
+		cmd = vmcmd_on_restart;
+	else
+		return;
+	if (cmd[0] != 0)
+		__cpcmd(cmd, NULL, 0, NULL);
+}
+
+static int vmcmd_init(void)
+{
+	if (!MACHINE_IS_VM)
+		return -EOPNOTSUPP;	/* not running under VM */
+	vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj);
+	if (vmcmd_kset)
+		return sysfs_create_group(&vmcmd_kset->kobj, &vmcmd_attr_group);
+	return -ENOMEM;
+}
+
+static struct shutdown_action vmcmd_action = {
+	.name = SHUTDOWN_ACTION_VMCMD_STR, .fn = vmcmd_run, .init = vmcmd_init};
+
+/*
+ * stop shutdown action: Stop Linux on shutdown. For the panic and restart
+ * triggers disabled_wait() is called before smp_stop_cpu().
+ */
+static void stop_run(struct shutdown_trigger *trigger)
+{
+	if (!strcmp(trigger->name, ON_PANIC_STR) ||
+	    !strcmp(trigger->name, ON_RESTART_STR))
+		disabled_wait((unsigned long) __builtin_return_address(0));
+	smp_stop_cpu();
+}
+
+static struct shutdown_action stop_action = {
+	.name = SHUTDOWN_ACTION_STOP_STR, .fn = stop_run, .init = NULL};
+
+/* action list: all actions that set_trigger() can select by name */
+
+static struct shutdown_action *shutdown_actions_list[] = {
+	&ipl_action, &reipl_action, &dump_reipl_action, &dump_action,
+	&vmcmd_action, &stop_action};
+#define SHUTDOWN_ACTIONS_COUNT (sizeof(shutdown_actions_list) / sizeof(void *))
+
+/*
+ * Trigger section
+ */
+
+static struct kset *shutdown_actions_kset;	/* set in shutdown_triggers_init() */
+
+static int set_trigger(const char *buf, struct shutdown_trigger *trigger,
+		       size_t len)
+{
+	struct shutdown_action *action;
+	int i;
+
+	for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) {
+		action = shutdown_actions_list[i];
+		if (!sysfs_streq(buf, action->name))
+			continue;
+		if (action->init_rc)	/* init failed: report its error */
+			return action->init_rc;
+		trigger->action = action;
+		return len;
+	}
+	return -EINVAL;	/* no action with that name */
+}
+
+/* on reipl: trigger state plus the sysfs pair that shows/selects its action */
+
+static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR,
+						    &reipl_action};
+
+static ssize_t on_reboot_show(struct kobject *kobj,
+			      struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%s\n", on_reboot_trigger.action->name);
+}
+
+static ssize_t on_reboot_store(struct kobject *kobj,
+			       struct kobj_attribute *attr,
+			       const char *buf, size_t len)
+{
+	return set_trigger(buf, &on_reboot_trigger, len);
+}
+static struct kobj_attribute on_reboot_attr = __ATTR_RW(on_reboot);
+
+static void do_machine_restart(char *__unused)
+{
+	smp_send_stop();
+	on_reboot_trigger.action->fn(&on_reboot_trigger);
+	reipl_run(NULL);	/* reached only if the action returns */
+}
+void (*_machine_restart)(char *command) = do_machine_restart;
+
+/* on panic: trigger (initially stop_action) and its sysfs show/store pair */
+
+static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action};
+
+static ssize_t on_panic_show(struct kobject *kobj,
+			     struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%s\n", on_panic_trigger.action->name);
+}
+
+static ssize_t on_panic_store(struct kobject *kobj,
+			      struct kobj_attribute *attr,
+			      const char *buf, size_t len)
+{
+	return set_trigger(buf, &on_panic_trigger, len);
+}
+static struct kobj_attribute on_panic_attr = __ATTR_RW(on_panic);
+
+static void do_panic(void)
+{
+	lgr_info_log();
+	on_panic_trigger.action->fn(&on_panic_trigger);
+	stop_run(&on_panic_trigger);	/* reached only if the action returns */
+}
+
+/* on restart: trigger (initially stop_action) and its sysfs show/store pair */
+
+static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
+	&stop_action};
+
+static ssize_t on_restart_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%s\n", on_restart_trigger.action->name);
+}
+
+static ssize_t on_restart_store(struct kobject *kobj,
+				struct kobj_attribute *attr,
+				const char *buf, size_t len)
+{
+	return set_trigger(buf, &on_restart_trigger, len);
+}
+static struct kobj_attribute on_restart_attr = __ATTR_RW(on_restart);
+
+static void __do_restart(void *ignore)
+{
+	__arch_local_irq_stosm(0x04); /* enable DAT */
+	smp_send_stop();
+#ifdef CONFIG_CRASH_DUMP
+	crash_kexec(NULL);	/* tried before the configured restart action */
+#endif
+	on_restart_trigger.action->fn(&on_restart_trigger);
+	stop_run(&on_restart_trigger);	/* reached only if the action returns */
+}
+
+void do_restart(void)
+{
+	tracing_off();
+	debug_locks_off();
+	lgr_info_log();
+	smp_call_online_cpu(__do_restart, NULL);	/* continues in __do_restart() */
+}
+
+/* on halt: trigger (initially stop_action) and its sysfs show/store pair */
+
+static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action};
+
+static ssize_t on_halt_show(struct kobject *kobj,
+			    struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%s\n", on_halt_trigger.action->name);
+}
+
+static ssize_t on_halt_store(struct kobject *kobj,
+			     struct kobj_attribute *attr,
+			     const char *buf, size_t len)
+{
+	return set_trigger(buf, &on_halt_trigger, len);
+}
+static struct kobj_attribute on_halt_attr = __ATTR_RW(on_halt);
+
+static void do_machine_halt(void)
+{
+	smp_send_stop();
+	on_halt_trigger.action->fn(&on_halt_trigger);
+	stop_run(&on_halt_trigger);	/* reached only if the action returns */
+}
+void (*_machine_halt)(void) = do_machine_halt;
+
+/* on power off: trigger (initially stop_action) and its sysfs show/store pair */
+
+static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action};
+
+static ssize_t on_poff_show(struct kobject *kobj,
+			    struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%s\n", on_poff_trigger.action->name);
+}
+
+static ssize_t on_poff_store(struct kobject *kobj,
+			     struct kobj_attribute *attr,
+			     const char *buf, size_t len)
+{
+	return set_trigger(buf, &on_poff_trigger, len);
+}
+static struct kobj_attribute on_poff_attr = __ATTR_RW(on_poff);
+
+static void do_machine_power_off(void)
+{
+	smp_send_stop();
+	on_poff_trigger.action->fn(&on_poff_trigger);
+	stop_run(&on_poff_trigger);	/* reached only if the action returns */
+}
+void (*_machine_power_off)(void) = do_machine_power_off;
+
+static struct attribute *shutdown_action_attrs[] = {
+	&on_restart_attr.attr,
+	&on_reboot_attr.attr,
+	&on_panic_attr.attr,
+	&on_halt_attr.attr,
+	&on_poff_attr.attr,
+	NULL,	/* end of list */
+};
+
+static struct attribute_group shutdown_action_attr_group = {
+	.attrs = shutdown_action_attrs,	/* used in shutdown_triggers_init() */
+};
+
+/* Create the "shutdown_actions" kset and its attributes; panic on failure. */
+static void __init shutdown_triggers_init(void)
+{
+	struct kset *kset;
+
+	kset = kset_create_and_add("shutdown_actions", NULL, firmware_kobj);
+	shutdown_actions_kset = kset;
+	/* Both steps must succeed, there is no way to report an error. */
+	if (kset &&
+	    !sysfs_create_group(&kset->kobj, &shutdown_action_attr_group))
+		return;
+	panic("shutdown_triggers_init failed\n");
+}
+
+/* Run each action's init hook, if present, and record its return code. */
+static void __init shutdown_actions_init(void)
+{
+	struct shutdown_action **act = shutdown_actions_list;
+	int i;
+
+	/* Actions without an init hook keep their init_rc untouched. */
+	for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++, act++)
+		if ((*act)->init)
+			(*act)->init_rc = (*act)->init();
+}
+
+static int __init s390_ipl_init(void)
+{
+	char blanks[8] = {0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40};
+
+	sclp_get_ipl_info(&sclp_ipl_info);
+	/*
+	 * Fix loadparm: on some systems the (SCSI) LOADPARM returned by
+	 * read SCP info is invalid (all EBCDIC blanks) after a boot via
+	 * diag308. If diag308 works, take the value from the diag308 IPL
+	 * block instead.
+	 *
+	 * On other systems diag308 store does not work when booted from
+	 * the HMC; there READ SCP info already provides the correct
+	 * value, so nothing is copied.
+	 */
+	if (!memcmp(sclp_ipl_info.loadparm, blanks, sizeof(blanks)) &&
+	    diag308_set_works)
+		memcpy(sclp_ipl_info.loadparm, ipl_block.hdr.loadparm,
+		       LOADPARM_LEN);
+	shutdown_actions_init();
+	shutdown_triggers_init();
+	return 0;
+}
+
+__initcall(s390_ipl_init);
+
+/*
+ * Copy at most @n characters from @src to @dst, dropping every double quote.
+ * @dst is NOT NUL-terminated here; callers terminate the buffer themselves.
+ */
+static void __init strncpy_skip_quote(char *dst, char *src, int n)
+{
+	int sx, dx = 0;
+
+	/* Test the bound before storing so that n <= 0 writes nothing. */
+	for (sx = 0; src[sx] != 0 && dx < n; sx++)
+		if (src[sx] != '"')
+			dst[dx++] = src[sx];
+}
+
+static int __init vmcmd_on_reboot_setup(char *str)
+{
+	if (MACHINE_IS_VM) {
+		strncpy_skip_quote(vmcmd_on_reboot, str, 127);
+		vmcmd_on_reboot[127] = 0;	/* helper never terminates */
+		on_reboot_trigger.action = &vmcmd_action;
+	}
+	return 1;
+}
+__setup("vmreboot=", vmcmd_on_reboot_setup);
+
+static int __init vmcmd_on_panic_setup(char *str)
+{
+	if (MACHINE_IS_VM) {
+		strncpy_skip_quote(vmcmd_on_panic, str, 127);
+		vmcmd_on_panic[127] = 0;	/* helper never terminates */
+		on_panic_trigger.action = &vmcmd_action;
+	}
+	return 1;
+}
+__setup("vmpanic=", vmcmd_on_panic_setup);
+
+static int __init vmcmd_on_halt_setup(char *str)
+{
+	if (MACHINE_IS_VM) {
+		strncpy_skip_quote(vmcmd_on_halt, str, 127);
+		vmcmd_on_halt[127] = 0;	/* helper never terminates */
+		on_halt_trigger.action = &vmcmd_action;
+	}
+	return 1;
+}
+__setup("vmhalt=", vmcmd_on_halt_setup);
+
+static int __init vmcmd_on_poff_setup(char *str)
+{
+	if (MACHINE_IS_VM) {
+		strncpy_skip_quote(vmcmd_on_poff, str, 127);
+		vmcmd_on_poff[127] = 0;	/* helper never terminates */
+		on_poff_trigger.action = &vmcmd_action;
+	}
+	return 1;
+}
+__setup("vmpoff=", vmcmd_on_poff_setup);
+
+static int on_panic_notify(struct notifier_block *self,
+			   unsigned long event, void *data)
+{
+	do_panic();	/* all three notifier arguments are ignored */
+	return NOTIFY_OK;
+}
+
+static struct notifier_block on_panic_nb = {
+	.notifier_call = on_panic_notify,
+	.priority = INT_MIN,	/* presumably so that it runs last - confirm */
+};
+
+void __init setup_ipl(void)
+{
+	ipl_info.type = get_ipl_type();
+	switch (ipl_info.type) {
+	case IPL_TYPE_CCW:
+		ipl_info.data.ccw.dev_id.devno = ipl_devno;
+		ipl_info.data.ccw.dev_id.ssid = 0;
+		break;
+	case IPL_TYPE_FCP:
+	case IPL_TYPE_FCP_DUMP:	/* handled exactly like IPL_TYPE_FCP */
+		ipl_info.data.fcp.dev_id.devno =
+			IPL_PARMBLOCK_START->ipl_info.fcp.devno;
+		ipl_info.data.fcp.dev_id.ssid = 0;
+		ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn;
+		ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun;
+		break;
+	case IPL_TYPE_NSS:	/* NOTE(review): strncpy may leave name unterminated */
+		strncpy(ipl_info.data.nss.name, kernel_nss_name,
+			sizeof(ipl_info.data.nss.name));
+		break;
+	case IPL_TYPE_UNKNOWN:
+		/* We have no info to copy; ipl_info.data is left as is */
+		break;
+	}
+	atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
+}
+
+/* Store the IPL block via diag308 and note whether the call is usable. */
+void __init ipl_update_parameters(void)
+{
+	int rc = diag308(DIAG308_STORE, &ipl_block);
+
+	if (rc == DIAG308_RC_OK || rc == DIAG308_RC_NOCONFIG)
+		diag308_set_works = 1;
+}
+
+/* Save the IPL device number and, for QDIO IPL, relocate the parm block. */
+void __init ipl_save_parameters(void)
+{
+	struct cio_iplinfo info;
+	void *from;
+
+	if (cio_get_iplinfo(&info))
+		return;
+	ipl_devno = info.devno;
+	ipl_flags |= IPL_DEVNO_VALID;
+	if (!info.is_qdio)
+		return;
+	ipl_flags |= IPL_PARMBLOCK_VALID;
+	/* Copy one page to the fixed origin; memmove allows overlap. */
+	from = (void *)(unsigned long)S390_lowcore.ipl_parmblock_ptr;
+	memmove((void *)IPL_PARMBLOCK_ORIGIN, from, PAGE_SIZE);
+	S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN;
+}
+
+static LIST_HEAD(rcall);	/* registered reset calls */
+static DEFINE_MUTEX(rcall_mutex);	/* serialises changes to rcall */
+
+void register_reset_call(struct reset_call *reset)
+{
+	mutex_lock(&rcall_mutex);
+	list_add(&reset->list, &rcall);	/* new entries go to the list head */
+	mutex_unlock(&rcall_mutex);
+}
+EXPORT_SYMBOL_GPL(register_reset_call);
+
+void unregister_reset_call(struct reset_call *reset)
+{
+	mutex_lock(&rcall_mutex);
+	list_del(&reset->list);	/* assumes @reset is currently on rcall */
+	mutex_unlock(&rcall_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_reset_call);
+
+/* Reset via diag308 when it works, else run every registered reset call. */
+static void do_reset_calls(void)
+{
+	struct reset_call *call;
+
+	if (diag308_set_works)
+		diag308_reset();
+	else
+		list_for_each_entry(call, &rcall, list)
+			call->fn();
+}
+
+u32 dump_prefix_page;
+
+void s390_reset_system(void (*fn_pre)(void),
+		       void (*fn_post)(void *), void *data)
+{
+	struct _lowcore *lc;
+
+	lc = (struct _lowcore *)(unsigned long) store_prefix();
+
+	/* Stack for interrupt/machine check handler */
+	lc->panic_stack = S390_lowcore.panic_stack;
+
+	/* Save prefix page address for dump case */
+	dump_prefix_page = (u32)(unsigned long) lc;
+
+	/* Disable prefixing */
+	set_prefix(0);
+
+	/* Disable lowcore protection */
+	__ctl_clear_bit(0,28);
+
+	/* Set new machine check handler */
+	S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
+	S390_lowcore.mcck_new_psw.addr =
+		PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler;
+
+	/* Set new program check handler */
+	S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
+	S390_lowcore.program_new_psw.addr =
+		PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+
+	/*
+	 * Clear subchannel ID and number to signal new kernel that no CCW or
+	 * SCSI IPL has been done (for kexec and kdump)
+	 */
+	S390_lowcore.subchannel_id = 0;
+	S390_lowcore.subchannel_nr = 0;
+
+	/* Store status at absolute zero */
+	store_status();
+
+	/* Call the optional @fn_pre hook before the reset */
+	if (fn_pre)
+		fn_pre();
+	do_reset_calls();
+	/* Call the optional @fn_post hook with @data after the reset */
+	if (fn_post)
+		fn_post(data);
+}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
new file mode 100644
index 000000000..e9d9addfa
--- /dev/null
+++ b/arch/s390/kernel/irq.c
@@ -0,0 +1,310 @@
+/*
+ *    Copyright IBM Corp. 2004, 2011
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Holger Smolinski <Holger.Smolinski@de.ibm.com>,
+ *		 Thomas Spatzier <tspat@de.ibm.com>,
+ *
+ * This file contains interrupt related functions.
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/profile.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ftrace.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/cputime.h>
+#include <asm/lowcore.h>
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+#include "entry.h"
+
+DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
+EXPORT_PER_CPU_SYMBOL_GPL(irq_stat);
+
+struct irq_class {
+	int irq;	/* irq number or irq_stat index, depending on the table */
+	char *name;	/* short label printed at the start of a row */
+	char *desc;	/* optional description appended to the row */
+};
+
+/*
+ * The list of "main" irq classes on s390. This is the list of interrupts
+ * that appear both in /proc/stat ("intr" line) and /proc/interrupts.
+ * Historically only external and I/O interrupts have been part of /proc/stat.
+ * We can't add the split external and I/O sub classes since the first field
+ * in the "intr" line in /proc/stat is supposed to be the sum of all other
+ * fields.
+ * Since the external and I/O interrupt fields are already sums, we would end
+ * up with a sum that accounts for each interrupt twice.
+ */
+static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
+	{.irq = EXT_INTERRUPT,	.name = "EXT"},
+	{.irq = IO_INTERRUPT,	.name = "I/O"},
+	{.irq = THIN_INTERRUPT, .name = "AIO"},
+};
+
+/*
+ * The list of split external and I/O interrupts that appear only in
+ * /proc/interrupts. It also contains events that are neither external
+ * nor I/O interrupts, such as NMIs and CPU restarts.
+ */
+static const struct irq_class irqclass_sub_desc[] = {
+	{.irq = IRQEXT_CLK, .name = "CLK", .desc = "[EXT] Clock Comparator"},
+	{.irq = IRQEXT_EXC, .name = "EXC", .desc = "[EXT] External Call"},
+	{.irq = IRQEXT_EMS, .name = "EMS", .desc = "[EXT] Emergency Signal"},
+	{.irq = IRQEXT_TMR, .name = "TMR", .desc = "[EXT] CPU Timer"},
+	{.irq = IRQEXT_TLA, .name = "TAL", .desc = "[EXT] Timing Alert"},
+	{.irq = IRQEXT_PFL, .name = "PFL", .desc = "[EXT] Pseudo Page Fault"},
+	{.irq = IRQEXT_DSD, .name = "DSD", .desc = "[EXT] DASD Diag"},
+	{.irq = IRQEXT_VRT, .name = "VRT", .desc = "[EXT] Virtio"},
+	{.irq = IRQEXT_SCP, .name = "SCP", .desc = "[EXT] Service Call"},
+	{.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"},
+	{.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
+	{.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
+	{.irq = IRQEXT_CMR, .name = "CMR", .desc = "[EXT] CPU-Measurement: RI"},
+	{.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
+	{.irq = IRQIO_CIO,  .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
+	{.irq = IRQIO_QAI,  .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
+	{.irq = IRQIO_DAS,  .name = "DAS", .desc = "[I/O] DASD"},
+	{.irq = IRQIO_C15,  .name = "C15", .desc = "[I/O] 3215"},
+	{.irq = IRQIO_C70,  .name = "C70", .desc = "[I/O] 3270"},
+	{.irq = IRQIO_TAP,  .name = "TAP", .desc = "[I/O] Tape"},
+	{.irq = IRQIO_VMR,  .name = "VMR", .desc = "[I/O] Unit Record Devices"},
+	{.irq = IRQIO_LCS,  .name = "LCS", .desc = "[I/O] LCS"},
+	{.irq = IRQIO_CTC,  .name = "CTC", .desc = "[I/O] CTC"},
+	{.irq = IRQIO_APB,  .name = "APB", .desc = "[I/O] AP Bus"},
+	{.irq = IRQIO_ADM,  .name = "ADM", .desc = "[I/O] EADM Subchannel"},
+	{.irq = IRQIO_CSC,  .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
+	{.irq = IRQIO_PCI,  .name = "PCI", .desc = "[I/O] PCI Interrupt" },
+	{.irq = IRQIO_MSI,  .name = "MSI", .desc = "[I/O] MSI Interrupt" },
+	{.irq = IRQIO_VIR,  .name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
+	{.irq = IRQIO_VAI,  .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
+	{.irq = NMI_NMI,    .name = "NMI", .desc = "[NMI] Machine Check"},
+	{.irq = CPU_RST,    .name = "RST", .desc = "[CPU] CPU Restart"},
+};
+
+void __init init_IRQ(void)
+{
+	BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS);
+	init_cio_interrupts();
+	init_airq_interrupts();
+	init_ext_interrupts();	/* hash table and EXT_INTERRUPT handler */
+}
+
+/* Interrupt entry: handle @irq with @regs installed as current irq regs. */
+void do_IRQ(struct pt_regs *regs, int irq)
+{
+	struct pt_regs *saved = set_irq_regs(regs);
+
+	irq_enter();
+	/* Serve timer interrupts first. */
+	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+		clock_comparator_work();
+	generic_handle_irq(irq);
+	irq_exit();
+	set_irq_regs(saved);
+}
+
+/*
+ * show_interrupts is needed by /proc/interrupts.
+ */
+int show_interrupts(struct seq_file *p, void *v)
+{
+	int index = *(loff_t *) v;
+	const struct irq_class *cls;
+	int cpu, i;
+
+	get_online_cpus();
+	if (index == 0) {
+		/* Row 0 also carries the per-CPU column headings. */
+		seq_puts(p, "           ");
+		for_each_online_cpu(cpu)
+			seq_printf(p, "CPU%d       ", cpu);
+		seq_putc(p, '\n');
+	}
+	if (index >= NR_IRQS) {
+		/* Past the last irq: print all sub class counters at once. */
+		for (i = 0; i < NR_ARCH_IRQS; i++) {
+			cls = &irqclass_sub_desc[i];
+			seq_printf(p, "%s: ", cls->name);
+			for_each_online_cpu(cpu)
+				seq_printf(p, "%10u ",
+					per_cpu(irq_stat, cpu).irqs[cls->irq]);
+			if (cls->desc)
+				seq_printf(p, "  %s", cls->desc);
+			seq_putc(p, '\n');
+		}
+	} else if (index < NR_IRQS_BASE) {
+		cls = &irqclass_main_desc[index];
+		seq_printf(p, "%s: ", cls->name);
+		for_each_online_cpu(cpu)
+			seq_printf(p, "%10u ", kstat_irqs_cpu(cls->irq, cpu));
+		seq_putc(p, '\n');
+	}
+	put_online_cpus();
+	return 0;
+}
+
+unsigned int arch_dynirq_lower_bound(unsigned int from)
+{
+	return from >= NR_IRQS_BASE ? from : NR_IRQS_BASE;
+}
+
+/*
+ * Switch to the asynchronous interrupt stack for softirq execution.
+ */
+void do_softirq_own_stack(void)
+{
+	unsigned long old, new;
+
+	/* Get current stack pointer (register 15). */
+	asm volatile("la %0,0(15)" : "=a" (old));
+	/* Check against async. stack address range. */
+	new = S390_lowcore.async_stack;
+	if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) {
+		/* Switch to the async. stack, chaining back to the old one. */
+		new -= STACK_FRAME_OVERHEAD;
+		((struct stack_frame *) new)->back_chain = old;
+		asm volatile("   la    15,0(%0)\n"
+			     "   basr  14,%2\n"
+			     "   la    15,0(%1)\n"
+			     : : "a" (new), "a" (old),
+			         "a" (__do_softirq)
+			     : "0", "1", "2", "3", "4", "5", "14",
+			       "cc", "memory" );
+	} else {
+		/* We are already on the async stack. */
+		__do_softirq();
+	}
+}
+
+/*
+ * ext_int_hash[index] is the list head for all external interrupts that hash
+ * to this index.
+ */
+static struct hlist_head ext_int_hash[32] ____cacheline_aligned;
+
+struct ext_int_info {
+	ext_int_handler_t handler;
+	struct hlist_node entry;	/* link in ext_int_hash[ext_hash(code)] */
+	struct rcu_head rcu;		/* used by kfree_rcu() on unregister */
+	u16 code;			/* external interrupt code handled */
+};
+
+/* ext_int_hash_lock protects the handler lists for external interrupts */
+static DEFINE_SPINLOCK(ext_int_hash_lock);
+
+static inline int ext_hash(u16 code)
+{
+	BUILD_BUG_ON(!is_power_of_2(ARRAY_SIZE(ext_int_hash)));
+	/* The mask below only works as a modulo for power-of-2 sizes. */
+	return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1);
+}
+
+/* Add @handler to the hash chain for external interrupt @code. */
+int register_external_irq(u16 code, ext_int_handler_t handler)
+{
+	struct ext_int_info *info;
+	unsigned long flags;
+
+	info = kmalloc(sizeof(*info), GFP_ATOMIC);
+	if (!info)
+		return -ENOMEM;
+	info->handler = handler;
+	info->code = code;
+
+	/* Publish the fully initialised entry under the hash lock. */
+	spin_lock_irqsave(&ext_int_hash_lock, flags);
+	hlist_add_head_rcu(&info->entry, &ext_int_hash[ext_hash(code)]);
+	spin_unlock_irqrestore(&ext_int_hash_lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL(register_external_irq);
+
+/* Drop every entry matching @code and @handler; always returns 0. */
+int unregister_external_irq(u16 code, ext_int_handler_t handler)
+{
+	struct ext_int_info *info;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ext_int_hash_lock, flags);
+	hlist_for_each_entry_rcu(info, &ext_int_hash[ext_hash(code)], entry) {
+		if (info->code != code || info->handler != handler)
+			continue;
+		hlist_del_rcu(&info->entry);
+		kfree_rcu(info, rcu);
+	}
+	spin_unlock_irqrestore(&ext_int_hash_lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL(unregister_external_irq);
+
+/* Dispatch an external interrupt to all handlers registered for its code. */
+static irqreturn_t do_ext_interrupt(int irq, void *dummy)
+{
+	struct pt_regs *regs = get_irq_regs();
+	struct ext_int_info *info;
+	struct ext_code ext_code;
+	int index;
+
+	ext_code = *(struct ext_code *) &regs->int_code;
+	if (ext_code.code != EXT_IRQ_CLK_COMP)
+		set_cpu_flag(CIF_NOHZ_DELAY);
+	index = ext_hash(ext_code.code);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(info, &ext_int_hash[index], entry) {
+		if (likely(info->code == ext_code.code))
+			info->handler(ext_code, regs->int_parm,
+				      regs->int_parm_long);
+	}
+	rcu_read_unlock();
+	return IRQ_HANDLED;
+}
+
+static struct irqaction external_interrupt = {
+	.name	 = "EXT",
+	.handler = do_ext_interrupt,	/* installed by init_ext_interrupts() */
+};
+
+/* Empty all hash chains, then install the EXT_INTERRUPT handler. */
+void __init init_ext_interrupts(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ext_int_hash); i++)
+		INIT_HLIST_HEAD(ext_int_hash + i);
+	irq_set_chip_and_handler(EXT_INTERRUPT, &dummy_irq_chip,
+				 handle_percpu_irq);
+	setup_irq(EXT_INTERRUPT, &external_interrupt);
+}
+
+static DEFINE_SPINLOCK(irq_subclass_lock);	/* guards the refcounts below */
+static unsigned char irq_subclass_refcount[64];	/* indexed by subclass */
+
+/* Take a reference on @subclass; the first one does ctl_set_bit(0, ...). */
+void irq_subclass_register(enum irq_subclass subclass)
+{
+	spin_lock(&irq_subclass_lock);
+	if (irq_subclass_refcount[subclass]++ == 0)
+		ctl_set_bit(0, subclass);
+	spin_unlock(&irq_subclass_lock);
+}
+EXPORT_SYMBOL(irq_subclass_register);
+
+/* Drop a reference on @subclass; the last one does ctl_clear_bit(0, ...). */
+void irq_subclass_unregister(enum irq_subclass subclass)
+{
+	spin_lock(&irq_subclass_lock);
+	if (--irq_subclass_refcount[subclass] == 0)
+		ctl_clear_bit(0, subclass);
+	spin_unlock(&irq_subclass_lock);
+}
+EXPORT_SYMBOL(irq_subclass_unregister);
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
new file mode 100644
index 000000000..a90299600
--- /dev/null
+++ b/arch/s390/kernel/jump_label.c
@@ -0,0 +1,109 @@
+/*
+ * Jump label s390 support
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/stop_machine.h>
+#include <linux/jump_label.h>
+#include <asm/ipl.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+struct insn {
+	u16 opcode;	/* 0xc004 (brcl 0) or 0xc0f4 (brcl 15) in this file */
+	s32 offset;	/* branch offset, kept as byte distance >> 1 */
+} __packed;
+
+struct insn_args {
+	struct jump_entry *entry;	/* jump label to patch */
+	enum jump_label_type type;	/* JUMP_LABEL_ENABLE selects the branch */
+};
+
+static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn)
+{
+	/* brcl 0,0 - the nop form; @entry is not used here */
+	insn->opcode = 0xc004;
+	insn->offset = 0;
+}
+
+static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn)
+{
+	/* brcl 15,offset - offset is the code-to-target distance halved */
+	insn->opcode = 0xc0f4;
+	insn->offset = (entry->target - entry->code) >> 1;
+}
+
+static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
+			   struct insn *new)
+{
+	unsigned char *repl = (unsigned char *)new;
+	unsigned char *want = (unsigned char *)expected;
+	unsigned char *code = (unsigned char *)entry->code;
+
+	pr_emerg("Jump label code mismatch at %pS [%p]\n", code, code);
+	pr_emerg("Found:    %02x %02x %02x %02x %02x %02x\n",
+		 code[0], code[1], code[2], code[3], code[4], code[5]);
+	pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n",
+		 want[0], want[1], want[2], want[3], want[4], want[5]);
+	pr_emerg("New:      %02x %02x %02x %02x %02x %02x\n",
+		 repl[0], repl[1], repl[2], repl[3], repl[4], repl[5]);
+	panic("Corrupted kernel text");
+}
+
+static struct insn orignop = {
+	.opcode = 0xc004,	/* same opcode as jump_label_make_nop() */
+	.offset = JUMP_LABEL_NOP_OFFSET >> 1,	/* byte offset halved */
+};
+
+/*
+ * Check that entry->code holds the expected instruction, then replace it.
+ */
+static void __jump_label_transform(struct jump_entry *entry,
+				   enum jump_label_type type,
+				   int init)
+{
+	struct insn old, new, *expected;
+
+	if (type == JUMP_LABEL_ENABLE) {
+		jump_label_make_nop(entry, &old);
+		jump_label_make_branch(entry, &new);
+	} else {
+		jump_label_make_branch(entry, &old);
+		jump_label_make_nop(entry, &new);
+	}
+	/* With init set, the original nop (orignop) must still be in place. */
+	expected = init ? &orignop : &old;
+	if (memcmp((void *)entry->code, expected, sizeof(*expected)))
+		jump_label_bug(entry, expected, &new);
+	s390_kernel_write((void *)entry->code, &new, sizeof(new));
+}
+
+static int __sm_arch_jump_label_transform(void *data)
+{
+	struct insn_args *args = data;	/* passed via stop_machine() */
+
+	__jump_label_transform(args->entry, args->type, 0);	/* init = 0 */
+	return 0;
+}
+
+/*
+ * Patch one jump label at run time from within stop_machine().
+ */
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	struct insn_args args = { .entry = entry, .type = type };
+
+	stop_machine(__sm_arch_jump_label_transform, &args, NULL);
+}
+
+void arch_jump_label_transform_static(struct jump_entry *entry,
+				      enum jump_label_type type)
+{
+	__jump_label_transform(entry, type, 1);	/* init = 1: expect orignop */
+}
+
+#endif
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
new file mode 100644
index 000000000..389db56a2
--- /dev/null
+++ b/arch/s390/kernel/kprobes.c
@@ -0,0 +1,733 @@
+/*
+ *  Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corp. 2002, 2006
+ *
+ * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
+ */
+
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/stop_machine.h>
+#include <linux/kdebug.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <asm/cacheflush.h>
+#include <asm/sections.h>
+#include <asm/dis.h>
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe);
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+struct kretprobe_blackpoint kretprobe_blacklist[] = { };
+
+DEFINE_INSN_CACHE_OPS(dmainsn);
+
+static void *alloc_dmainsn_page(void)
+{
+	return (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
+}
+
+static void free_dmainsn_page(void *page)
+{
+	free_page((unsigned long)page);
+}
+
+struct kprobe_insn_cache kprobe_dmainsn_slots = {
+	.mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex),
+	.alloc = alloc_dmainsn_page,
+	.free = free_dmainsn_page,
+	.pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages),
+	.insn_size = MAX_INSN_SIZE,
+};
+
+static void copy_instruction(struct kprobe *p)
+{
+	unsigned long ip = (unsigned long) p->addr;
+	s64 disp, new_disp;
+	u64 addr, new_addr;
+
+	if (ftrace_location(ip) == ip) {
+		/*
+		 * If kprobes patches the instruction that is morphed by
+		 * ftrace make sure that kprobes always sees the branch
+		 * "jg .+24" that skips the mcount block or the "brcl 0,0"
+		 * in case of hotpatch.
+		 */
+		ftrace_generate_nop_insn((struct ftrace_insn *)p->ainsn.insn);
+		p->ainsn.is_ftrace_insn = 1;
+	} else
+		memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8));
+	p->opcode = p->ainsn.insn[0];	/* written back on disarm */
+	if (!probe_is_insn_relative_long(p->ainsn.insn))
+		return;
+	/*
+	 * For pc-relative instructions in RIL-b or RIL-c format patch the
+	 * RI2 displacement field. We have already made sure that the insn
+	 * slot for the patched instruction is within the same 2GB area
+	 * as the original instruction (either kernel image or module area).
+	 * Therefore the new displacement will always fit.
+	 */
+	disp = *(s32 *)&p->ainsn.insn[1];	/* counted in halfwords */
+	addr = (u64)(unsigned long)p->addr;
+	new_addr = (u64)(unsigned long)p->ainsn.insn;
+	new_disp = ((addr + (disp * 2)) - new_addr) / 2;	/* same target */
+	*(s32 *)&p->ainsn.insn[1] = new_disp;
+}
+NOKPROBE_SYMBOL(copy_instruction);
+
+static inline int is_kernel_addr(void *addr)
+{
+	return addr < (void *)_end;
+}
+
+static int s390_get_insn_slot(struct kprobe *p)
+{
+	/*
+	 * Pick a slot within the same 2GB area as the probed instruction, so
+	 * insns with a 32bit signed displacement can be patched and run there.
+	 */
+	if (is_kernel_addr(p->addr))
+		p->ainsn.insn = get_dmainsn_slot();
+	else if (is_module_addr(p->addr))
+		p->ainsn.insn = get_insn_slot();
+	else
+		p->ainsn.insn = NULL;
+	return p->ainsn.insn ? 0 : -ENOMEM;
+}
+NOKPROBE_SYMBOL(s390_get_insn_slot);
+
+static void s390_free_insn_slot(struct kprobe *p)
+{
+	if (!p->ainsn.insn)
+		return;
+	if (is_kernel_addr(p->addr))	/* same test as in s390_get_insn_slot() */
+		free_dmainsn_slot(p->ainsn.insn, 0);
+	else
+		free_insn_slot(p->ainsn.insn, 0);
+	p->ainsn.insn = NULL;	/* makes a second call a no-op */
+}
+NOKPROBE_SYMBOL(s390_free_insn_slot);
+
+int arch_prepare_kprobe(struct kprobe *p)
+{
+	if ((unsigned long) p->addr & 0x01)	/* odd addresses are rejected */
+		return -EINVAL;
+	/* Make sure the probe isn't going on a difficult instruction */
+	if (probe_is_prohibited_opcode(p->addr))
+		return -EINVAL;
+	if (s390_get_insn_slot(p))
+		return -ENOMEM;
+	copy_instruction(p);	/* fills p->ainsn.insn and p->opcode */
+	return 0;
+}
+NOKPROBE_SYMBOL(arch_prepare_kprobe);
+
+int arch_check_ftrace_location(struct kprobe *p)
+{
+	return 0;
+}
+
+struct swap_insn_args {	/* argument block for swap_instruction() */
+	struct kprobe *p;	/* probe whose site is patched */
+	unsigned int arm_kprobe : 1;	/* 1: write breakpoint, 0: restore */
+};
+
+static int swap_instruction(void *data)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long status = kcb->kprobe_status;
+	struct swap_insn_args *args = data;
+	struct ftrace_insn new_insn, *insn;
+	struct kprobe *p = args->p;
+	size_t len;
+
+	new_insn.opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode;
+	len = sizeof(new_insn.opc);	/* default: only the opcode is patched */
+	if (!p->ainsn.is_ftrace_insn)
+		goto skip_ftrace;
+	len = sizeof(new_insn);	/* ftrace site: the whole insn is patched */
+	insn = (struct ftrace_insn *) p->addr;
+	if (args->arm_kprobe) {
+		if (is_ftrace_nop(insn))
+			new_insn.disp = KPROBE_ON_FTRACE_NOP;
+		else
+			new_insn.disp = KPROBE_ON_FTRACE_CALL;
+	} else {
+		ftrace_generate_call_insn(&new_insn, (unsigned long)p->addr);
+		if (insn->disp == KPROBE_ON_FTRACE_NOP)
+			ftrace_generate_nop_insn(&new_insn);
+	}
+skip_ftrace:
+	kcb->kprobe_status = KPROBE_SWAP_INST;	/* see kprobe_trap_handler() */
+	s390_kernel_write(p->addr, &new_insn, len);
+	kcb->kprobe_status = status;	/* back to the status seen on entry */
+	return 0;
+}
+NOKPROBE_SYMBOL(swap_instruction);
+
+void arch_arm_kprobe(struct kprobe *p)
+{
+	struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
+
+	stop_machine(swap_instruction, &args, NULL);
+}
+NOKPROBE_SYMBOL(arch_arm_kprobe);
+
+void arch_disarm_kprobe(struct kprobe *p)
+{
+	struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
+
+	stop_machine(swap_instruction, &args, NULL);
+}
+NOKPROBE_SYMBOL(arch_disarm_kprobe);
+
+void arch_remove_kprobe(struct kprobe *p)
+{
+	s390_free_insn_slot(p);
+}
+NOKPROBE_SYMBOL(arch_remove_kprobe);
+
+static void enable_singlestep(struct kprobe_ctlblk *kcb,
+			      struct pt_regs *regs,
+			      unsigned long ip)
+{
+	struct per_regs per_kprobe;
+
+	/* Set up the PER control registers %cr9-%cr11 */
+	per_kprobe.control = PER_EVENT_IFETCH;
+	per_kprobe.start = ip;
+	per_kprobe.end = ip;
+
+	/* Save control regs and the PER, IO and EXT bits of the psw mask */
+	__ctl_store(kcb->kprobe_saved_ctl, 9, 11);
+	kcb->kprobe_saved_imask = regs->psw.mask &
+		(PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT);
+
+	/* Load PER control regs, set PER and clear IO/EXT in the psw mask */
+	__ctl_load(per_kprobe, 9, 11);
+	regs->psw.mask |= PSW_MASK_PER;
+	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
+	regs->psw.addr = ip | PSW_ADDR_AMODE;
+}
+NOKPROBE_SYMBOL(enable_singlestep);
+
+static void disable_singlestep(struct kprobe_ctlblk *kcb,
+			       struct pt_regs *regs,
+			       unsigned long ip)
+{
+	/* Restore control regs and psw mask, set new psw address */
+	__ctl_load(kcb->kprobe_saved_ctl, 9, 11);
+	regs->psw.mask &= ~PSW_MASK_PER;
+	regs->psw.mask |= kcb->kprobe_saved_imask;
+	regs->psw.addr = ip | PSW_ADDR_AMODE;
+}
+NOKPROBE_SYMBOL(disable_singlestep);
+
+/*
+ * Activate a kprobe by storing its pointer to current_kprobe. The
+ * previous kprobe is stored in kcb->prev_kprobe. A stack of up to
+ * two kprobes can be active, see KPROBE_REENTER.
+ */
+static void push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p)
+{
+	kcb->prev_kprobe.kp = __this_cpu_read(current_kprobe);
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+	__this_cpu_write(current_kprobe, p);
+}
+NOKPROBE_SYMBOL(push_kprobe);
+
+/*
+ * Deactivate a kprobe by backing up to the previous state. If the
+ * current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL,
+ * for any other state prev_kprobe.kp will be NULL.
+ */
+static void pop_kprobe(struct kprobe_ctlblk *kcb)
+{
+	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+NOKPROBE_SYMBOL(pop_kprobe);
+
+void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14];	/* keep original */
+
+	/* Replace the return addr in %r14 with the trampoline addr */
+	regs->gprs[14] = (unsigned long) &kretprobe_trampoline;
+}
+NOKPROBE_SYMBOL(arch_prepare_kretprobe);
+
+static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p)
+{
+	switch (kcb->kprobe_status) {
+	case KPROBE_HIT_SSDONE:
+	case KPROBE_HIT_ACTIVE:
+		kprobes_inc_nmissed_count(p);
+		break;
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+	default:
+		/*
+		 * A kprobe on the code path to single step an instruction
+		 * is a BUG. The code path resides in the .kprobes.text
+		 * section and is executed with interrupts disabled.
+		 */
+		printk(KERN_EMERG "Invalid kprobe detected at %p.\n", p->addr);
+		dump_kprobe(p);
+		BUG();
+	}
+}
+NOKPROBE_SYMBOL(kprobe_reenter_check);
+
+static int kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb;
+	struct kprobe *p;
+
+	/*
+	 * We want to disable preemption for the entire duration of kprobe
+	 * processing. That includes the calls to the pre/post handlers
+	 * and single stepping the kprobe instruction.
+	 */
+	preempt_disable();
+	kcb = get_kprobe_ctlblk();
+	p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2));
+
+	if (p) {
+		if (kprobe_running()) {
+			/*
+			 * We have hit a kprobe while another is still
+			 * active. This can happen in the pre and post
+			 * handler. Single step the instruction of the
+			 * new probe but do not call any handler function
+			 * of this secondary kprobe.
+			 * push_kprobe and pop_kprobe save and restore
+			 * the currently active kprobe.
+			 */
+			kprobe_reenter_check(kcb, p);
+			push_kprobe(kcb, p);
+			kcb->kprobe_status = KPROBE_REENTER;
+		} else {
+			/*
+			 * If we have no pre-handler or it returned 0, we
+			 * continue with single stepping. If we have a
+			 * pre-handler and it returned non-zero, it prepped
+			 * for calling the break_handler below on re-entry
+			 * for jprobe processing, so get out doing nothing
+			 * more here.
+			 */
+			push_kprobe(kcb, p);
+			kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+			if (p->pre_handler && p->pre_handler(p, regs))
+				return 1;	/* preemption stays disabled */
+			kcb->kprobe_status = KPROBE_HIT_SS;
+		}
+		enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn);
+		return 1;
+	} else if (kprobe_running()) {
+		p = __this_cpu_read(current_kprobe);
+		if (p->break_handler && p->break_handler(p, regs)) {
+			/*
+			 * Continuation after the jprobe completed and
+			 * caused the jprobe_return trap. The jprobe
+			 * break_handler "returns" to the original
+			 * function that still has the kprobe breakpoint
+			 * installed. We continue with single stepping.
+			 */
+			kcb->kprobe_status = KPROBE_HIT_SS;
+			enable_singlestep(kcb, regs,
+					  (unsigned long) p->ainsn.insn);
+			return 1;
+		} /* else:
+		   * No kprobe at this address and the current kprobe
+		   * has no break handler (no jprobe!). The kernel just
+		   * exploded, let the standard trap handler pick up the
+		   * pieces.
+		   */
+	} /* else:
+	   * No kprobe at this address and no active kprobe. The trap has
+	   * not been caused by a kprobe breakpoint. The race of breakpoint
+	   * vs. kprobe remove does not exist on s390 because we use
+	   * stop_machine to arm/disarm the breakpoints.
+	   */
+	preempt_enable_no_resched();
+	return 0;
+}
+NOKPROBE_SYMBOL(kprobe_handler);
+
+/*
+ * Function return probe trampoline:
+ *	- init_kprobes() establishes a probepoint here
+ *	- When the probed function returns, this probe
+ *		causes the handlers to fire
+ */
+static void __used kretprobe_trampoline_holder(void)
+{
+	asm volatile(".global kretprobe_trampoline\n"
+		     "kretprobe_trampoline: bcr 0,0\n");
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri;
+	struct hlist_head *head, empty_rp;
+	struct hlist_node *tmp;
+	unsigned long flags, orig_ret_address;
+	unsigned long trampoline_address;
+	kprobe_opcode_t *correct_ret_addr;
+
+	INIT_HLIST_HEAD(&empty_rp);
+	kretprobe_hash_lock(current, &head, &flags);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task either because multiple functions in the call path
+	 * have a return probe installed on them, and/or more than one
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+	 *	 function, the first instance's ret_addr will point to the
+	 *	 real return address, and all the rest will point to
+	 *	 kretprobe_trampoline
+	 */
+	ri = NULL;
+	orig_ret_address = 0;
+	correct_ret_addr = NULL;
+	trampoline_address = (unsigned long) &kretprobe_trampoline;
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		orig_ret_address = (unsigned long) ri->ret_addr;
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+	correct_ret_addr = ri->ret_addr;	/* found by the first pass */
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		orig_ret_address = (unsigned long) ri->ret_addr;
+
+		if (ri->rp && ri->rp->handler) {
+			ri->ret_addr = correct_ret_addr;
+			ri->rp->handler(ri, regs);
+		}
+
+		recycle_rp_inst(ri, &empty_rp);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
+
+	pop_kprobe(get_kprobe_ctlblk());
+	kretprobe_hash_unlock(current, &flags);
+	preempt_enable_no_resched();
+
+	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
+		hlist_del(&ri->hlist);
+		kfree(ri);
+	}
+	/*
+	 * By returning a non-zero value, we are telling
+	 * kprobe_handler() that we don't want the post_handler
+	 * to run (and have re-enabled preemption)
+	 */
+	return 1;
+}
+NOKPROBE_SYMBOL(trampoline_probe_handler);
+
+/*
+ * Called after single-stepping.  p->addr is the address of the
+ * instruction whose first byte has been replaced by the "breakpoint"
+ * instruction.  To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction.  The address of this
+ * copy is p->ainsn.insn.
+ */
+static void resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
+	int fixup = probe_get_fixup_type(p->ainsn.insn);
+
+	/* Check if the kprobes location is an enabled ftrace caller */
+	if (p->ainsn.is_ftrace_insn) {
+		struct ftrace_insn *insn = (struct ftrace_insn *) p->addr;
+		struct ftrace_insn call_insn;
+
+		ftrace_generate_call_insn(&call_insn, (unsigned long) p->addr);
+		/*
+		 * A kprobe on an enabled ftrace call site actually single
+		 * stepped an unconditional branch (ftrace nop equivalent).
+		 * Now we need to fixup things and pretend that a brasl r0,...
+		 * was executed instead.
+		 */
+		if (insn->disp == KPROBE_ON_FTRACE_CALL) {
+			ip += call_insn.disp * 2 - MCOUNT_INSN_SIZE;
+			regs->gprs[0] = (unsigned long)p->addr + sizeof(*insn);
+		}
+	}
+
+	if (fixup & FIXUP_PSW_NORMAL)
+		ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn;
+
+	if (fixup & FIXUP_BRANCH_NOT_TAKEN) {
+		int ilen = insn_length(p->ainsn.insn[0] >> 8);
+		if (ip - (unsigned long) p->ainsn.insn == ilen)
+			ip = (unsigned long) p->addr + ilen;
+	}
+
+	if (fixup & FIXUP_RETURN_REGISTER) {
+		int reg = (p->ainsn.insn[0] & 0xf0) >> 4;
+		regs->gprs[reg] += (unsigned long) p->addr -
+				   (unsigned long) p->ainsn.insn;
+	}
+
+	disable_singlestep(kcb, regs, ip);
+}
+NOKPROBE_SYMBOL(resume_execution);
+
+static int post_kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	struct kprobe *p = kprobe_running();
+
+	if (!p)
+		return 0;
+
+	if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		p->post_handler(p, regs, 0);
+	}
+
+	resume_execution(p, regs);
+	pop_kprobe(kcb);
+	preempt_enable_no_resched();
+
+	/*
+	 * if somebody else is singlestepping across a probe point, psw mask
+	 * will have PER set, in which case, continue the remaining processing
+	 * of do_single_step, as if this is not a probe hit.
+	 */
+	if (regs->psw.mask & PSW_MASK_PER)
+		return 0;
+
+	return 1;
+}
+NOKPROBE_SYMBOL(post_kprobe_handler);
+
+static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	struct kprobe *p = kprobe_running();
+	const struct exception_table_entry *entry;
+
+	switch(kcb->kprobe_status) {
+	case KPROBE_SWAP_INST:
+		/* We are here because the instruction replacement failed */
+		return 0;
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+		/*
+		 * We are here because the instruction being single
+		 * stepped caused a page fault. We reset the current
+		 * kprobe, point the psw address back to the probe
+		 * address and allow the page fault handler to continue
+		 * as a normal page fault.
+		 */
+		disable_singlestep(kcb, regs, (unsigned long) p->addr);
+		pop_kprobe(kcb);
+		preempt_enable_no_resched();
+		break;
+	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SSDONE:
+		/*
+		 * We increment the nmissed count for accounting,
+		 * we can also use npre/npostfault count for accounting
+		 * these specific fault cases.
+		 */
+		kprobes_inc_nmissed_count(p);
+
+		/*
+		 * We come here because instructions in the pre/post
+		 * handler caused the page_fault, this could happen
+		 * if handler tries to access user space by
+		 * copy_from_user(), get_user() etc. Let the
+		 * user-specified handler try to fix it first.
+		 */
+		if (p->fault_handler && p->fault_handler(p, regs, trapnr))
+			return 1;
+
+		/*
+		 * In case the user-specified fault handler returned
+		 * zero, try to fix up.
+		 */
+		entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+		if (entry) {
+			regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
+			return 1;
+		}
+
+		/*
+		 * No exception table entry matched,
+		 * let do_page_fault() fix it.
+		 */
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+NOKPROBE_SYMBOL(kprobe_trap_handler);
+
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	int ret;
+
+	if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+		local_irq_disable();
+	ret = kprobe_trap_handler(regs, trapnr);
+	if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+		local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
+	return ret;
+}
+NOKPROBE_SYMBOL(kprobe_fault_handler);
+
+/*
+ * Wrapper routine for handling exceptions.
+ */
+int kprobe_exceptions_notify(struct notifier_block *self,
+			     unsigned long val, void *data)
+{
+	struct die_args *args = (struct die_args *) data;
+	struct pt_regs *regs = args->regs;
+	int ret = NOTIFY_DONE;
+
+	if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+		local_irq_disable();
+
+	switch (val) {
+	case DIE_BPT:
+		if (kprobe_handler(regs))
+			ret = NOTIFY_STOP;
+		break;
+	case DIE_SSTEP:
+		if (post_kprobe_handler(regs))
+			ret = NOTIFY_STOP;
+		break;
+	case DIE_TRAP:
+		if (!preemptible() && kprobe_running() &&
+		    kprobe_trap_handler(regs, args->trapnr))
+			ret = NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+
+	if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+		local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
+
+	return ret;
+}
+NOKPROBE_SYMBOL(kprobe_exceptions_notify);
+
+int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct jprobe *jp = container_of(p, struct jprobe, kp);
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long stack;
+
+	memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
+
+	/* setup return addr to the jprobe handler routine */
+	regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE;
+	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
+
+	/* r15 is the stack pointer */
+	stack = (unsigned long) regs->gprs[15];
+
+	memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack));
+	return 1;
+}
+NOKPROBE_SYMBOL(setjmp_pre_handler);
+
+void jprobe_return(void)
+{
+	asm volatile(".word 0x0002");
+}
+NOKPROBE_SYMBOL(jprobe_return);
+
+int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	unsigned long stack;
+
+	stack = (unsigned long) kcb->jprobe_saved_regs.gprs[15];
+
+	/* Put the regs back */
+	memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+	/* put the stack back */
+	memcpy((void *) stack, kcb->jprobes_stack, MIN_STACK_SIZE(stack));
+	preempt_enable_no_resched();
+	return 1;
+}
+NOKPROBE_SYMBOL(longjmp_break_handler);
+
+static struct kprobe trampoline = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+	return register_kprobe(&trampoline);
+}
+
+int arch_trampoline_kprobe(struct kprobe *p)
+{
+	return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline;
+}
+NOKPROBE_SYMBOL(arch_trampoline_kprobe);
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
new file mode 100644
index 000000000..6ea6d6933
--- /dev/null
+++ b/arch/s390/kernel/lgr.c
@@ -0,0 +1,186 @@
+/*
+ * Linux Guest Relocation (LGR) detection
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include <asm/facility.h>
+#include <asm/sysinfo.h>
+#include <asm/ebcdic.h>
+#include <asm/debug.h>
+#include <asm/ipl.h>
+
+#define LGR_TIMER_INTERVAL_SECS (30 * 60)
+#define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */
+
+/*
+ * LGR info: Contains stfle and stsi data
+ */
+struct lgr_info {
+	/* Bit field with facility information: 4 DWORDs are stored */
+	u64 stfle_fac_list[4];
+	/* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM) */
+	u32 level;
+	/* Level 1: CEC info (stsi 1.1.1) */
+	char manufacturer[16];
+	char type[4];
+	char sequence[16];
+	char plant[4];
+	char model[16];
+	/* Level 2: LPAR info (stsi 2.2.2) */
+	u16 lpar_number;
+	char name[8];
+	/* Level 3: VM info (stsi 3.2.2) */
+	u8 vm_count;
+	struct {
+		char name[8];
+		char cpi[16];
+	} vm[VM_LEVEL_MAX];
+} __packed __aligned(8);
+
+/*
+ * LGR globals
+ */
+static char lgr_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+static struct lgr_info lgr_info_last;
+static struct lgr_info lgr_info_cur;
+static struct debug_info *lgr_dbf;
+
+/*
+ * Copy buffer and then convert it to ASCII
+ */
+static void cpascii(char *dst, char *src, int size)
+{
+	memcpy(dst, src, size);
+	EBCASC(dst, size);
+}
+
+/*
+ * Fill LGR info with 1.1.1 stsi data
+ */
+static void lgr_stsi_1_1_1(struct lgr_info *lgr_info)
+{
+	struct sysinfo_1_1_1 *si = (void *) lgr_page;
+
+	if (stsi(si, 1, 1, 1))
+		return;
+	cpascii(lgr_info->manufacturer, si->manufacturer,
+		sizeof(si->manufacturer));
+	cpascii(lgr_info->type, si->type, sizeof(si->type));
+	cpascii(lgr_info->model, si->model, sizeof(si->model));
+	cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence));
+	cpascii(lgr_info->plant, si->plant, sizeof(si->plant));
+}
+
+/*
+ * Fill LGR info with 2.2.2 stsi data
+ */
+static void lgr_stsi_2_2_2(struct lgr_info *lgr_info)
+{
+	struct sysinfo_2_2_2 *si = (void *) lgr_page;
+
+	if (stsi(si, 2, 2, 2))
+		return;
+	cpascii(lgr_info->name, si->name, sizeof(si->name));
+	memcpy(&lgr_info->lpar_number, &si->lpar_number,
+	       sizeof(lgr_info->lpar_number));
+}
+
+/*
+ * Fill LGR info with 3.2.2 stsi data
+ */
+static void lgr_stsi_3_2_2(struct lgr_info *lgr_info)
+{
+	struct sysinfo_3_2_2 *si = (void *) lgr_page;
+	int i;
+
+	if (stsi(si, 3, 2, 2))	/* non-zero: leave the VM fields untouched */
+		return;
+	for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) {
+		cpascii(lgr_info->vm[i].name, si->vm[i].name,
+			sizeof(si->vm[i].name));
+		cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi,
+			sizeof(si->vm[i].cpi));
+	}
+	lgr_info->vm_count = si->count;	/* raw count, may exceed VM_LEVEL_MAX */
+}
+
+/*
+ * Fill LGR info with current data
+ */
+static void lgr_info_get(struct lgr_info *lgr_info)
+{
+	int level;
+
+	memset(lgr_info, 0, sizeof(*lgr_info));
+	stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list));
+	level = stsi(NULL, 0, 0, 0);
+	lgr_info->level = level;
+	if (level >= 1)
+		lgr_stsi_1_1_1(lgr_info);
+	if (level >= 2)
+		lgr_stsi_2_2_2(lgr_info);
+	if (level >= 3)
+		lgr_stsi_3_2_2(lgr_info);
+}
+
+/*
+ * Check if LGR info has changed and if yes log new LGR info to s390dbf
+ */
+void lgr_info_log(void)
+{
+	static DEFINE_SPINLOCK(lgr_info_lock);
+	unsigned long flags;
+
+	if (!spin_trylock_irqsave(&lgr_info_lock, flags))
+		return;	/* lock is busy: skip this round instead of waiting */
+	lgr_info_get(&lgr_info_cur);
+	if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) {
+		debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur));
+		lgr_info_last = lgr_info_cur;	/* baseline for the next check */
+	}
+	spin_unlock_irqrestore(&lgr_info_lock, flags);
+}
+EXPORT_SYMBOL_GPL(lgr_info_log);
+
+static void lgr_timer_set(void);
+
+/*
+ * LGR timer callback
+ */
+static void lgr_timer_fn(unsigned long ignored)
+{
+	lgr_info_log();
+	lgr_timer_set();
+}
+
+static struct timer_list lgr_timer =
+	TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0);
+
+/*
+ * Setup next LGR timer
+ */
+static void lgr_timer_set(void)
+{
+	mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ);
+}
+
+/*
+ * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer
+ */
+static int __init lgr_init(void)
+{
+	lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info));
+	if (!lgr_dbf)
+		return -ENOMEM;
+	debug_register_view(lgr_dbf, &debug_hex_ascii_view);
+	lgr_info_get(&lgr_info_last);
+	debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last));
+	lgr_timer_set();
+	return 0;
+}
+module_init(lgr_init);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
new file mode 100644
index 000000000..fb0901ec4
--- /dev/null
+++ b/arch/s390/kernel/machine_kexec.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright IBM Corp. 2005, 2011
+ *
+ * Author(s): Rolf Adelsberger,
+ *	      Heiko Carstens <heiko.carstens@de.ibm.com>
+ *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/ftrace.h>
+#include <linux/debug_locks.h>
+#include <linux/suspend.h>
+#include <asm/cio.h>
+#include <asm/setup.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/smp.h>
+#include <asm/reset.h>
+#include <asm/ipl.h>
+#include <asm/diag.h>
+#include <asm/elf.h>
+#include <asm/asm-offsets.h>
+#include <asm/os_info.h>
+#include <asm/switch_to.h>
+
+typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
+
+extern const unsigned char relocate_kernel[];
+extern const unsigned long long relocate_kernel_len;
+
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * Create ELF notes for one CPU
+ */
+static void add_elf_notes(int cpu)
+{
+	struct save_area *sa = (void *) 4608 + store_prefix();
+	void *ptr;
+
+	memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa));
+	ptr = (u64 *) per_cpu_ptr(crash_notes, cpu);
+	ptr = fill_cpu_elf_notes(ptr, sa, NULL);
+	memset(ptr, 0, sizeof(struct elf_note));
+}
+
+/*
+ * Initialize CPU ELF notes
+ */
+static void setup_regs(void)
+{
+	unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE;
+	struct _lowcore *lc;
+	int cpu, this_cpu;
+
+	/* Get lowcore pointer from store status of this CPU (absolute zero) */
+	lc = (struct _lowcore *)(unsigned long)S390_lowcore.prefixreg_save_area;
+	this_cpu = smp_find_processor_id(stap());
+	add_elf_notes(this_cpu);
+	for_each_online_cpu(cpu) {
+		if (cpu == this_cpu)
+			continue;
+		if (smp_store_status(cpu))
+			continue;
+		add_elf_notes(cpu);
+	}
+	if (MACHINE_HAS_VX)
+		save_vx_regs_safe((void *) lc->vector_save_area_addr);
+	/* Copy dump CPU store status info to absolute zero */
+	memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area));
+}
+
+/*
+ * PM notifier callback for kdump
+ */
+static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
+			       void *ptr)
+{
+	switch (action) {
+	case PM_SUSPEND_PREPARE:
+	case PM_HIBERNATION_PREPARE:
+		if (crashk_res.start)
+			crash_map_reserved_pages();
+		break;
+	case PM_POST_SUSPEND:
+	case PM_POST_HIBERNATION:
+		if (crashk_res.start)
+			crash_unmap_reserved_pages();
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+	return NOTIFY_OK;
+}
+
+static int __init machine_kdump_pm_init(void)
+{
+	pm_notifier(machine_kdump_pm_cb, 0);
+	return 0;
+}
+arch_initcall(machine_kdump_pm_init);
+
+/*
+ * Start kdump: We expect here that a store status has been done on our CPU
+ */
+static void __do_machine_kdump(void *image)
+{
+	int (*start_kdump)(int) = (void *)((struct kimage *) image)->start;
+
+	__load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
+	start_kdump(1);
+}
+#endif
+
+/*
+ * Check kdump checksums via purgatory ("0" parameter): 1 if valid, else 0
+ */
+static int kdump_csum_valid(struct kimage *image)
+{
+#ifdef CONFIG_CRASH_DUMP
+	int (*start_kdump)(int) = (void *)image->start;
+	int rc;
+
+	__arch_local_irq_stnsm(0xfb); /* disable DAT */
+	rc = start_kdump(0);
+	__arch_local_irq_stosm(0x04); /* enable DAT */
+	return rc == 0; /* a zero return code is what lets kdump proceed */
+#else
+	return 0; /* no kdump support: never report a valid image */
+#endif
+}
+
+/*
+ * Map or unmap crashkernel memory
+ */
+static void crash_map_pages(int enable)
+{
+	unsigned long size = resource_size(&crashk_res);
+
+	BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN ||
+	       size % KEXEC_CRASH_MEM_ALIGN);
+	if (enable) {
+		vmem_add_mapping(crashk_res.start, size);
+		return;
+	}
+	vmem_remove_mapping(crashk_res.start, size);
+	/*
+	 * An empty crashkernel area is reported as (0, 0).
+	 */
+	os_info_crashkernel_add(size ? crashk_res.start : 0, size);
+}
+
+/*
+ * Map crashkernel memory
+ */
+void crash_map_reserved_pages(void)
+{
+	crash_map_pages(1);
+}
+
+/*
+ * Unmap crashkernel memory
+ */
+void crash_unmap_reserved_pages(void)
+{
+	crash_map_pages(0);
+}
+
+/*
+ * Give back memory to hypervisor before new kdump is loaded
+ */
+static int machine_kexec_prepare_kdump(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+	if (MACHINE_IS_VM)
+		diag10_range(PFN_DOWN(crashk_res.start),
+			     PFN_DOWN(crashk_res.end - crashk_res.start + 1));
+	return 0;
+#else
+	return -EINVAL;
+#endif
+}
+
+int machine_kexec_prepare(struct kimage *image)
+{
+	void *reboot_code_buffer;
+
+	/* Can't replace kernel image since it is read-only. */
+	if (ipl_flags & IPL_NSS_VALID)
+		return -EOPNOTSUPP;
+
+	if (image->type == KEXEC_TYPE_CRASH)
+		return machine_kexec_prepare_kdump();
+
+	/* We don't support anything but the default image type for now. */
+	if (image->type != KEXEC_TYPE_DEFAULT)
+		return -EINVAL;
+
+	/* Get the destination where the assembler code should be copied to.*/
+	reboot_code_buffer = (void *) page_to_phys(image->control_code_page);
+
+	/* Then copy it */
+	memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len);
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+	VMCOREINFO_SYMBOL(lowcore_ptr);
+	VMCOREINFO_SYMBOL(high_memory);
+	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+}
+
+void machine_shutdown(void)
+{
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+}
+
+/*
+ * Do normal kexec
+ */
+static void __do_machine_kexec(void *data)
+{
+	relocate_kernel_t data_mover;
+	struct kimage *image = data;
+
+	data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page);
+
+	/* Call the moving routine */
+	(*data_mover)(&image->head, image->start);
+}
+
+/*
+ * Reset system and call either kdump or normal kexec
+ */
+static void __machine_kexec(void *data)
+{
+	__arch_local_irq_stosm(0x04); /* enable DAT */
+	pfault_fini();
+	tracing_off();
+	debug_locks_off();
+#ifdef CONFIG_CRASH_DUMP
+	if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH) {
+
+		lgr_info_log();
+		s390_reset_system(setup_regs, __do_machine_kdump, data);
+	} else
+#endif
+		s390_reset_system(NULL, __do_machine_kexec, data);
+	disabled_wait((unsigned long) __builtin_return_address(0));
+}
+
+/*
+ * Do either kdump or normal kexec. In case of kdump we first ask
+ * purgatory, if kdump checksums are valid.
+ */
+void machine_kexec(struct kimage *image)
+{
+	if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image))
+		return;
+	tracer_disable();
+	smp_send_stop();
+	smp_call_ipl_cpu(__machine_kexec, image);
+}
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
new file mode 100644
index 000000000..e499370fb
--- /dev/null
+++ b/arch/s390/kernel/mcount.S
@@ -0,0 +1,82 @@
+/*
+ * Copyright IBM Corp. 2008, 2009
+ *
+ *   Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ptrace.h>
+
+	.section .kprobes.text, "ax"
+
+ENTRY(ftrace_stub)	/* stub: does nothing but branch back */
+	br	%r14	/* branch to the address held in %r14 */
+
+#define STACK_FRAME_SIZE  (STACK_FRAME_OVERHEAD + __PT_SIZE)
+#define STACK_PTREGS	  (STACK_FRAME_OVERHEAD)
+#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
+#define STACK_PTREGS_PSW  (STACK_PTREGS + __PT_PSW)
+
+ENTRY(_mcount)	/* body is a bare branch back via %r14 */
+	br	%r14	/* branch to the address held in %r14 */
+
+ENTRY(ftrace_caller)	/* builds a pt_regs frame and calls ftrace_trace_function */
+	.globl	ftrace_regs_caller
+	.set	ftrace_regs_caller,ftrace_caller	/* ftrace_regs_caller is an alias of ftrace_caller */
+	lgr	%r1,%r15	/* remember the incoming stack pointer */
+#ifndef CC_USING_HOTPATCH
+	aghi	%r0,MCOUNT_RETURN_FIXUP
+#endif
+	aghi	%r15,-STACK_FRAME_SIZE	/* allocate frame: stack overhead + pt_regs */
+	stg	%r1,__SF_BACKCHAIN(%r15)	/* backchain points to the old stack pointer */
+	stg	%r1,(STACK_PTREGS_GPRS+15*8)(%r15)	/* old %r15 goes into the gprs[15] slot */
+	stg	%r0,(STACK_PTREGS_PSW+8)(%r15)	/* %r0 is kept in the second PSW doubleword */
+	stmg	%r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)	/* save %r2-%r14 */
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+	aghik	%r2,%r0,-MCOUNT_INSN_SIZE	/* 1st arg: %r0 - MCOUNT_INSN_SIZE */
+	lgrl	%r4,function_trace_op	/* 3rd arg: value of function_trace_op */
+	lgrl	%r1,ftrace_trace_function	/* call target */
+#else
+	lgr	%r2,%r0
+	aghi	%r2,-MCOUNT_INSN_SIZE	/* 1st arg: %r0 - MCOUNT_INSN_SIZE */
+	larl	%r4,function_trace_op
+	lg	%r4,0(%r4)	/* 3rd arg: value of function_trace_op */
+	larl	%r1,ftrace_trace_function
+	lg	%r1,0(%r1)	/* call target */
+#endif
+	lgr	%r3,%r14	/* 2nd arg: incoming %r14 */
+	la	%r5,STACK_PTREGS(%r15)	/* 4th arg: address of the pt_regs area */
+	basr	%r14,%r1	/* call the function loaded from ftrace_trace_function */
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+# The j instruction gets runtime patched to a nop instruction.
+# See ftrace_enable_ftrace_graph_caller.
+ENTRY(ftrace_graph_caller)
+	j	ftrace_graph_caller_end	/* skips the graph code until patched */
+	lg	%r2,(STACK_PTREGS_GPRS+14*8)(%r15)	/* 1st arg: saved %r14 */
+	lg	%r3,(STACK_PTREGS_PSW+8)(%r15)	/* 2nd arg: saved %r0 value */
+	brasl	%r14,prepare_ftrace_return
+	stg	%r2,(STACK_PTREGS_GPRS+14*8)(%r15)	/* result replaces the saved %r14 */
+ftrace_graph_caller_end:
+	.globl	ftrace_graph_caller_end
+#endif
+	lg	%r1,(STACK_PTREGS_PSW+8)(%r15)	/* address to continue at */
+	lmg	%r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)	/* restore %r2-%r15, incl. old stack pointer */
+	br	%r1
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+ENTRY(return_to_handler)	/* calls ftrace_return_to_handler, then branches to its result */
+	stmg	%r2,%r5,32(%r15)	/* preserve %r2-%r5 across the call */
+	lgr	%r1,%r15
+	aghi	%r15,-STACK_FRAME_OVERHEAD	/* new stack frame for the call */
+	stg	%r1,__SF_BACKCHAIN(%r15)
+	brasl	%r14,ftrace_return_to_handler
+	aghi	%r15,STACK_FRAME_OVERHEAD	/* drop the frame again */
+	lgr	%r14,%r2	/* value returned in %r2 is the address to branch to */
+	lmg	%r2,%r5,32(%r15)	/* restore %r2-%r5 */
+	br	%r14
+
+#endif
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
new file mode 100644
index 000000000..0c1a67931
--- /dev/null
+++ b/arch/s390/kernel/module.c
@@ -0,0 +1,431 @@
+/*
+ *  Kernel module help for s390.
+ *
+ *  S390 version
+ *    Copyright IBM Corp. 2002, 2003
+ *    Author(s): Arnd Bergmann (arndb@de.ibm.com)
+ *		 Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  based on i386 version
+ *    Copyright (C) 2001 Rusty Russell.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/module.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/moduleloader.h>
+#include <linux/bug.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)
+#endif
+
+#define PLT_ENTRY_SIZE 20
+
+void *module_alloc(unsigned long size)	/* allocate @size bytes inside the module address range */
+{
+	if (PAGE_ALIGN(size) > MODULES_LEN)	/* request can never fit into the module area */
+		return NULL;
+	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,	/* range: MODULES_VADDR..MODULES_END */
+				    GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
+				    __builtin_return_address(0));
+}
+
+void module_arch_freeing_init(struct module *mod)	/* drop the per-symbol GOT/PLT bookkeeping */
+{
+	vfree(mod->arch.syminfo);
+	mod->arch.syminfo = NULL;	/* cleared so a later vfree() of it is harmless */
+}
+
+static void check_rela(Elf_Rela *rela, struct module *me)	/* reserve GOT/PLT space needed by one relocation */
+{
+	struct mod_arch_syminfo *info;
+
+	info = me->arch.syminfo + ELF_R_SYM (rela->r_info);	/* bookkeeping of the referenced symbol */
+	switch (ELF_R_TYPE (rela->r_info)) {
+	case R_390_GOT12:	/* 12 bit GOT offset.  */
+	case R_390_GOT16:	/* 16 bit GOT offset.  */
+	case R_390_GOT20:	/* 20 bit GOT offset.  */
+	case R_390_GOT32:	/* 32 bit GOT offset.  */
+	case R_390_GOT64:	/* 64 bit GOT offset.  */
+	case R_390_GOTENT:	/* 32 bit PC rel. to GOT entry shifted by 1. */
+	case R_390_GOTPLT12:	/* 12 bit offset to jump slot.	*/
+	case R_390_GOTPLT16:	/* 16 bit offset to jump slot.  */
+	case R_390_GOTPLT20:	/* 20 bit offset to jump slot.  */
+	case R_390_GOTPLT32:	/* 32 bit offset to jump slot.  */
+	case R_390_GOTPLT64:	/* 64 bit offset to jump slot.	*/
+	case R_390_GOTPLTENT:	/* 32 bit rel. offset to jump slot >> 1. */
+		if (info->got_offset == -1UL) {	/* -1UL: symbol has no GOT slot yet */
+			info->got_offset = me->arch.got_size;
+			me->arch.got_size += sizeof(void*);
+		}
+		break;
+	case R_390_PLT16DBL:	/* 16 bit PC rel. PLT shifted by 1.  */
+	case R_390_PLT32DBL:	/* 32 bit PC rel. PLT shifted by 1.  */
+	case R_390_PLT32:	/* 32 bit PC relative PLT address.  */
+	case R_390_PLT64:	/* 64 bit PC relative PLT address.  */
+	case R_390_PLTOFF16:	/* 16 bit offset from GOT to PLT. */
+	case R_390_PLTOFF32:	/* 32 bit offset from GOT to PLT. */
+	case R_390_PLTOFF64:	/* 64 bit offset from GOT to PLT. */
+		if (info->plt_offset == -1UL) {	/* -1UL: symbol has no PLT entry yet */
+			info->plt_offset = me->arch.plt_size;
+			me->arch.plt_size += PLT_ENTRY_SIZE;
+		}
+		break;
+	case R_390_COPY:
+	case R_390_GLOB_DAT:
+	case R_390_JMP_SLOT:
+	case R_390_RELATIVE:
+		/* Only needed if we want to support loading of 
+		   modules linked with -shared. */
+		break;
+	}
+}
+
+/*
+ * Account for GOT and PLT relocations. We can't add sections for
+ * got and plt but we can increase the core module size. Returns 0,
+ * -ENOEXEC if there is no symbol table, or -ENOMEM.
+ */
+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+			      char *secstrings, struct module *me)
+{
+	Elf_Shdr *symtab;
+	Elf_Sym *symbols;
+	Elf_Rela *rela;
+	char *strings;
+	int nrela, i, j;
+
+	/* Find symbol table and string table. */
+	symtab = NULL;
+	for (i = 0; i < hdr->e_shnum; i++)
+		switch (sechdrs[i].sh_type) {
+		case SHT_SYMTAB:
+			symtab = sechdrs + i;
+			break;	/* leaves the switch only; the scan goes on, so the last SHT_SYMTAB wins */
+		}
+	if (!symtab) {
+		printk(KERN_ERR "module %s: no symbol table\n", me->name);
+		return -ENOEXEC;
+	}
+
+	/* Allocate one syminfo structure per symbol. */
+	me->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym);
+	me->arch.syminfo = vmalloc(me->arch.nsyms *
+				   sizeof(struct mod_arch_syminfo));
+	if (!me->arch.syminfo)
+		return -ENOMEM;
+	symbols = (void *) hdr + symtab->sh_offset;
+	strings = (void *) hdr + sechdrs[symtab->sh_link].sh_offset;
+	for (i = 0; i < me->arch.nsyms; i++) {
+		if (symbols[i].st_shndx == SHN_UNDEF &&
+		    strcmp(strings + symbols[i].st_name,
+			   "_GLOBAL_OFFSET_TABLE_") == 0)
+			/* "Define" it as absolute. */
+			symbols[i].st_shndx = SHN_ABS;
+		me->arch.syminfo[i].got_offset = -1UL;	/* -1UL marks "no slot assigned" */
+		me->arch.syminfo[i].plt_offset = -1UL;
+		me->arch.syminfo[i].got_initialized = 0;
+		me->arch.syminfo[i].plt_initialized = 0;
+	}
+
+	/* Search for got/plt relocations. */
+	me->arch.got_size = me->arch.plt_size = 0;
+	for (i = 0; i < hdr->e_shnum; i++) {
+		if (sechdrs[i].sh_type != SHT_RELA)
+			continue;
+		nrela = sechdrs[i].sh_size / sizeof(Elf_Rela);
+		rela = (void *) hdr + sechdrs[i].sh_offset;
+		for (j = 0; j < nrela; j++)
+			check_rela(rela + j, me);	/* grows got_size/plt_size as needed */
+	}
+
+	/* Increase core size by size of got & plt and set start
+	   offsets for got and plt. The got comes first, then the plt. */
+	me->core_size = ALIGN(me->core_size, 4);
+	me->arch.got_offset = me->core_size;
+	me->core_size += me->arch.got_size;
+	me->arch.plt_offset = me->core_size;
+	me->core_size += me->arch.plt_size;
+	return 0;
+}
+
+static int apply_rela_bits(Elf_Addr loc, Elf_Addr val,	/* store val >> shift as a 'bits' wide field at loc */
+			   int sign, int bits, int shift)	/* sign != 0: signed range check; returns 0 or -ENOEXEC */
+{
+	unsigned long umax;
+	long min, max;
+
+	if (val & ((1UL << shift) - 1))	/* the low 'shift' bits must be zero */
+		return -ENOEXEC;
+	if (sign) {
+		val = (Elf_Addr)(((long) val) >> shift);
+		min = -(1L << (bits - 1));
+		max = (1L << (bits - 1)) - 1;
+		if ((long) val < min || (long) val > max)	/* does not fit as signed 'bits' wide value */
+			return -ENOEXEC;
+	} else {
+		val >>= shift;
+		umax = ((1UL << (bits - 1)) << 1) - 1;	/* split in two shifts so bits == 64 never shifts by 64 */
+		if ((unsigned long) val > umax)
+			return -ENOEXEC;
+	}
+
+	if (bits == 8)
+		*(unsigned char *) loc = val;
+	else if (bits == 12)
+		*(unsigned short *) loc = (val & 0xfff) |	/* low 12 bits replaced ... */
+			(*(unsigned short *) loc & 0xf000);	/* ... top 4 bits of the halfword kept */
+	else if (bits == 16)
+		*(unsigned short *) loc = val;
+	else if (bits == 20)
+		*(unsigned int *) loc = (val & 0xfff) << 16 |	/* low 12 bits of val go to bits 16-27 */
+			(val & 0xff000) >> 4 |	/* upper 8 bits of val go to bits 8-15 */
+			(*(unsigned int *) loc & 0xf00000ff);	/* all other bits of the word are kept */
+	else if (bits == 32)
+		*(unsigned int *) loc = val;
+	else if (bits == 64)
+		*(unsigned long *) loc = val;
+	return 0;	/* any other width stores nothing and still returns 0 */
+}
+
+static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,	/* apply one RELA entry; base is the patched section's address */
+		      const char *strtab, struct module *me)	/* returns 0 or -ENOEXEC */
+{
+	struct mod_arch_syminfo *info;
+	Elf_Addr loc, val;
+	int r_type, r_sym;
+	int rc = -ENOEXEC;
+
+	/* This is where to make the change */
+	loc = base + rela->r_offset;
+	/* This is the symbol it is referring to.  Note that all
+	   undefined symbols have been resolved.  */
+	r_sym = ELF_R_SYM(rela->r_info);
+	r_type = ELF_R_TYPE(rela->r_info);
+	info = me->arch.syminfo + r_sym;
+	val = symtab[r_sym].st_value;
+
+	switch (r_type) {
+	case R_390_NONE:	/* No relocation.  */
+		rc = 0;
+		break;
+	case R_390_8:		/* Direct 8 bit.   */
+	case R_390_12:		/* Direct 12 bit.  */
+	case R_390_16:		/* Direct 16 bit.  */
+	case R_390_20:		/* Direct 20 bit.  */
+	case R_390_32:		/* Direct 32 bit.  */
+	case R_390_64:		/* Direct 64 bit.  */
+		val += rela->r_addend;
+		if (r_type == R_390_8)
+			rc = apply_rela_bits(loc, val, 0, 8, 0);
+		else if (r_type == R_390_12)
+			rc = apply_rela_bits(loc, val, 0, 12, 0);
+		else if (r_type == R_390_16)
+			rc = apply_rela_bits(loc, val, 0, 16, 0);
+		else if (r_type == R_390_20)
+			rc = apply_rela_bits(loc, val, 1, 20, 0);
+		else if (r_type == R_390_32)
+			rc = apply_rela_bits(loc, val, 0, 32, 0);
+		else if (r_type == R_390_64)
+			rc = apply_rela_bits(loc, val, 0, 64, 0);
+		break;
+	case R_390_PC16:	/* PC relative 16 bit.  */
+	case R_390_PC16DBL:	/* PC relative 16 bit shifted by 1.  */
+	case R_390_PC32DBL:	/* PC relative 32 bit shifted by 1.  */
+	case R_390_PC32:	/* PC relative 32 bit.  */
+	case R_390_PC64:	/* PC relative 64 bit.	*/
+		val += rela->r_addend - loc;	/* distance from the patched location */
+		if (r_type == R_390_PC16)
+			rc = apply_rela_bits(loc, val, 1, 16, 0);
+		else if (r_type == R_390_PC16DBL)
+			rc = apply_rela_bits(loc, val, 1, 16, 1);
+		else if (r_type == R_390_PC32DBL)
+			rc = apply_rela_bits(loc, val, 1, 32, 1);
+		else if (r_type == R_390_PC32)
+			rc = apply_rela_bits(loc, val, 1, 32, 0);
+		else if (r_type == R_390_PC64)
+			rc = apply_rela_bits(loc, val, 1, 64, 0);
+		break;
+	case R_390_GOT12:	/* 12 bit GOT offset.  */
+	case R_390_GOT16:	/* 16 bit GOT offset.  */
+	case R_390_GOT20:	/* 20 bit GOT offset.  */
+	case R_390_GOT32:	/* 32 bit GOT offset.  */
+	case R_390_GOT64:	/* 64 bit GOT offset.  */
+	case R_390_GOTENT:	/* 32 bit PC rel. to GOT entry shifted by 1. */
+	case R_390_GOTPLT12:	/* 12 bit offset to jump slot.	*/
+	case R_390_GOTPLT20:	/* 20 bit offset to jump slot.  */
+	case R_390_GOTPLT16:	/* 16 bit offset to jump slot.  */
+	case R_390_GOTPLT32:	/* 32 bit offset to jump slot.  */
+	case R_390_GOTPLT64:	/* 64 bit offset to jump slot.	*/
+	case R_390_GOTPLTENT:	/* 32 bit rel. offset to jump slot >> 1. */
+		if (info->got_initialized == 0) {	/* fill the symbol's GOT slot on first use */
+			Elf_Addr *gotent;
+
+			gotent = me->module_core + me->arch.got_offset +
+				info->got_offset;
+			*gotent = val;
+			info->got_initialized = 1;
+		}
+		val = info->got_offset + rela->r_addend;	/* offset of the slot within the GOT */
+		if (r_type == R_390_GOT12 ||
+		    r_type == R_390_GOTPLT12)
+			rc = apply_rela_bits(loc, val, 0, 12, 0);
+		else if (r_type == R_390_GOT16 ||
+			 r_type == R_390_GOTPLT16)
+			rc = apply_rela_bits(loc, val, 0, 16, 0);
+		else if (r_type == R_390_GOT20 ||
+			 r_type == R_390_GOTPLT20)
+			rc = apply_rela_bits(loc, val, 1, 20, 0);
+		else if (r_type == R_390_GOT32 ||
+			 r_type == R_390_GOTPLT32)
+			rc = apply_rela_bits(loc, val, 0, 32, 0);
+		else if (r_type == R_390_GOT64 ||
+			 r_type == R_390_GOTPLT64)
+			rc = apply_rela_bits(loc, val, 0, 64, 0);
+		else if (r_type == R_390_GOTENT ||
+			 r_type == R_390_GOTPLTENT) {
+			val += (Elf_Addr) me->module_core - loc;
+			rc = apply_rela_bits(loc, val, 1, 32, 1);
+		}
+		break;
+	case R_390_PLT16DBL:	/* 16 bit PC rel. PLT shifted by 1.  */
+	case R_390_PLT32DBL:	/* 32 bit PC rel. PLT shifted by 1.  */
+	case R_390_PLT32:	/* 32 bit PC relative PLT address.  */
+	case R_390_PLT64:	/* 64 bit PC relative PLT address.  */
+	case R_390_PLTOFF16:	/* 16 bit offset from GOT to PLT. */
+	case R_390_PLTOFF32:	/* 32 bit offset from GOT to PLT. */
+	case R_390_PLTOFF64:	/* 64 bit offset from GOT to PLT. */
+		if (info->plt_initialized == 0) {	/* write the symbol's PLT stub on first use */
+			unsigned int *ip;
+			ip = me->module_core + me->arch.plt_offset +
+				info->plt_offset;
+			ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
+			ip[1] = 0x100a0004;
+			ip[2] = 0x07f10000;
+			ip[3] = (unsigned int) (val >> 32);	/* target address, upper word (stub offset 12) */
+			ip[4] = (unsigned int) val;	/* target address, lower word */
+			info->plt_initialized = 1;
+		}
+		if (r_type == R_390_PLTOFF16 ||
+		    r_type == R_390_PLTOFF32 ||
+		    r_type == R_390_PLTOFF64)
+			val = me->arch.plt_offset - me->arch.got_offset +
+				info->plt_offset + rela->r_addend;
+		else {
+			if (!((r_type == R_390_PLT16DBL &&	/* target out of direct range: go through the stub */
+			       val - loc + 0xffffUL < 0x1ffffeUL) ||
+			      (r_type == R_390_PLT32DBL &&
+			       val - loc + 0xffffffffULL < 0x1fffffffeULL)))
+				val = (Elf_Addr) me->module_core +
+					me->arch.plt_offset +
+					info->plt_offset;
+			val += rela->r_addend - loc;
+		}
+		if (r_type == R_390_PLT16DBL)
+			rc = apply_rela_bits(loc, val, 1, 16, 1);
+		else if (r_type == R_390_PLTOFF16)
+			rc = apply_rela_bits(loc, val, 0, 16, 0);
+		else if (r_type == R_390_PLT32DBL)
+			rc = apply_rela_bits(loc, val, 1, 32, 1);
+		else if (r_type == R_390_PLT32 ||
+			 r_type == R_390_PLTOFF32)
+			rc = apply_rela_bits(loc, val, 0, 32, 0);
+		else if (r_type == R_390_PLT64 ||
+			 r_type == R_390_PLTOFF64)
+			rc = apply_rela_bits(loc, val, 0, 64, 0);
+		break;
+	case R_390_GOTOFF16:	/* 16 bit offset to GOT.  */
+	case R_390_GOTOFF32:	/* 32 bit offset to GOT.  */
+	case R_390_GOTOFF64:	/* 64 bit offset to GOT. */
+		val = val + rela->r_addend -
+			((Elf_Addr) me->module_core + me->arch.got_offset);
+		if (r_type == R_390_GOTOFF16)
+			rc = apply_rela_bits(loc, val, 0, 16, 0);
+		else if (r_type == R_390_GOTOFF32)
+			rc = apply_rela_bits(loc, val, 0, 32, 0);
+		else if (r_type == R_390_GOTOFF64)
+			rc = apply_rela_bits(loc, val, 0, 64, 0);
+		break;
+	case R_390_GOTPC:	/* 32 bit PC relative offset to GOT. */
+	case R_390_GOTPCDBL:	/* 32 bit PC rel. off. to GOT shifted by 1. */
+		val = (Elf_Addr) me->module_core + me->arch.got_offset +
+			rela->r_addend - loc;
+		if (r_type == R_390_GOTPC)
+			rc = apply_rela_bits(loc, val, 1, 32, 0);
+		else if (r_type == R_390_GOTPCDBL)
+			rc = apply_rela_bits(loc, val, 1, 32, 1);
+		break;
+	case R_390_COPY:
+	case R_390_GLOB_DAT:	/* Create GOT entry.  */
+	case R_390_JMP_SLOT:	/* Create PLT entry.  */
+	case R_390_RELATIVE:	/* Adjust by program base.  */
+		/* Only needed if we want to support loading of 
+		   modules linked with -shared. */
+		return -ENOEXEC;
+	default:
+		printk(KERN_ERR "module %s: unknown relocation: %u\n",
+		       me->name, r_type);
+		return -ENOEXEC;
+	}
+	if (rc) {	/* the value did not fit or was misaligned for its type */
+		printk(KERN_ERR "module %s: relocation error for symbol %s "
+		       "(r_type %i, value 0x%lx)\n",
+		       me->name, strtab + symtab[r_sym].st_name,
+		       r_type, (unsigned long) val);
+		return rc;
+	}
+	return 0;
+}
+
+int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+		       unsigned int symindex, unsigned int relsec,
+		       struct module *me)
+{
+	Elf_Shdr *relhdr = sechdrs + relsec;
+	Elf_Rela *entry, *end;
+	Elf_Sym *symtab;
+	Elf_Addr base;
+	int rc = 0;
+
+	DEBUGP("Applying relocate section %u to %u\n",
+	       relsec, sechdrs[relsec].sh_info);
+	/* Target section address, symbol table, and the range of entries. */
+	base = sechdrs[relhdr->sh_info].sh_addr;
+	symtab = (Elf_Sym *) sechdrs[symindex].sh_addr;
+	entry = (Elf_Rela *) relhdr->sh_addr;
+	end = entry + relhdr->sh_size / sizeof(Elf_Rela);
+
+	/* Apply the entries in order; the first failure ends the walk. */
+	while (entry < end && !rc)
+		rc = apply_rela(entry++, base, symtab, strtab, me);
+
+	return rc;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,	/* hdr and sechdrs are not used here */
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	jump_label_apply_nops(me);
+	vfree(me->arch.syminfo);	/* free the per-symbol GOT/PLT bookkeeping */
+	me->arch.syminfo = NULL;
+	return 0;	/* always reports success */
+}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
new file mode 100644
index 000000000..505c17c0a
--- /dev/null
+++ b/arch/s390/kernel/nmi.c
@@ -0,0 +1,352 @@
+/*
+ *   Machine check handler
+ *
+ *    Copyright IBM Corp. 2000, 2009
+ *    Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Cornelia Huck <cornelia.huck@de.ibm.com>,
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/time.h>
+#include <linux/module.h>
+#include <asm/lowcore.h>
+#include <asm/smp.h>
+#include <asm/etr.h>
+#include <asm/cputime.h>
+#include <asm/nmi.h>
+#include <asm/crw.h>
+#include <asm/switch_to.h>
+
+struct mcck_struct {	/* per-cpu work recorded by s390_do_machine_check() for s390_handle_mcck() */
+	int kill_task;	/* current task is to be terminated */
+	int channel_report;	/* crw_handle_channel_report() is to be called */
+	int warning;	/* a warning machine check is pending */
+	unsigned long long mcck_code;	/* interruption code stored together with kill_task */
+};
+
+static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
+
+static void s390_handle_damage(char *msg)	/* stop the machine; note that @msg is never used */
+{
+	smp_send_stop();
+	disabled_wait((unsigned long) __builtin_return_address(0));
+	while (1);	/* spin forever should disabled_wait() come back */
+}
+
+/*
+ * Main machine check handler function. Will be called with interrupts enabled
+ * or disabled and machine checks enabled or disabled.
+ */
+void s390_handle_mcck(void)
+{
+	unsigned long flags;
+	struct mcck_struct mcck;
+
+	/*
+	 * Disable machine checks and get the current state of accumulated
+	 * machine checks. Afterwards delete the old state and enable machine
+	 * checks again.
+	 */
+	local_irq_save(flags);
+	local_mcck_disable();
+	mcck = *this_cpu_ptr(&cpu_mcck);	/* work on a private snapshot from here on */
+	memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
+	clear_cpu_flag(CIF_MCCK_PENDING);
+	local_mcck_enable();
+	local_irq_restore(flags);
+
+	if (mcck.channel_report)
+		crw_handle_channel_report();
+	/*
+	 * A warning may remain for a prolonged period on the bare iron.
+	 * (actually until the machine is powered off, or the problem is gone)
+	 * So we just stop listening for the WARNING MCH and avoid continuously
+	 * being interrupted.  One caveat is however, that we must do this per
+	 * processor and cannot use the smp version of ctl_clear_bit().
+	 * On VM we only get one interrupt per virtually presented machine check.
+	 * Though one suffices, we may get one interrupt per (virtual) cpu.
+	 */
+	if (mcck.warning) {	/* WARNING pending ? */
+		static int mchchk_wng_posted = 0;
+
+		/* Use single cpu clear, as we cannot handle smp here. */
+		__ctl_clear_bit(14, 24);	/* Disable WARNING MCH */
+		if (xchg(&mchchk_wng_posted, 1) == 0)	/* only the first caller sends SIGPWR */
+			kill_cad_pid(SIGPWR, 1);
+	}
+	if (mcck.kill_task) {
+		local_irq_enable();
+		printk(KERN_EMERG "mcck: Terminating task because of machine "
+		       "malfunction (code 0x%016llx).\n", mcck.mcck_code);
+		printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
+		       current->comm, current->pid);
+		do_exit(SIGSEGV);	/* ends the current task */
+	}
+}
+EXPORT_SYMBOL_GPL(s390_handle_mcck);
+
+/*
+ * Reload the registers from their lowcore save areas, guided by the
+ * validity bits in @mci. Returns 0 if all registers could be validated,
+ * returns 1 otherwise (the interrupted task must then be terminated).
+ */
+static int notrace s390_revalidate_registers(struct mci *mci)
+{
+	int kill_task;
+	u64 zero;
+	void *fpt_save_area, *fpt_creg_save_area;
+
+	kill_task = 0;
+	zero = 0;
+
+	if (!mci->gr) {
+		/*
+		 * General purpose registers couldn't be restored and have
+		 * unknown contents. Process needs to be terminated.
+		 */
+		kill_task = 1;
+	}
+	if (!mci->fp) {
+		/*
+		 * Floating point registers can't be restored and
+		 * therefore the process needs to be terminated.
+		 */
+		kill_task = 1;
+	}
+	fpt_save_area = &S390_lowcore.floating_pt_save_area;
+	fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
+	if (!mci->fc) {
+		/*
+		 * Floating point control register can't be restored.
+		 * It is loaded with zero and the task will be terminated.
+		 */
+		asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
+		kill_task = 1;
+	} else
+		asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
+
+	asm volatile(	/* reload all 16 floating point registers from the save area */
+		"	ld	0,0(%0)\n"
+		"	ld	1,8(%0)\n"
+		"	ld	2,16(%0)\n"
+		"	ld	3,24(%0)\n"
+		"	ld	4,32(%0)\n"
+		"	ld	5,40(%0)\n"
+		"	ld	6,48(%0)\n"
+		"	ld	7,56(%0)\n"
+		"	ld	8,64(%0)\n"
+		"	ld	9,72(%0)\n"
+		"	ld	10,80(%0)\n"
+		"	ld	11,88(%0)\n"
+		"	ld	12,96(%0)\n"
+		"	ld	13,104(%0)\n"
+		"	ld	14,112(%0)\n"
+		"	ld	15,120(%0)\n"
+		: : "a" (fpt_save_area));
+	/* Revalidate vector registers */
+	if (MACHINE_HAS_VX && current->thread.vxrs) {
+		if (!mci->vr) {
+			/*
+			 * Vector registers can't be restored and therefore
+			 * the process needs to be terminated.
+			 */
+			kill_task = 1;
+		}
+		restore_vx_regs((__vector128 *)
+				S390_lowcore.vector_save_area_addr);
+	}
+	/* Revalidate access registers */
+	asm volatile(
+		"	lam	0,15,0(%0)"
+		: : "a" (&S390_lowcore.access_regs_save_area));
+	if (!mci->ar) {
+		/*
+		 * Access registers have unknown contents.
+		 * Terminating task.
+		 */
+		kill_task = 1;
+	}
+	/* Revalidate control registers */
+	if (!mci->cr) {
+		/*
+		 * Control registers have unknown contents.
+		 * Can't recover and therefore stopping machine.
+		 */
+		s390_handle_damage("invalid control registers.");
+	} else {
+		asm volatile(
+			"	lctlg	0,15,0(%0)"
+			: : "a" (&S390_lowcore.cregs_save_area));
+	}
+	/*
+	 * We don't even try to revalidate the TOD register, since we simply
+	 * can't write something sensible into that register.
+	 */
+	/*
+	 * See if we can revalidate the TOD programmable register with its
+	 * old contents (should be zero) otherwise set it to zero.
+	 */
+	if (!mci->pr)
+		asm volatile(
+			"	sr	0,0\n"
+			"	sckpf"
+			: : : "0", "cc");
+	else
+		asm volatile(
+			"	l	0,0(%0)\n"
+			"	sckpf"
+			: : "a" (&S390_lowcore.tod_progreg_save_area)
+			: "0", "cc");
+	/* Revalidate clock comparator register */
+	set_clock_comparator(S390_lowcore.clock_comparator);
+	/* Check if old PSW is valid */
+	if (!mci->wp)
+		/*
+		 * Can't tell if we come from user or kernel mode
+		 * -> stopping machine.
+		 */
+		s390_handle_damage("old psw invalid.");
+
+	if (!mci->ms || !mci->pm || !mci->ia)	/* further validity bits of the interruption code */
+		kill_task = 1;
+
+	return kill_task;
+}
+
+#define MAX_IPD_COUNT	29
+#define MAX_IPD_TIME	(5 * 60 * USEC_PER_SEC) /* 5 minutes */
+
+#define ED_STP_ISLAND	6	/* External damage STP island check */
+#define ED_STP_SYNC	7	/* External damage STP sync check */
+#define ED_ETR_SYNC	12	/* External damage ETR sync check */
+#define ED_ETR_SWITCH	13	/* External damage ETR switch to local */
+
+/*
+ * Machine check handler: stops the machine or records work in cpu_mcck.
+ */
+void notrace s390_do_machine_check(struct pt_regs *regs)
+{
+	static int ipd_count;
+	static DEFINE_SPINLOCK(ipd_lock);	/* protects ipd_count and last_ipd */
+	static unsigned long long last_ipd;
+	struct mcck_struct *mcck;
+	unsigned long long tmp;
+	struct mci *mci;
+	int umode;
+
+	nmi_enter();
+	inc_irq_stat(NMI_NMI);
+	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+	mcck = this_cpu_ptr(&cpu_mcck);
+	umode = user_mode(regs);
+
+	if (mci->sd) {
+		/* System damage -> stopping machine */
+		s390_handle_damage("received system damage machine check.");
+	}
+	if (mci->pd) {
+		if (mci->b) {
+			/* Processing backup -> verify if we can survive this */
+			u64 z_mcic, o_mcic, t_mcic;
+			z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);	/* bits that must all be zero */
+			o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |	/* bits that must all be one */
+				  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+				  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
+				  1ULL<<16);
+			t_mcic = *(u64 *)mci;
+
+			if (((t_mcic & z_mcic) != 0) ||
+			    ((t_mcic & o_mcic) != o_mcic)) {
+				s390_handle_damage("processing backup machine "
+						   "check with damage.");
+			}
+
+			/*
+			 * Nullifying exigent condition, therefore we might
+			 * retry this instruction.
+			 */
+			spin_lock(&ipd_lock);
+			tmp = get_tod_clock();
+			if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)	/* NOTE(review): >> 12 presumably yields microseconds - confirm */
+				ipd_count++;
+			else
+				ipd_count = 1;
+			last_ipd = tmp;
+			if (ipd_count == MAX_IPD_COUNT)
+				s390_handle_damage("too many ipd retries.");
+			spin_unlock(&ipd_lock);
+		} else {
+			/* Processing damage -> stopping machine */
+			s390_handle_damage("received instruction processing "
+					   "damage machine check.");
+		}
+	}
+	if (s390_revalidate_registers(mci)) {
+		if (umode) {
+			/*
+			 * Couldn't restore all register contents while in
+			 * user mode -> mark task for termination.
+			 */
+			mcck->kill_task = 1;
+			mcck->mcck_code = *(unsigned long long *) mci;
+			set_cpu_flag(CIF_MCCK_PENDING);
+		} else {
+			/*
+			 * Couldn't restore all register contents while in
+			 * kernel mode -> stopping machine.
+			 */
+			s390_handle_damage("unable to revalidate registers.");
+		}
+	}
+	if (mci->cd) {
+		/* Timing facility damage */
+		s390_handle_damage("TOD clock damaged");
+	}
+	if (mci->ed && mci->ec) {
+		/* External damage */
+		if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
+			etr_sync_check();
+		if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
+			etr_switch_to_local();
+		if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
+			stp_sync_check();
+		if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
+			stp_island_check();
+	}
+	if (mci->se)
+		/* Storage error uncorrected */
+		s390_handle_damage("received storage error uncorrected "
+				   "machine check.");
+	if (mci->ke)
+		/* Storage key-error uncorrected */
+		s390_handle_damage("received storage key-error uncorrected "
+				   "machine check.");
+	if (mci->ds && mci->fa)
+		/* Storage degradation */
+		s390_handle_damage("received storage degradation machine "
+				   "check.");
+	if (mci->cp) {
+		/* Channel report word pending */
+		mcck->channel_report = 1;
+		set_cpu_flag(CIF_MCCK_PENDING);
+	}
+	if (mci->w) {
+		/* Warning pending */
+		mcck->warning = 1;
+		set_cpu_flag(CIF_MCCK_PENDING);
+	}
+	nmi_exit();
+}
+
+static int __init machine_check_init(void)	/* set three machine check enable bits in control register 14 */
+{
+	ctl_set_bit(14, 25);	/* enable external damage MCH */
+	ctl_set_bit(14, 27);	/* enable system recovery MCH */
+	ctl_set_bit(14, 24);	/* enable warning MCH */
+	return 0;	/* always reports success */
+}
+arch_initcall(machine_check_init);
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
new file mode 100644
index 000000000..d112fc66f
--- /dev/null
+++ b/arch/s390/kernel/os_info.c
@@ -0,0 +1,168 @@
+/*
+ * OS info memory interface
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "os_info"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/crash_dump.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <asm/checksum.h>
+#include <asm/lowcore.h>
+#include <asm/os_info.h>
+
+/*
+ * OS info structure has to be page aligned
+ */
+static struct os_info os_info __page_aligned_data;
+
+/*
+ * Checksum the OS info structure from its version_major member to its end
+ */
+u32 os_info_csum(struct os_info *os_info)
+{
+	void *first = &os_info->version_major;	/* checksummed area starts here */
+	return csum_partial(first, (int)(sizeof(*os_info) - offsetof(struct os_info, version_major)), 0);
+}
+
+/*
+ * Store the crashkernel area (base, size) in OS info and refresh the checksum
+ */
+void os_info_crashkernel_add(unsigned long base, unsigned long size)
+{
+	os_info.crashkernel_size = (u64) size;
+	os_info.crashkernel_addr = (u64) base;
+	os_info.csum = os_info_csum(&os_info);	/* must follow both stores */
+}
+
+/*
+ * Add OS info entry @nr (buffer @ptr of @size bytes) and update checksum
+ */
+void os_info_entry_add(int nr, void *ptr, u64 size)
+{
+	os_info.entry[nr].addr = (u64)(unsigned long)ptr;
+	os_info.entry[nr].size = size;
+	os_info.entry[nr].csum = csum_partial(ptr, size, 0);	/* checksum over the entry's data */
+	os_info.csum = os_info_csum(&os_info);	/* checksum over the OS info block itself */
+}
+
+/*
+ * Initialize OS info structure and set lowcore pointer
+ */
+void __init os_info_init(void)
+{
+	void *ptr = &os_info;
+
+	os_info.version_major = OS_INFO_VERSION_MAJOR;
+	os_info.version_minor = OS_INFO_VERSION_MINOR;
+	os_info.magic = OS_INFO_MAGIC;
+	os_info.csum = os_info_csum(&os_info);	/* computed after the fields above are set */
+	mem_assign_absolute(S390_lowcore.os_info, (unsigned long) ptr);	/* lowcore now holds the address of os_info */
+}
+
+#ifdef CONFIG_CRASH_DUMP
+
+static struct os_info *os_info_old;
+
+/*
+ * Allocate and copy OS info entry @nr from oldmem; entry addr is 0 on failure
+ */
+static void os_info_old_alloc(int nr, int align)
+{
+	unsigned long addr, size = 0;
+	char *buf, *buf_align, *msg;
+	u32 csum;
+
+	addr = os_info_old->entry[nr].addr;
+	if (!addr) {
+		msg = "not available";
+		goto fail;
+	}
+	size = os_info_old->entry[nr].size;
+	buf = kmalloc(size + align - 1, GFP_KERNEL);	/* extra room so the copy can be aligned */
+	if (!buf) {
+		msg = "alloc failed";
+		goto fail;
+	}
+	buf_align = PTR_ALIGN(buf, align);
+	if (copy_from_oldmem(buf_align, (void *) addr, size)) {
+		msg = "copy failed";
+		goto fail_free;
+	}
+	csum = csum_partial(buf_align, size, 0);
+	if (csum != os_info_old->entry[nr].csum) {	/* copy must match the stored checksum */
+		msg = "checksum failed";
+		goto fail_free;
+	}
+	os_info_old->entry[nr].addr = (u64)(unsigned long)buf_align;	/* entry now points at the local copy */
+	msg = "copied";
+	goto out;
+fail_free:
+	kfree(buf);
+fail:
+	os_info_old->entry[nr].addr = 0;
+out:
+	pr_info("entry %i: %s (addr=0x%lx size=%lu)\n",	/* addr is still the oldmem address here */
+		nr, msg, addr, size);
+}
+
+/*
+ * Initialize os info and os info entries from oldmem (done only once)
+ */
+static void os_info_old_init(void)
+{
+	static int os_info_init;	/* "already tried" flag; shadows os_info_init() in here */
+	unsigned long addr;
+
+	if (os_info_init)
+		return;
+	if (!OLDMEM_BASE)
+		goto fail;
+	if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr)))
+		goto fail;
+	if (addr == 0 || addr % PAGE_SIZE)	/* must be set and page aligned */
+		goto fail;
+	os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL);
+	if (!os_info_old)
+		goto fail;
+	if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old)))
+		goto fail_free;
+	if (os_info_old->magic != OS_INFO_MAGIC)
+		goto fail_free;
+	if (os_info_old->csum != os_info_csum(os_info_old))
+		goto fail_free;
+	if (os_info_old->version_major > OS_INFO_VERSION_MAJOR)	/* newer major version than we know */
+		goto fail_free;
+	os_info_old_alloc(OS_INFO_VMCOREINFO, 1);
+	os_info_old_alloc(OS_INFO_REIPL_BLOCK, 1);
+	pr_info("crashkernel: addr=0x%lx size=%lu\n",
+		(unsigned long) os_info_old->crashkernel_addr,
+		(unsigned long) os_info_old->crashkernel_size);
+	os_info_init = 1;
+	return;
+fail_free:
+	kfree(os_info_old);
+fail:
+	os_info_init = 1;	/* failures are final too: no retry on later calls */
+	os_info_old = NULL;
+}
+
+/*
+ * Return old OS info entry nr and store its size in *size (NULL if missing)
+ */
+void *os_info_old_entry(int nr, unsigned long *size)
+{
+	u64 entry_addr;
+
+	os_info_old_init();
+	entry_addr = os_info_old ? os_info_old->entry[nr].addr : 0;
+	if (!entry_addr)
+		return NULL;
+	*size = (unsigned long) os_info_old->entry[nr].size;
+	return (void *)(unsigned long) entry_addr;
+}
+#endif
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
new file mode 100644
index 000000000..56fdad479
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -0,0 +1,696 @@
+/*
+ * Performance event support for s390x - CPU-measurement Counter Facility
+ *
+ *  Copyright IBM Corp. 2012
+ *  Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT	"cpum_cf"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <asm/ctl_reg.h>
+#include <asm/irq.h>
+#include <asm/cpu_mf.h>
+
+/* CPU-measurement counter facility supports these CPU counter sets:
+ * For CPU counter sets:
+ *    Basic counter set:	     0-31
+ *    Problem-state counter set:    32-63
+ *    Crypto-activity counter set:  64-127
+ *    Extended counter set:	   128-159
+ */
+/* Index of a counter set.  The values are used as array indices into
+ * cpumf_state_ctl[] and cpu_hw_events.ctr_set[], and are stored in
+ * hw_perf_event.config_base by __hw_perf_event_init().
+ */
+enum cpumf_ctr_set {
+	/* CPU counter sets */
+	CPUMF_CTR_SET_BASIC   = 0,
+	CPUMF_CTR_SET_USER    = 1,
+	CPUMF_CTR_SET_CRYPTO  = 2,
+	CPUMF_CTR_SET_EXT     = 3,
+
+	/* Maximum number of counter sets */
+	CPUMF_CTR_SET_MAX,
+};
+
+/* Bit positions inside the control value handed to lcctl(): the per-set
+ * mask from cpumf_state_ctl[] is shifted by ENABLE_SHIFT to form the
+ * enable control and by ACTCTL_SHIFT to form the activation control
+ * (see the ctr_set_*() helpers below).
+ */
+#define CPUMF_LCCTL_ENABLE_SHIFT    16
+#define CPUMF_LCCTL_ACTCTL_SHIFT     0
+/* Per-counter-set control mask, indexed by enum cpumf_ctr_set.  The same
+ * mask is also tested against cpumf_ctr_info.auth_ctl in
+ * validate_ctr_auth().
+ */
+static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = {
+	[CPUMF_CTR_SET_BASIC]	= 0x02,
+	[CPUMF_CTR_SET_USER]	= 0x04,
+	[CPUMF_CTR_SET_CRYPTO]	= 0x08,
+	[CPUMF_CTR_SET_EXT]	= 0x01,
+};
+
+/* Set the enable-control bit of counter set @ctr_set in @state */
+static void ctr_set_enable(u64 *state, int ctr_set)
+{
+	const u64 enable_bit = cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT;
+
+	*state = *state | enable_bit;
+}
+/* Clear the enable-control bit of counter set @ctr_set in @state */
+static void ctr_set_disable(u64 *state, int ctr_set)
+{
+	const u64 enable_bit = cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT;
+
+	*state = *state & ~enable_bit;
+}
+/* Set the activation-control bit of counter set @ctr_set in @state */
+static void ctr_set_start(u64 *state, int ctr_set)
+{
+	const u64 active_bit = cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT;
+
+	*state = *state | active_bit;
+}
+/* Clear the activation-control bit of counter set @ctr_set in @state */
+static void ctr_set_stop(u64 *state, int ctr_set)
+{
+	const u64 active_bit = cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT;
+
+	*state = *state & ~active_bit;
+}
+
+/* Local CPUMF event structure */
+/* Per-CPU bookkeeping of the counter facility:
+ *   info     - facility information filled in by qctri()
+ *   ctr_set  - per counter set count of started events; incremented in
+ *		cpumf_pmu_start(), decremented in cpumf_pmu_stop()
+ *   state    - control value that cpumf_pmu_enable() passes to lcctl()
+ *   tx_state - copy of @state taken by cpumf_pmu_start_txn()
+ *   flags    - PMU_F_ENABLED, PMU_F_RESERVED and PERF_EVENT_TXN bits
+ */
+struct cpu_hw_events {
+	struct cpumf_ctr_info	info;
+	atomic_t		ctr_set[CPUMF_CTR_SET_MAX];
+	u64			state, tx_state;
+	unsigned int		flags;
+};
+/* One instance per CPU.  All counter-set reference counts, the control
+ * state and the flags start out as zero; members that are not listed
+ * (info, tx_state) get the usual zero initialization of static objects.
+ */
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+	.ctr_set = {
+		[CPUMF_CTR_SET_BASIC]  = ATOMIC_INIT(0),
+		[CPUMF_CTR_SET_USER]   = ATOMIC_INIT(0),
+		[CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0),
+		[CPUMF_CTR_SET_EXT]    = ATOMIC_INIT(0),
+	},
+	.state = 0,
+	.flags = 0,
+};
+
+/* Map counter number @event to the counter set it belongs to.
+ * Returns a CPUMF_CTR_SET_* value, or -1 for counter numbers >= 256.
+ */
+static int get_counter_set(u64 event)
+{
+	if (event < 32)
+		return CPUMF_CTR_SET_BASIC;
+	if (event < 64)
+		return CPUMF_CTR_SET_USER;
+	if (event < 128)
+		return CPUMF_CTR_SET_CRYPTO;
+	if (event < 256)
+		return CPUMF_CTR_SET_EXT;
+
+	/* outside of every known counter set */
+	return -1;
+}
+
+/* Check that the counter stored in @hwc belongs to a known counter set
+ * and is not one of the reserved counter numbers.
+ * Returns 0, -EINVAL (unknown counter set) or -EOPNOTSUPP (reserved).
+ */
+static int validate_event(const struct hw_perf_event *hwc)
+{
+	/* Only the four CPU counter sets are acceptable */
+	switch (hwc->config_base) {
+	case CPUMF_CTR_SET_BASIC:
+	case CPUMF_CTR_SET_USER:
+	case CPUMF_CTR_SET_CRYPTO:
+	case CPUMF_CTR_SET_EXT:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* check for reserved counters */
+	if (hwc->config >= 6 && hwc->config <= 31)
+		return -EOPNOTSUPP;
+	if (hwc->config >= 38 && hwc->config <= 63)
+		return -EOPNOTSUPP;
+	if (hwc->config >= 80 && hwc->config <= 127)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/* Check that this CPU's counter facility version numbers (cfvn/csvn, as
+ * stored in cpu_hw_events.info) are sufficient for the counter in @hwc.
+ * Returns 0 or -EOPNOTSUPP.
+ */
+static int validate_ctr_version(const struct hw_perf_event *hwc)
+{
+	struct cpu_hw_events *cpuhw;
+	int err = 0;
+
+	cpuhw = &get_cpu_var(cpu_hw_events);
+
+	switch (hwc->config_base) {
+	case CPUMF_CTR_SET_BASIC:
+	case CPUMF_CTR_SET_USER:
+		/* both sets need a first version number of at least 1 */
+		if (cpuhw->info.cfvn < 1)
+			err = -EOPNOTSUPP;
+		break;
+	case CPUMF_CTR_SET_CRYPTO:
+	case CPUMF_CTR_SET_EXT:
+		/* need csvn >= 1; the highest usable counter number
+		 * depends on the csvn value
+		 */
+		if (cpuhw->info.csvn < 1 ||
+		    (cpuhw->info.csvn == 1 && hwc->config > 159) ||
+		    (cpuhw->info.csvn == 2 && hwc->config > 175) ||
+		    (cpuhw->info.csvn  > 2 && hwc->config > 255))
+			err = -EOPNOTSUPP;
+		break;
+	default:
+		break;
+	}
+
+	put_cpu_var(cpu_hw_events);
+	return err;
+}
+
+/* Check that the counter set of @hwc is authorized on this CPU.
+ * Returns 0 if the set's control mask is present in info.auth_ctl,
+ * -EPERM otherwise.
+ */
+static int validate_ctr_auth(const struct hw_perf_event *hwc)
+{
+	struct cpu_hw_events *cpuhw;
+	int err;
+
+	cpuhw = &get_cpu_var(cpu_hw_events);
+
+	/* check authorization for cpu counter sets */
+	if (cpumf_state_ctl[hwc->config_base] & cpuhw->info.auth_ctl)
+		err = 0;
+	else
+		err = -EPERM;
+
+	put_cpu_var(cpu_hw_events);
+	return err;
+}
+
+/*
+ * Change the CPUMF state to active.
+ * Enable and activate the CPU-counter sets according
+ * to the per-cpu control state.
+ */
+static void cpumf_pmu_enable(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+	int err;
+
+	/* Nothing to do when the PMU is already marked enabled */
+	if (cpuhw->flags & PMU_F_ENABLED)
+		return;
+
+	/* Load the complete per-cpu control state (enable + activation) */
+	err = lcctl(cpuhw->state);
+	if (!err) {
+		cpuhw->flags |= PMU_F_ENABLED;
+		return;
+	}
+
+	/* The flag stays clear, so a later call tries again */
+	pr_err("Enabling the performance measuring unit "
+	       "failed with rc=%x\n", err);
+}
+
+/*
+ * Change the CPUMF state to inactive.
+ * Disable and enable (inactive) the CPU-counter sets according
+ * to the per-cpu control state.
+ */
+static void cpumf_pmu_disable(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+	u64 inactive;
+	int err;
+
+	/* Nothing to do when the PMU is not marked enabled */
+	if (!(cpuhw->flags & PMU_F_ENABLED))
+		return;
+
+	/* Keep the enable controls but drop everything below
+	 * CPUMF_LCCTL_ENABLE_SHIFT, i.e. the activation controls.
+	 */
+	inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
+	err = lcctl(inactive);
+	if (!err) {
+		cpuhw->flags &= ~PMU_F_ENABLED;
+		return;
+	}
+
+	/* The flag stays set, so a later call tries again */
+	pr_err("Disabling the performance measuring unit "
+	       "failed with rc=%x\n", err);
+}
+
+
+/* Number of perf events counting hardware events.  Incremented in
+ * __hw_perf_event_init() and decremented in hw_perf_event_destroy();
+ * the 0 <-> 1 transitions are made while holding pmc_reserve_mutex.
+ */
+static atomic_t num_events = ATOMIC_INIT(0);
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/* CPU-measurement alerts for the counter facility */
+static void cpumf_measurement_alert(struct ext_code ext_code,
+				    unsigned int alert, unsigned long unused)
+{
+	struct cpu_hw_events *cpuhw;
+
+	/* Not an alert for the counter facility */
+	if ((alert & CPU_MF_INT_CF_MASK) == 0)
+		return;
+
+	inc_irq_stat(IRQEXT_CMC);
+
+	/* Measurement alerts are shared and might happen when the PMU
+	 * is not reserved.  Ignore these alerts in this case. */
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
+	if ((cpuhw->flags & PMU_F_RESERVED) == 0)
+		return;
+
+	/* counter authorization change alert: refresh the cached info */
+	if (alert & CPU_MF_INT_CF_CACA)
+		qctri(&cpuhw->info);
+
+	/* loss of counter data alert: can only be reported */
+	if (alert & CPU_MF_INT_CF_LCDA)
+		pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+}
+
+#define PMC_INIT      0
+#define PMC_RELEASE   1
+/* Per-CPU part of reserving/releasing the counter facility.
+ * @flags points to an int holding PMC_INIT or PMC_RELEASE.  In every
+ * case, including an unknown request value, the CPU counter sets are
+ * finally disabled with lcctl(0).
+ */
+static void setup_pmc_cpu(void *flags)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+	const int request = *((int *) flags);
+
+	if (request == PMC_INIT) {
+		/* Start from clean facility information and mark the
+		 * PMU as reserved on this CPU.
+		 */
+		memset(&cpuhw->info, 0, sizeof(cpuhw->info));
+		qctri(&cpuhw->info);
+		cpuhw->flags |= PMU_F_RESERVED;
+	} else if (request == PMC_RELEASE) {
+		cpuhw->flags &= ~PMU_F_RESERVED;
+	}
+
+	/* Disable CPU counter sets */
+	lcctl(0);
+}
+
+/* Initialize the CPU-measurement facility */
+static int reserve_pmc_hardware(void)
+{
+	int request = PMC_INIT;
+
+	/* Run the per-CPU setup everywhere and wait for completion,
+	 * then register for the measurement-alert subclass.
+	 */
+	on_each_cpu(setup_pmc_cpu, &request, 1);
+	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+	/* Currently this cannot fail */
+	return 0;
+}
+
+/* Release the CPU-measurement facility */
+static void release_pmc_hardware(void)
+{
+	int request = PMC_RELEASE;
+
+	/* Undo reserve_pmc_hardware(): per-CPU release first, then drop
+	 * the measurement-alert subclass registration.
+	 */
+	on_each_cpu(setup_pmc_cpu, &request, 1);
+	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+}
+
+/* Release the PMU if event is the last perf event */
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	/* Fast path: drop one reference without the mutex as long as
+	 * the count is not 1, i.e. this is not the last event.
+	 */
+	if (atomic_add_unless(&num_events, -1, 1))
+		return;
+
+	/* Possibly the last event: decide about the release under the
+	 * mutex that also serializes the reservation.
+	 */
+	mutex_lock(&pmc_reserve_mutex);
+	if (atomic_dec_return(&num_events) == 0)
+		release_pmc_hardware();
+	mutex_unlock(&pmc_reserve_mutex);
+}
+
+/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
+/* Indexed by the generic PERF_COUNT_HW_* id; the value is the counter
+ * number.  -1 marks a generic event without a counter, for which
+ * __hw_perf_event_init() returns -ENOENT.
+ */
+static const int cpumf_generic_events_basic[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = 0,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = 1,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = -1,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = -1,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = -1,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = -1,
+};
+/* CPUMF <-> perf event mappings for userspace (problem-state set) */
+/* Same layout as cpumf_generic_events_basic[]; used by
+ * __hw_perf_event_init() when exclude_kernel is set and exclude_user
+ * is not.  -1 again marks an unsupported generic event.
+ */
+static const int cpumf_generic_events_user[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]	    = 32,
+	[PERF_COUNT_HW_INSTRUCTIONS]	    = 33,
+	[PERF_COUNT_HW_CACHE_REFERENCES]    = -1,
+	[PERF_COUNT_HW_CACHE_MISSES]	    = -1,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+	[PERF_COUNT_HW_BRANCH_MISSES]	    = -1,
+	[PERF_COUNT_HW_BUS_CYCLES]	    = -1,
+};
+
+/* Translate the perf attributes of @event into a counter number and a
+ * counter set, validate them and take a reference on the counter
+ * facility.
+ *
+ * Returns 0 on success or a negative errno:
+ *   -EOPNOTSUPP  unsupported exclude combination, generic event id out
+ *		  of range, reserved counter, or insufficient facility
+ *		  version
+ *   -ENOENT	  attribute type not handled here, or generic event
+ *		  without a counter mapping
+ *   -EINVAL	  counter number beyond PERF_CPUM_CF_MAX_CTR or unknown
+ *		  counter set
+ *   -EBUSY	  the counter facility could not be reserved
+ *   -EPERM	  counter set not authorized
+ *
+ * event->destroy is installed only after the facility reference has
+ * been taken.  If a later validation fails, the caller is expected to
+ * invoke event->destroy to drop that reference again.
+ */
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	int err;
+	u64 ev;
+
+	switch (attr->type) {
+	case PERF_TYPE_RAW:
+		/* Raw events are used to access counters directly,
+		 * hence do not permit excludes */
+		if (attr->exclude_kernel || attr->exclude_user ||
+		    attr->exclude_hv)
+			return -EOPNOTSUPP;
+		ev = attr->config;
+		break;
+
+	case PERF_TYPE_HARDWARE:
+		ev = attr->config;
+		/* Count user space (problem-state) only */
+		if (!attr->exclude_user && attr->exclude_kernel) {
+			if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
+				return -EOPNOTSUPP;
+			ev = cpumf_generic_events_user[ev];
+
+		/* No support for kernel space counters only */
+		} else if (!attr->exclude_kernel && attr->exclude_user) {
+			return -EOPNOTSUPP;
+
+		/* Count user and kernel space */
+		} else {
+			if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
+				return -EOPNOTSUPP;
+			ev = cpumf_generic_events_basic[ev];
+		}
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
+	/* A table value of -1 was converted to u64 on assignment; the
+	 * comparison below converts the constant the same way.
+	 */
+	if (ev == -1)
+		return -ENOENT;
+
+	if (ev >= PERF_CPUM_CF_MAX_CTR)
+		return -EINVAL;
+
+	/* Use the hardware perf event structure to store the counter number
+	 * in 'config' member and the counter set to which the counter belongs
+	 * in the 'config_base'.  The counter set (config_base) is then used
+	 * to enable/disable the counters.
+	 */
+	hwc->config = ev;
+	hwc->config_base = get_counter_set(ev);
+
+	/* Validate the counter that is assigned to this event.
+	 * Because the counter facility can use numerous counters at the
+	 * same time without constraints, it is not necessary to explicitly
+	 * validate event groups (event->group_leader != event).
+	 */
+	err = validate_event(hwc);
+	if (err)
+		return err;
+
+	/* Initialize for using the CPU-measurement counter facility.
+	 * The first event reserves the facility under the mutex; later
+	 * events only take another reference.
+	 */
+	if (!atomic_inc_not_zero(&num_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+			err = -EBUSY;
+		else
+			atomic_inc(&num_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	/* Reservation failed: no reference was taken, so report the error
+	 * instead of letting the checks below overwrite it, and do not
+	 * install a destroy callback that would drop a reference we do
+	 * not hold.
+	 */
+	if (err)
+		return err;
+	event->destroy = hw_perf_event_destroy;
+
+	/* Finally, validate version and authorization of the counter set */
+	err = validate_ctr_auth(hwc);
+	if (!err)
+		err = validate_ctr_version(hwc);
+
+	return err;
+}
+
+/* pmu::event_init callback.  Hands hardware, hw-cache and raw events to
+ * __hw_perf_event_init() and rejects every other type with -ENOENT.
+ */
+static int cpumf_pmu_event_init(struct perf_event *event)
+{
+	int err;
+
+	if (event->attr.type != PERF_TYPE_HARDWARE &&
+	    event->attr.type != PERF_TYPE_HW_CACHE &&
+	    event->attr.type != PERF_TYPE_RAW)
+		return -ENOENT;
+
+	err = __hw_perf_event_init(event);
+
+	/* On failure, undo whatever the init already set up */
+	if (unlikely(err) && event->destroy)
+		event->destroy(event);
+
+	return err;
+}
+
+/* Synchronize event->hw.prev_count with the current counter value.
+ * Returns the ecctr() result of the last attempt; a result of 3 stores
+ * 0 as the new previous count.
+ */
+static int hw_perf_event_reset(struct perf_event *event)
+{
+	u64 prev, new;
+	int err;
+
+	for (;;) {
+		prev = local64_read(&event->hw.prev_count);
+		err = ecctr(event->hw.config, &new);
+
+		/* Any failure other than 3 leaves prev_count untouched */
+		if (err && err != 3)
+			break;
+
+		/* The counter is not (yet) available. This
+		 * might happen if the counter set to which
+		 * this counter belongs is in the disabled
+		 * state.
+		 */
+		if (err)
+			new = 0;
+
+		/* Retry if prev_count changed underneath us */
+		if (local64_cmpxchg(&event->hw.prev_count, prev, new) == prev)
+			break;
+	}
+
+	return err;
+}
+
+/* Read the counter of @event, store it as the new previous count and
+ * add the difference to event->count.
+ * Returns 0, or the non-zero ecctr() result, in which case nothing is
+ * updated.
+ */
+static int hw_perf_event_update(struct perf_event *event)
+{
+	u64 prev, new, delta;
+	int err;
+
+	do {
+		prev = local64_read(&event->hw.prev_count);
+		err = ecctr(event->hw.config, &new);
+		if (err)
+			return err;
+	} while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+	if (prev <= new)
+		delta = new - prev;
+	else
+		delta = (-1ULL - prev) + new + 1;	/* overflow */
+	local64_add(delta, &event->count);
+
+	return err;
+}
+
+/* pmu::read callback: refresh the count unless the event is stopped */
+static void cpumf_pmu_read(struct perf_event *event)
+{
+	if (!(event->hw.state & PERF_HES_STOPPED))
+		hw_perf_event_update(event);
+}
+
+/* pmu::start callback: mark the counter set of @event as enabled and
+ * active in the per-cpu control state and take a reference on the set.
+ */
+static void cpumf_pmu_start(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* Starting is only valid for a stopped event with a counter */
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+	if (WARN_ON_ONCE(hwc->config == -1))
+		return;
+
+	/* A reload is expected to find the count up to date */
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+
+	/* (Re-)enable and activate the counter set */
+	ctr_set_enable(&cpuhw->state, hwc->config_base);
+	ctr_set_start(&cpuhw->state, hwc->config_base);
+
+	/* Other events may already have the set running, and all
+	 * counters of a set run together, so event->hw.prev_count has to
+	 * be brought in line with the counter.  The set may still be
+	 * inactive or disabled at this point.
+	 */
+	hw_perf_event_reset(event);
+
+	/* increment refcount for this counter set */
+	atomic_inc(&cpuhw->ctr_set[hwc->config_base]);
+}
+
+/* pmu::stop callback: drop the reference on the event's counter set
+ * and, if PERF_EF_UPDATE is requested, bring the count up to date.
+ */
+static void cpumf_pmu_stop(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		/* The last user of a counter set clears its activation
+		 * control, which makes the set inactive.
+		 */
+		if (atomic_dec_return(&cpuhw->ctr_set[hwc->config_base]) == 0)
+			ctr_set_stop(&cpuhw->state, hwc->config_base);
+		hwc->state |= PERF_HES_STOPPED;
+	}
+
+	/* Nothing more to do without an update request, or when the
+	 * count is already up to date.
+	 */
+	if (!(flags & PERF_EF_UPDATE) || (hwc->state & PERF_HES_UPTODATE))
+		return;
+
+	hw_perf_event_update(event);
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
+/* pmu::add callback.  Returns 0, or -EPERM when the counter set of
+ * @event is not authorized.
+ */
+static int cpumf_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+	/* Outside of a group transaction the authorization of the
+	 * counter set is checked right here.  Inside a transaction the
+	 * check is left to cpumf_pmu_commit_txn().
+	 */
+	if (!(cpuhw->flags & PERF_EVENT_TXN) && validate_ctr_auth(&event->hw))
+		return -EPERM;
+
+	/* Enable the set; the event itself starts out stopped */
+	ctr_set_enable(&cpuhw->state, event->hw.config_base);
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		cpumf_pmu_start(event, PERF_EF_RELOAD);
+
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+/* pmu::del callback: stop @event and disable its counter set if no
+ * started event uses the set any longer.
+ */
+static void cpumf_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+	cpumf_pmu_stop(event, PERF_EF_UPDATE);
+
+	/* A set without users is moved to the disabled state, which
+	 * also clears the content of all its counters.
+	 *
+	 * This can hit a set that another event was added to but has not
+	 * started yet: the enable control is cleared and the counters are
+	 * reset.  That is why cpumf_pmu_start() always re-enables the
+	 * counter set.
+	 */
+	if (atomic_read(&cpuhw->ctr_set[event->hw.config_base]) == 0)
+		ctr_set_disable(&cpuhw->state, event->hw.config_base);
+
+	perf_event_update_userpage(event);
+}
+
+/*
+ * Start group events scheduling transaction.
+ * Set flags to perform a single test at commit time.
+ */
+static void cpumf_pmu_start_txn(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+	perf_pmu_disable(pmu);
+
+	/* Snapshot the control state; cpumf_pmu_cancel_txn() compares
+	 * against it.
+	 */
+	cpuhw->flags |= PERF_EVENT_TXN;
+	cpuhw->tx_state = cpuhw->state;
+}
+
+/*
+ * Stop and cancel a group events scheduling transaction.
+ * Assumes cpumf_pmu_del() is called for each successful added
+ * cpumf_pmu_add() during the transaction.
+ */
+static void cpumf_pmu_cancel_txn(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+	/* By now the control state must equal the snapshot taken in
+	 * cpumf_pmu_start_txn().
+	 */
+	WARN_ON(cpuhw->state != cpuhw->tx_state);
+
+	cpuhw->flags &= ~PERF_EVENT_TXN;
+	perf_pmu_enable(pmu);
+}
+
+/*
+ * Commit the group events scheduling transaction.  On success, the
+ * transaction is closed.   On error, the transaction is kept open
+ * until cpumf_pmu_cancel_txn() is called.
+ */
+static int cpumf_pmu_commit_txn(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+	u64 enabled;
+
+	/* Extract the enable controls: everything at and above
+	 * CPUMF_LCCTL_ENABLE_SHIFT, moved down to bit 0.
+	 */
+	enabled = cpuhw->state >> CPUMF_LCCTL_ENABLE_SHIFT;
+
+	/* Every enabled counter set has to be authorized */
+	if ((enabled & cpuhw->info.auth_ctl) != enabled)
+		return -EPERM;
+
+	cpuhw->flags &= ~PERF_EVENT_TXN;
+	perf_pmu_enable(pmu);
+	return 0;
+}
+
+/* Performance monitoring unit for s390x */
+/* Callback table registered under the name "cpum_cf" by cpumf_pmu_init(),
+ * which additionally fills in .capabilities and .attr_groups before the
+ * registration.
+ */
+static struct pmu cpumf_pmu = {
+	.pmu_enable   = cpumf_pmu_enable,
+	.pmu_disable  = cpumf_pmu_disable,
+	.event_init   = cpumf_pmu_event_init,
+	.add	      = cpumf_pmu_add,
+	.del	      = cpumf_pmu_del,
+	.start	      = cpumf_pmu_start,
+	.stop	      = cpumf_pmu_stop,
+	.read	      = cpumf_pmu_read,
+	.start_txn    = cpumf_pmu_start_txn,
+	.commit_txn   = cpumf_pmu_commit_txn,
+	.cancel_txn   = cpumf_pmu_cancel_txn,
+};
+
+/* CPU hotplug notifier: run setup_pmc_cpu() on the affected CPU with
+ * PMC_INIT when it came online and with PMC_RELEASE before it goes
+ * down.  All other actions are ignored.  Always returns NOTIFY_OK.
+ */
+static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action,
+			      void *hcpu)
+{
+	unsigned int cpu = (long) hcpu;
+	int flags;
+
+	/* The frozen variants are treated like the plain actions */
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_ONLINE:
+		flags = PMC_INIT;
+		break;
+	case CPU_DOWN_PREPARE:
+		flags = PMC_RELEASE;
+		break;
+	default:
+		return NOTIFY_OK;
+	}
+
+	/* Run on the target CPU and wait until it has finished */
+	smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+
+	return NOTIFY_OK;
+}
+
+/* Early initcall: register the measurement-alert handler and the
+ * cpum_cf PMU.  Returns 0, -ENODEV when the counter facility is not
+ * available, or the error of the failing registration.
+ */
+static int __init cpumf_pmu_init(void)
+{
+	int rc;
+
+	if (!cpum_cf_avail())
+		return -ENODEV;
+
+	/* clear bit 15 of cr0 to unauthorize problem-state to
+	 * extract measurement counters */
+	ctl_clear_bit(0, 48);
+
+	/* register handler for measurement-alert interruptions */
+	rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+				   cpumf_measurement_alert);
+	if (rc) {
+		pr_err("Registering for CPU-measurement alerts "
+		       "failed with rc=%i\n", rc);
+		return rc;
+	}
+
+	/* There are no overflow interrupts that could drive sampling
+	 * with this facility.  Sampling has to come from elsewhere,
+	 * for example, from timers.
+	 */
+	cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+	cpumf_pmu.attr_groups = cpumf_cf_event_group();
+	rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
+	if (rc) {
+		pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
+		/* roll back the alert handler registration */
+		unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
+					cpumf_measurement_alert);
+		return rc;
+	}
+
+	perf_cpu_notifier(cpumf_pmu_notifier);
+	return 0;
+}
+early_initcall(cpumf_pmu_init);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
new file mode 100644
index 000000000..4554a4bae
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -0,0 +1,322 @@
+/*
+ * Perf PMU sysfs events attributes for available CPU-measurement counters
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+
+
+/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */
+
+/* Events declared with the "cf" prefix.  These are exactly the entries
+ * of cpumcf_pmu_event_attr[], the list used for every machine type.
+ * NOTE(review): CPUMF_EVENT_ATTR is defined outside this file; the
+ * third argument is presumably the hardware counter number -- confirm
+ * at the macro definition.
+ */
+CPUMF_EVENT_ATTR(cf, CPU_CYCLES, 0x0000);
+CPUMF_EVENT_ATTR(cf, INSTRUCTIONS, 0x0001);
+CPUMF_EVENT_ATTR(cf, L1I_DIR_WRITES, 0x0002);
+CPUMF_EVENT_ATTR(cf, L1I_PENALTY_CYCLES, 0x0003);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_CPU_CYCLES, 0x0020);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025);
+CPUMF_EVENT_ATTR(cf, L1D_DIR_WRITES, 0x0004);
+CPUMF_EVENT_ATTR(cf, L1D_PENALTY_CYCLES, 0x0005);
+CPUMF_EVENT_ATTR(cf, PRNG_FUNCTIONS, 0x0040);
+CPUMF_EVENT_ATTR(cf, PRNG_CYCLES, 0x0041);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_FUNCTIONS, 0x0042);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_CYCLES, 0x0043);
+CPUMF_EVENT_ATTR(cf, SHA_FUNCTIONS, 0x0044);
+CPUMF_EVENT_ATTR(cf, SHA_CYCLES, 0x0045);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_FUNCTIONS, 0x0046);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_CYCLES, 0x0047);
+CPUMF_EVENT_ATTR(cf, DEA_FUNCTIONS, 0x0048);
+CPUMF_EVENT_ATTR(cf, DEA_CYCLES, 0x0049);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_FUNCTIONS, 0x004a);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_CYCLES, 0x004b);
+CPUMF_EVENT_ATTR(cf, AES_FUNCTIONS, 0x004c);
+CPUMF_EVENT_ATTR(cf, AES_CYCLES, 0x004d);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_FUNCTIONS, 0x004e);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_CYCLES, 0x004f);
+/* Events declared with the "cf_z10" prefix; listed in
+ * cpumcf_z10_pmu_event_attr[], which cpumf_cf_event_group() selects for
+ * machine types 0x2097 and 0x2098.
+ */
+CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093);
+/* Events declared with the "cf_z196" prefix; listed in
+ * cpumcf_z196_pmu_event_attr[], which cpumf_cf_event_group() selects
+ * for machine types 0x2817 and 0x2818.
+ */
+CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b);
+/* Events declared with the "cf_zec12" prefix; listed in
+ * cpumcf_zec12_pmu_event_attr[], which cpumf_cf_event_group() selects
+ * for machine types 0x2827 and 0x2828.
+ */
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
+
+/* NULL-terminated list of the "cf" event attributes.  It is the initial
+ * .attrs of the "events" group and the first operand of merge_attr() in
+ * cpumf_cf_event_group().
+ */
+static struct attribute *cpumcf_pmu_event_attr[] = {
+	CPUMF_EVENT_PTR(cf, CPU_CYCLES),
+	CPUMF_EVENT_PTR(cf, INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, L1I_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_CPU_CYCLES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, L1D_DIR_WRITES),
+	CPUMF_EVENT_PTR(cf, L1D_PENALTY_CYCLES),
+	CPUMF_EVENT_PTR(cf, PRNG_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, PRNG_CYCLES),
+	CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf, SHA_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, SHA_CYCLES),
+	CPUMF_EVENT_PTR(cf, SHA_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, SHA_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf, DEA_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, DEA_CYCLES),
+	CPUMF_EVENT_PTR(cf, DEA_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, DEA_BLOCKED_CYCLES),
+	CPUMF_EVENT_PTR(cf, AES_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, AES_CYCLES),
+	CPUMF_EVENT_PTR(cf, AES_BLOCKED_FUNCTIONS),
+	CPUMF_EVENT_PTR(cf, AES_BLOCKED_CYCLES),
+	NULL,
+};
+
+/* NULL-terminated list of the "cf_z10" event attributes.  The array is
+ * __initdata; it is only read by merge_attr(), which copies its
+ * pointers into a newly allocated array.
+ */
+static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES),
+	CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT),
+	NULL,
+};
+
+/* NULL-terminated list of the "cf_z196" event attributes.  The array is
+ * __initdata; it is only read by merge_attr(), which copies its
+ * pointers into a newly allocated array.
+ */
+static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES),
+	NULL,
+};
+
+/* NULL-terminated list of the "cf_zec12" event attributes.  The array
+ * is __initdata; it is only read by merge_attr(), which copies its
+ * pointers into a newly allocated array.
+ */
+static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL),
+	CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL),
+	NULL,
+};
+
+/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
+
+/* "events" attribute group.  .attrs starts out as the "cf" list only;
+ * cpumf_cf_event_group() may replace it with a merged list that also
+ * contains the model-specific events.
+ */
+static struct attribute_group cpumsf_pmu_events_group = {
+	.name = "events",
+	.attrs = cpumcf_pmu_event_attr,
+};
+
+/* Declares format_attr_event: the event number occupies config:0-63 */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+/* NULL-terminated attribute list of the "format" group */
+static struct attribute *cpumsf_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+/* "format" attribute group */
+static struct attribute_group cpumsf_pmu_format_group = {
+	.name = "format",
+	.attrs = cpumsf_pmu_format_attr,
+};
+
+/* NULL-terminated list of groups returned by cpumf_cf_event_group() */
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+	&cpumsf_pmu_events_group,
+	&cpumsf_pmu_format_group,
+	NULL,
+};
+
+
+/* Concatenate two NULL-terminated attribute pointer lists.
+ *
+ * @a, @b: NULL-terminated arrays; only the pointers are copied, the
+ *	   attributes themselves are shared with the inputs.
+ *
+ * Returns a newly allocated NULL-terminated array holding the entries of
+ * @a followed by those of @b, or NULL if the allocation fails.  The
+ * caller owns the returned array.
+ */
+static __init struct attribute **merge_attr(struct attribute **a,
+					    struct attribute **b)
+{
+	struct attribute **new;
+	int j, i;
+
+	/* Count the entries of both lists plus one slot for the
+	 * terminating NULL.
+	 */
+	for (j = 0; a[j]; j++)
+		;
+	for (i = 0; b[i]; i++)
+		j++;
+	j++;
+
+	/* kmalloc_array() checks the count * size product for overflow
+	 * instead of relying on an open-coded multiplication.
+	 */
+	new = kmalloc_array(j, sizeof(*new), GFP_KERNEL);
+	if (!new)
+		return NULL;
+	j = 0;
+	for (i = 0; a[i]; i++)
+		new[j++] = a[i];
+	for (i = 0; b[i]; i++)
+		new[j++] = b[i];
+	new[j] = NULL;
+
+	return new;
+}
+
+/* Build the sysfs attribute groups of the counter facility PMU.
+ *
+ * For a known machine type, the "events" group is switched to a merged
+ * list of the common and the model-specific events.  For an unknown
+ * machine type, or if the merged list cannot be allocated, the group
+ * keeps the common events only.  Always returns cpumsf_pmu_attr_groups.
+ */
+__init const struct attribute_group **cpumf_cf_event_group(void)
+{
+	struct attribute **combined, **model;
+	struct cpuid cpu_id;
+
+	get_cpu_id(&cpu_id);
+
+	/* Pick the model-specific event list by machine type */
+	switch (cpu_id.machine) {
+	case 0x2097:
+	case 0x2098:
+		model = cpumcf_z10_pmu_event_attr;
+		break;
+	case 0x2817:
+	case 0x2818:
+		model = cpumcf_z196_pmu_event_attr;
+		break;
+	case 0x2827:
+	case 0x2828:
+		model = cpumcf_zec12_pmu_event_attr;
+		break;
+	default:
+		model = NULL;
+		break;
+	}
+
+	if (model) {
+		combined = merge_attr(cpumcf_pmu_event_attr, model);
+		if (combined)
+			cpumsf_pmu_events_group.attrs = combined;
+	}
+
+	return cpumsf_pmu_attr_groups;
+}
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
new file mode 100644
index 000000000..e6a1578fc
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -0,0 +1,1639 @@
+/*
+ * Performance event support for the System z CPU-measurement Sampling Facility
+ *
+ * Copyright IBM Corp. 2013
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT	"cpum_sf"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+#include <asm/debug.h>
+#include <asm/timex.h>
+
+/* Minimum number of sample-data-block-tables:
+ * At least one table is required for the sampling buffer structure.
+ * A single table contains up to 511 pointers to sample-data-blocks.
+ */
+#define CPUM_SF_MIN_SDBT	1
+
+/* Number of sample-data-blocks per sample-data-block-table (SDBT):
+ * A table contains SDB pointers (8 bytes) and one table-link entry
+ * that points to the origin of the next SDBT.
+ */
+#define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)
+
+/* Page offset of the table-link entry within an SDBT:
+ * require_table_link() returns non-zero if @sdbt points at this offset,
+ * that is, if the slot must hold the link to the next SDBT.
+ */
+#define CPUM_SF_SDBT_TL_OFFSET	(CPUM_SF_SDB_PER_TABLE * 8)
+static inline int require_table_link(const void *sdbt)
+{
+	return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
+}
+
+/* Minimum and maximum sampling buffer sizes:
+ *
+ * This number represents the maximum size of the sampling buffer taking
+ * the number of sample-data-block-tables into account.  Note that these
+ * numbers apply to the basic-sampling function only.
+ * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if
+ * the diagnostic-sampling function is active.
+ *
+ * Sampling buffer size		Buffer characteristics
+ * ---------------------------------------------------
+ *	 64KB		    ==	  16 pages (4KB per page)
+ *				   1 page  for SDB-tables
+ *				  15 pages for SDBs
+ *
+ *  32MB		    ==	8192 pages (4KB per page)
+ *				  16 pages for SDB-tables
+ *				8176 pages for SDBs
+ */
+static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
+static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
+static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1;
+
+struct sf_buffer {
+	unsigned long	 *sdbt;	    /* Origin (first) sample-data-block-table */
+	/* counters that are updated whenever the buffer is extended */
+	unsigned long  num_sdb;	    /* Number of sample-data-blocks */
+	unsigned long num_sdbt;	    /* Number of sample-data-block-tables */
+	unsigned long	 *tail;	    /* Table-link entry of the last SDBT */
+};
+
+struct cpu_hw_sf {
+	/* CPU-measurement sampling information block, filled in by qsi() */
+	struct hws_qsi_info_block qsi;
+	/* Sampling controls that cpumsf_pmu_enable() loads with lsctl() */
+	struct hws_lsctl_request_block lsctl;
+	struct sf_buffer sfb;	    /* Sampling buffer */
+	unsigned int flags;	    /* Status flags (PMU_F_*) */
+	struct perf_event *event;   /* Scheduled perf event */
+};
+static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
+
+/* Debug feature */
+static debug_info_t *sfdbg;
+
+/*
+ * sf_disable() - Load all-zero sampling controls to switch sampling off
+ */
+static int sf_disable(void)
+{
+	struct hws_lsctl_request_block all_off;
+
+	memset(&all_off, 0, sizeof(all_off));
+	return lsctl(&all_off);
+}
+
+/*
+ * sf_buffer_available() - Non-zero if @cpuhw has a sampling buffer origin
+ */
+static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
+{
+	return cpuhw->sfb.sdbt != NULL;
+}
+
+/*
+ * free_sampling_buffer() - Free all SDBs and SDBTs of @sfb, then zero @sfb
+ */
+static void free_sampling_buffer(struct sf_buffer *sfb)
+{
+	unsigned long *sdbt, *curr;
+
+	if (!sfb->sdbt)
+		return;
+
+	sdbt = sfb->sdbt;
+	curr = sdbt;
+
+	/* Walk the entries; an SDBT is freed when its link entry is reached */
+	while (1) {
+		if (!*curr || !sdbt)
+			break;
+
+		/* Link entry: step to the next SDBT, free the one just left */
+		if (is_link_entry(curr)) {
+			curr = get_next_sdbt(curr);
+			if (sdbt)
+				free_page((unsigned long) sdbt);
+
+			/* Back at the origin: the whole buffer is freed */
+			if (curr == sfb->sdbt)
+				break;
+			else
+				sdbt = curr;
+		} else {
+			/* SDB pointer: free the block, advance to next entry */
+			if (*curr) {
+				free_page(*curr);
+				curr++;
+			}
+		}
+	}
+
+	debug_sprintf_event(sfdbg, 5,
+			    "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
+	memset(sfb, 0, sizeof(*sfb));
+}
+
+static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
+{
+	unsigned long page, *te;
+
+	/* Get a zeroed page that serves as the sample-data-block */
+	page = get_zeroed_page(gfp_flags);
+	if (!page)
+		return -ENOMEM;
+	te = trailer_entry_ptr(page);
+	*te = SDB_TE_ALERT_REQ_MASK;
+
+	/* Store the SDB address in the given SDBT slot */
+	*sdbt = page;
+
+	return 0;
+}
+
+/*
+ * realloc_sampling_buffer() - extend sampler memory
+ * @sfb:	Sampling buffer; its sdbt and tail must already be set up
+ * @num_sdb:	Number of sample-data-blocks to append
+ * @gfp_flags:	Allocation flags for the new SDB and SDBT pages
+ *
+ * Important: This modifies the sampling buffer and must be called when the
+ *	      sampling facility is disabled.
+ *
+ * Returns zero on success; on failure, SDBs added so far are kept.
+ */
+static int realloc_sampling_buffer(struct sf_buffer *sfb,
+				   unsigned long num_sdb, gfp_t gfp_flags)
+{
+	int rc;
+	unsigned long i, *new, *tail;
+
+	if (!sfb->sdbt || !sfb->tail)
+		return -EINVAL;
+
+	if (!is_link_entry(sfb->tail))
+		return -EINVAL;
+
+	/* Append to the existing sampling buffer, overwriting the table-link
+	 * entry.
+	 * The tail variable always points to the "tail" (last and table-link)
+	 * entry in an SDB-table.
+	 */
+	tail = sfb->tail;
+
+	/* Do a sanity check whether the table-link entry points to
+	 * the sampling buffer origin.
+	 */
+	if (sfb->sdbt != get_next_sdbt(tail)) {
+		debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
+				    "sampling buffer is not linked: origin=%p"
+				    " tail=%p\n",
+				    (void *) sfb->sdbt, (void *) tail);
+		return -EINVAL;
+	}
+
+	/* Allocate remaining SDBs */
+	rc = 0;
+	for (i = 0; i < num_sdb; i++) {
+		/* Allocate a new SDB-table if it is full. */
+		if (require_table_link(tail)) {
+			new = (unsigned long *) get_zeroed_page(gfp_flags);
+			if (!new) {
+				rc = -ENOMEM;
+				break;
+			}
+			sfb->num_sdbt++;
+			/* Link current page to tail of chain */
+			*tail = (unsigned long)(void *) new + 1;
+			tail = new;
+		}
+
+		/* Allocate a new sample-data-block.
+		 * If there is not enough memory, stop the realloc process
+		 * and simply use what was allocated.  If this is a temporary
+		 * issue, a new realloc call (if required) might succeed.
+		 */
+		rc = alloc_sample_data_block(tail, gfp_flags);
+		if (rc)
+			break;
+		sfb->num_sdb++;
+		tail++;
+	}
+
+	/* Link sampling buffer to its origin */
+	*tail = (unsigned long) sfb->sdbt + 1;
+	sfb->tail = tail;
+
+	debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
+			    " settings: sdbt=%lu sdb=%lu\n",
+			    sfb->num_sdbt, sfb->num_sdb);
+	return rc;
+}
+
+/*
+ * alloc_sampling_buffer() - allocate sampler memory
+ *
+ * Allocates and initializes a sampling buffer structure using the
+ * specified number of sample-data-blocks (SDB).  For each allocation,
+ * a zeroed page is used.  Sample-data-block-tables (SDBT) are added
+ * as needed while the SDBs are allocated.
+ * Also sets the ALERT_REQ mask in each SDB's trailer.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
+{
+	int rc;
+
+	if (sfb->sdbt)
+		return -EINVAL;
+
+	/* Allocate the sample-data-block-table origin */
+	sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+	if (!sfb->sdbt)
+		return -ENOMEM;
+	sfb->num_sdb = 0;
+	sfb->num_sdbt = 1;
+
+	/* Make the first entry of the origin a table-link to itself; this
+	 * is the state realloc_sampling_buffer() expects to start from.
+	 */
+	sfb->tail = sfb->sdbt;
+	*sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
+
+	/* Allocate requested number of sample-data-blocks */
+	rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
+	if (rc) {
+		free_sampling_buffer(sfb);
+		debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
+			"realloc_sampling_buffer failed with rc=%i\n", rc);
+	} else
+		debug_sprintf_event(sfdbg, 4,
+			"alloc_sampling_buffer: tear=%p dear=%p\n",
+			sfb->sdbt, (void *) *sfb->sdbt);
+	return rc;
+}
+
+static void sfb_set_limits(unsigned long min, unsigned long max)
+{
+	struct hws_qsi_info_block si;
+
+	memset(&si, 0, sizeof(si));
+	/* The diagnostic factor is only updated if the query succeeds */
+	if (qsi(&si) == 0)
+		CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+	CPUM_SF_MIN_SDB = min;
+	CPUM_SF_MAX_SDB = max;
+}
+
+static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
+{
+	return CPUM_SF_MAX_SDB *
+	       (SAMPL_DIAG_MODE(hwc) ? CPUM_SF_SDB_DIAG_FACTOR : 1UL);
+}
+
+static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
+					struct hw_perf_event *hwc)
+{
+	unsigned long requested = SFB_ALLOC_REG(hwc);
+
+	if (!sfb->sdbt)
+		return requested;
+	return requested > sfb->num_sdb ? requested - sfb->num_sdb : 0;
+}
+
+static int sfb_has_pending_allocs(struct sf_buffer *sfb,
+				   struct hw_perf_event *hwc)
+{
+	return sfb_pending_allocs(sfb, hwc) != 0;
+}
+
+static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+	unsigned long room = sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc);
+
+	/* Account at most what is left below the maximum number of SDBs */
+	SFB_ALLOC_REG(hwc) += min_t(unsigned long, num, room);
+}
+
+static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+	SFB_ALLOC_REG(hwc) = 0;
+	sfb_account_allocs(num, hwc);
+}
+
+static size_t event_sample_size(struct hw_perf_event *hwc)
+{
+	const struct sf_raw_sample *raw;
+
+	/* A basic-sampling data entry is part of every sample; the size of
+	 * a diagnostic-sampling data entry is added only if the
+	 * diagnostic-sampling function is used.
+	 */
+	raw = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+
+	if (!SAMPL_DIAG_MODE(hwc))
+		return raw->bsdes;
+
+	return (size_t) raw->bsdes + raw->dsdes;
+}
+
+static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
+{
+	if (sf_buffer_available(cpuhw))
+		free_sampling_buffer(&cpuhw->sfb);
+}
+
+static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
+{
+	unsigned long n_sdb, freq, factor;
+	size_t sfr_size, sample_size;
+	struct sf_raw_sample *sfr;
+
+	/* Allocate raw sample buffer
+	 *
+	 *    The raw sample buffer is used to temporarily store sampling data
+	 *    entries for perf raw sample processing.  The buffer size mainly
+	 *    depends on the size of diagnostic-sampling data entries which is
+	 *    machine-specific.  The exact size calculation includes:
+	 *	1. The start of a diagnostic-sampling data entry is already
+	 *	   part of struct sf_raw_sample; subtract sizeof(sfr->diag).
+	 *	2. The perf raw sample data must be 8-byte aligned (u64) and
+	 *	   perf's internal data size must be considered too.  So add
+	 *	   an additional u32 for correct alignment and subtract before
+	 *	   allocating the buffer.
+	 *	3. Store the raw sample buffer pointer in the perf event
+	 *	   hardware structure.  NOTE(review): a pointer stored by an
+	 *	   earlier call for the same hwc is overwritten -- confirm.
+	 */
+	sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) +
+			 sizeof(u32), sizeof(u64));
+	sfr_size -= sizeof(u32);
+	sfr = kzalloc(sfr_size, GFP_KERNEL);
+	if (!sfr)
+		return -ENOMEM;
+	sfr->size = sfr_size;
+	sfr->bsdes = cpuhw->qsi.bsdes;
+	sfr->dsdes = cpuhw->qsi.dsdes;
+	RAWSAMPLE_REG(hwc) = (unsigned long) sfr;
+
+	/* Calculate sampling buffers using 4K pages
+	 *
+	 *    1. Determine the sample data size which depends on the used
+	 *	 sampling functions, for example, basic-sampling or
+	 *	 basic-sampling with diagnostic-sampling.
+	 *
+	 *    2. Use the sampling frequency as input.  The sampling buffer is
+	 *	 designed for almost one second.  This can be adjusted through
+	 *	 the "factor" variable.
+	 *	 In any case, alloc_sampling_buffer() sets the Alert Request
+	 *	 Control indicator to trigger a measurement-alert to harvest
+	 *	 sample-data-blocks (sdb).
+	 *
+	 *    3. Compute the number of sample-data-blocks and ensure a minimum
+	 *	 of CPUM_SF_MIN_SDB.  The upper limit is applied afterwards by
+	 *	 sfb_init_allocs(), which caps the request at sfb_max_limit().
+	 *	 That maximum is designed for basic-sampling only and is
+	 *	 increased if diagnostic-sampling is active.
+	 *	 See also the remarks for these symbolic constants.
+	 *
+	 *    4. Sample-data-block-tables (SDBT) are not computed here; they
+	 *	 are added by realloc_sampling_buffer() whenever a table is
+	 *	 full.
+	 */
+	sample_size = event_sample_size(hwc);
+	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
+	factor = 1;
+	n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
+	if (n_sdb < CPUM_SF_MIN_SDB)
+		n_sdb = CPUM_SF_MIN_SDB;
+
+	/* If there is already a sampling buffer allocated, it is very likely
+	 * that the sampling facility is enabled too.  If the event to be
+	 * initialized requires a greater sampling buffer, the allocation must
+	 * be postponed.  Changing the sampling buffer requires the sampling
+	 * facility to be in the disabled state.  So, account the number of
+	 * required SDBs and let cpumsf_pmu_enable() resize the buffer just
+	 * before the event is started.
+	 */
+	sfb_init_allocs(n_sdb, hwc);
+	if (sf_buffer_available(cpuhw))
+		return 0;
+
+	debug_sprintf_event(sfdbg, 3,
+			    "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
+			    " sample_size=%lu cpuhw=%p\n",
+			    SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
+			    sample_size, cpuhw);
+
+	return alloc_sampling_buffer(&cpuhw->sfb,
+				     sfb_pending_allocs(&cpuhw->sfb, hwc));
+}
+
+static unsigned long min_percent(unsigned int percent, unsigned long base,
+				 unsigned long min)
+{
+	return min_t(unsigned long, DIV_ROUND_UP(percent * base, 100), min);
+}
+
+static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
+{
+	unsigned int percent = 5;
+	unsigned long cap = 8;
+
+	/* Extend by 1% to 5% of the current number of sample-data-blocks,
+	 * bounded by cap; up to 5% sample data loss needs no extension.
+	 */
+	if (ratio <= 5)
+		return 0;
+	if (ratio <= 50)
+		percent = 1;
+	else if (ratio <= 75)
+		percent = 2;
+	else if (ratio <= 100)
+		percent = 3;
+	else if (ratio <= 250)
+		percent = 4;
+	if (percent < 5)
+		cap = percent;
+	return min_percent(percent, base, cap);
+}
+
+static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
+				  struct hw_perf_event *hwc)
+{
+	unsigned long ratio, num;
+
+	if (!OVERFLOW_REG(hwc))
+		return;
+
+	/* OVERFLOW_REG(hwc) contains the average number of sample data
+	 * entries that were lost because sample-data-blocks were full.
+	 *
+	 * Multiply it by the number of SDBs to get the total number of
+	 * discarded entries.  Then relate that to the sampling frequency to
+	 * get the ratio of lost samples to total samples per second in percent.
+	 */
+	ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
+			     sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
+
+	/* Turn the ratio into a number of SDBs to add and account them */
+	num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
+	if (num)
+		sfb_account_allocs(num, hwc);
+
+	debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
+			    " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+	OVERFLOW_REG(hwc) = 0;
+}
+
+/* extend_sampling_buffer() - Extend sampling buffer
+ * @sfb:	Sampling buffer structure (for local CPU)
+ * @hwc:	Perf event hardware structure
+ *
+ * Appends the sample-data-blocks that sfb_pending_allocs() reports for @hwc.
+ * A failed or partial extension is only logged to the debug feature.
+ *
+ * Important: This function disables the sampling facility in order to safely
+ *	      change the sampling buffer structure.  Do not call this function
+ *	      when the PMU is active.
+ */
+static void extend_sampling_buffer(struct sf_buffer *sfb,
+				   struct hw_perf_event *hwc)
+{
+	unsigned long num, num_old;
+	int rc;
+
+	num = sfb_pending_allocs(sfb, hwc);
+	if (!num)
+		return;
+	num_old = sfb->num_sdb;
+
+	/* Disable the sampling facility to reset any states and also
+	 * clear pending measurement alerts.
+	 */
+	sf_disable();
+
+	/* Extend the sampling buffer.
+	 * This memory allocation typically happens in an atomic context when
+	 * called by perf, hence GFP_ATOMIC.  Because this is a reallocation,
+	 * it is fine if the request cannot be satisfied immediately.
+	 */
+	rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
+	if (rc)
+		debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
+				    "failed with rc=%i\n", rc);
+
+	if (sfb_has_pending_allocs(sfb, hwc))
+		debug_sprintf_event(sfdbg, 5, "sfb: extend: "
+				    "req=%lu alloc=%lu remaining=%lu\n",
+				    num, sfb->num_sdb - num_old,
+				    sfb_pending_allocs(sfb, hwc));
+}
+
+
+/* Number of perf events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+#define PMC_INIT      0
+#define PMC_RELEASE   1
+#define PMC_FAILURE   2
+static void setup_pmc_cpu(void *flags)
+{
+	struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf);
+	int *request = flags;
+	int err = 0;
+
+	switch (*request) {
+	case PMC_INIT:
+		memset(cpusf, 0, sizeof(*cpusf));
+		err = qsi(&cpusf->qsi);
+		if (err)
+			break;
+		cpusf->flags |= PMU_F_RESERVED;
+		err = sf_disable();
+		if (err)
+			pr_err("Switching off the sampling facility failed "
+			       "with rc=%i\n", err);
+		debug_sprintf_event(sfdbg, 5,
+				    "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
+		break;
+	case PMC_RELEASE:
+		cpusf->flags &= ~PMU_F_RESERVED;
+		err = sf_disable();
+		if (!err)
+			deallocate_buffers(cpusf);
+		else
+			pr_err("Switching off the sampling facility failed "
+			       "with rc=%i\n", err);
+		debug_sprintf_event(sfdbg, 5,
+				    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
+		break;
+	}
+	if (err)
+		*request |= PMC_FAILURE;
+}
+
+static void release_pmc_hardware(void)
+{
+	int flags = PMC_RELEASE;
+
+	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	on_each_cpu(setup_pmc_cpu, &flags, 1);
+	perf_release_sampling();
+}
+
+static int reserve_pmc_hardware(void)
+{
+	int request = PMC_INIT;
+	int rc;
+
+	rc = perf_reserve_sampling();
+	if (rc)
+		return rc;
+	on_each_cpu(setup_pmc_cpu, &request, 1);
+	if (!(request & PMC_FAILURE)) {
+		irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+		return 0;
+	}
+	/* At least one CPU reported a failure: undo the reservation */
+	release_pmc_hardware();
+	return -ENODEV;
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	/* Free raw sample buffer; kfree() ignores a NULL pointer */
+	kfree((void *) RAWSAMPLE_REG(&event->hw));
+
+	/* Drop one reference.  Only the drop from one to zero is done
+	 * under the mutex because it releases the PMC.
+	 */
+	if (atomic_add_unless(&num_events, -1, 1))
+		return;
+	mutex_lock(&pmc_reserve_mutex);
+	if (atomic_dec_return(&num_events) == 0)
+		release_pmc_hardware();
+	mutex_unlock(&pmc_reserve_mutex);
+}
+
+static void hw_init_period(struct hw_perf_event *hwc, u64 period)
+{
+	hwc->sample_period = period;
+	hwc->last_period = period;
+	local64_set(&hwc->period_left, period);
+}
+
+static void hw_reset_registers(struct hw_perf_event *hwc,
+			       unsigned long *sdbt_origin)
+{
+	struct sf_raw_sample *raw;
+
+	/* Point TEAR at the origin sample-data-block-table again */
+	TEAR_REG(hwc) = (unsigned long) sdbt_origin;
+
+	/* Wipe the basic and diagnostic parts of the raw sample buffer */
+	raw = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+	memset(&raw->basic, 0, sizeof(raw->basic));
+	memset(&raw->diag, 0, raw->dsdes);
+}
+
+static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
+				   unsigned long rate)
+{
+	rate = max_t(unsigned long, rate, si->min_sampl_rate);
+	return min_t(unsigned long, rate, si->max_sampl_rate);
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	struct cpu_hw_sf *cpuhw;
+	struct hws_qsi_info_block si;
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long rate;
+	int cpu, err;
+
+	/* Reserve CPU-measurement sampling facility */
+	err = 0;
+	if (!atomic_inc_not_zero(&num_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+			err = -EBUSY;
+		else
+			atomic_inc(&num_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	/* No reference was taken on failure, so do not arm ->destroy yet */
+	if (err)
+		goto out;
+	event->destroy = hw_perf_event_destroy;
+
+	/* Access per-CPU sampling information (query sampling info) */
+	/*
+	 * The event->cpu value can be -1 to count on every CPU, for example,
+	 * when attaching to a task.  If this is specified, use the query
+	 * sampling info from the current CPU, otherwise use event->cpu to
+	 * retrieve the per-CPU information.
+	 * Later, cpuhw indicates whether to allocate sampling buffers for a
+	 * particular CPU (cpuhw!=NULL) or each online CPU (cpuhw==NULL).
+	 */
+	memset(&si, 0, sizeof(si));
+	cpuhw = NULL;
+	if (event->cpu == -1)
+		qsi(&si);
+	else {
+		/* Event is pinned to a particular CPU, retrieve the per-CPU
+		 * sampling structure for accessing the CPU-specific QSI.
+		 */
+		cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+		si = cpuhw->qsi;
+	}
+
+	/* Check sampling facility authorization and, if not authorized,
+	 * fall back to other PMUs.  It is safe to check any CPU because
+	 * the authorization is identical for all configured CPUs.
+	 */
+	if (!si.as) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	/* Always enable basic sampling */
+	SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
+
+	/* Check if diagnostic sampling is requested.  Deny if the required
+	 * sampling authorization is missing.
+	 */
+	if (attr->config == PERF_EVENT_CPUM_SF_DIAG) {
+		if (!si.ad) {
+			err = -EPERM;
+			goto out;
+		}
+		SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
+	}
+
+	/* Check and set other sampling flags */
+	if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
+		SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
+
+	/* The sampling information (si) contains information about the
+	 * min/max sampling intervals and the CPU speed.  So calculate the
+	 * correct sampling interval and avoid the whole period adjust
+	 * feedback loop.
+	 */
+	rate = 0;
+	if (attr->freq) {
+		rate = freq_to_sample_rate(&si, attr->sample_freq);
+		rate = hw_limit_rate(&si, rate);
+		attr->freq = 0;
+		attr->sample_period = rate;
+	} else {
+		/* The min/max sampling rates specifies the valid range
+		 * of sample periods.  If the specified sample period is
+		 * out of range, limit the period to the range boundary.
+		 */
+		rate = hw_limit_rate(&si, hwc->sample_period);
+
+		/* The perf core maintains a maximum sample rate that is
+		 * configurable through the sysctl interface.  Ensure the
+		 * sampling rate does not exceed this value.  This also helps
+		 * to avoid throttling when pushing samples with
+		 * perf_event_overflow().
+		 */
+		if (sample_rate_to_freq(&si, rate) >
+		      sysctl_perf_event_sample_rate) {
+			err = -EINVAL;
+			debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
+			goto out;
+		}
+	}
+	SAMPL_RATE(hwc) = rate;
+	hw_init_period(hwc, SAMPL_RATE(hwc));
+
+	/* Initialize sample data overflow accounting */
+	hwc->extra_reg.reg = REG_OVERFLOW;
+	OVERFLOW_REG(hwc) = 0;
+
+	/* Allocate the per-CPU sampling buffer using the CPU information
+	 * from the event.  If the event is not pinned to a particular
+	 * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
+	 * buffers for each online CPU.
+	 */
+	if (cpuhw)
+		/* Event is pinned to a particular CPU */
+		err = allocate_buffers(cpuhw, hwc);
+	else {
+		/* Event is not pinned, allocate sampling buffer on
+		 * each online CPU
+		 */
+		for_each_online_cpu(cpu) {
+			cpuhw = &per_cpu(cpu_hw_sf, cpu);
+			err = allocate_buffers(cpuhw, hwc);
+			if (err)
+				break;
+		}
+	}
+out:
+	return err;
+}
+
+static int cpumsf_pmu_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	int rc;
+
+	/* Taken branch sampling is not supported */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	/* Only raw sampling events and sampling of CPU cycles are handled */
+	if (attr->type == PERF_TYPE_RAW) {
+		if (attr->config != PERF_EVENT_CPUM_SF &&
+		    attr->config != PERF_EVENT_CPUM_SF_DIAG)
+			return -ENOENT;
+	} else if (attr->type == PERF_TYPE_HARDWARE) {
+		/* CPU cycles are also provided by the counter facility,
+		 * which is more precise.  Hence, handle the event here
+		 * only if it is a sampling event.
+		 */
+		if (attr->config != PERF_COUNT_HW_CPU_CYCLES ||
+		    !is_sampling_event(event))
+			return -ENOENT;
+	} else {
+		return -ENOENT;
+	}
+
+	/* An event that is pinned to a CPU requires a valid CPU number
+	 * and that CPU to be online.
+	 */
+	if (event->cpu >= nr_cpumask_bits)
+		return -ENODEV;
+	if (event->cpu >= 0 && !cpu_online(event->cpu))
+		return -ENODEV;
+
+	/* Always clear the hv and idle excludes, regardless of what the
+	 * user requested.
+	 */
+	if (attr->exclude_hv)
+		attr->exclude_hv = 0;
+	if (attr->exclude_idle)
+		attr->exclude_idle = 0;
+
+	rc = __hw_perf_event_init(event);
+	if (unlikely(rc) && event->destroy)
+		event->destroy(event);
+
+	return rc;
+}
+
+static void cpumsf_pmu_enable(struct pmu *pmu)
+{
+	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+	struct hw_perf_event *hwc;
+	int err;
+
+	if (cpuhw->flags & PMU_F_ENABLED)
+		return;
+
+	if (cpuhw->flags & PMU_F_ERR_MASK)
+		return;
+
+	/* Check whether to extend the sampling buffer.
+	 *
+	 * Two conditions trigger an increase of the sampling buffer for a
+	 * perf event:
+	 *    1. Postponed buffer allocations from the event initialization.
+	 *    2. Sampling overflows that contribute to pending allocations.
+	 *
+	 * Note that the extend_sampling_buffer() function disables the sampling
+	 * facility, but it can be fully re-enabled using sampling controls that
+	 * have been saved in cpumsf_pmu_disable().
+	 */
+	if (cpuhw->event) {
+		hwc = &cpuhw->event->hw;
+		/* Account number of overflow-designated buffer extents */
+		sfb_account_overflows(cpuhw, hwc);
+		if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
+			extend_sampling_buffer(&cpuhw->sfb, hwc);
+	}
+
+	/* Mark the PMU enabled before loading the sampling controls */
+	cpuhw->flags |= PMU_F_ENABLED;
+	barrier();
+
+	err = lsctl(&cpuhw->lsctl);
+	if (err) {
+		cpuhw->flags &= ~PMU_F_ENABLED;
+		pr_err("Loading sampling controls failed: op=%i err=%i\n",
+			1, err);
+		return;
+	}
+
+	debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
+			    "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs,
+			    cpuhw->lsctl.ed, cpuhw->lsctl.cd,
+			    (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
+}
+
+static void cpumsf_pmu_disable(struct pmu *pmu)
+{
+	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+	struct hws_lsctl_request_block inactive;
+	struct hws_qsi_info_block si;
+	int err;
+
+	/* Nothing to do if the PMU is not enabled or is in an error state */
+	if (!(cpuhw->flags & PMU_F_ENABLED))
+		return;
+	if (cpuhw->flags & PMU_F_ERR_MASK)
+		return;
+
+	/* Switch off sampling activation control */
+	inactive = cpuhw->lsctl;
+	inactive.cs = 0;
+	inactive.cd = 0;
+
+	err = lsctl(&inactive);
+	if (err) {
+		pr_err("Loading sampling controls failed: op=%i err=%i\n",
+			2, err);
+		return;
+	}
+
+	/* Save state of TEAR and DEAR register contents */
+	err = qsi(&si);
+	if (!err) {
+		/* TEAR/DEAR are valid only while the sampling facility is
+		 * enabled; this function can also run for a disabled facility
+		 * because cpumsf_pmu_enable() controls the enable state.
+		 */
+		if (si.es) {
+			cpuhw->lsctl.tear = si.tear;
+			cpuhw->lsctl.dear = si.dear;
+		}
+	} else
+		debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
+				    "qsi() failed with err=%i\n", err);
+
+	cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+/* perf_exclude_event() - Filter event
+ * @event:	The perf event
+ * @regs:	pt_regs structure
+ * @sde_regs:	Sample-data-entry (sde) regs structure
+ *
+ * Filter perf events according to their exclude specification.
+ *
+ * Return non-zero if the event shall be excluded.
+ */
+static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+			      struct perf_sf_sde_regs *sde_regs)
+{
+	const struct perf_event_attr *attr = &event->attr;
+
+	/* Privilege filter: the sample was taken in user or kernel mode */
+	if (user_mode(regs) ? attr->exclude_user : attr->exclude_kernel)
+		return 1;
+	/* Virtualization filter: the sample was taken in guest or host */
+	if (sde_regs->in_guest ? attr->exclude_guest : attr->exclude_host)
+		return 1;
+	return 0;
+}
+
+/* perf_push_sample() - Push samples to perf
+ * @event:	The perf event
+ * @sfr:	Raw sample buffer holding the hardware sample data
+ *
+ * Use the hardware sample data to create a perf event sample.  The sample
+ * is then pushed to the event subsystem and the function checks for
+ * possible event overflows.  If an event overflow occurs, the PMU is
+ * stopped.
+ *
+ * Return non-zero if an event overflow occurred.
+ */
+static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
+{
+	int overflow;
+	struct pt_regs regs;
+	struct perf_sf_sde_regs *sde_regs;
+	struct perf_sample_data data;
+	struct perf_raw_record raw;
+
+	/* Setup perf sample */
+	perf_sample_data_init(&data, 0, event->hw.last_period);
+	raw.size = sfr->size;
+	raw.data = sfr;
+	data.raw = &raw;
+
+	/* Setup pt_regs to look like a CPU-measurement external interrupt
+	 * using the Program Request Alert code.  The regs.int_parm_long
+	 * field which is unused contains additional sample-data-entry related
+	 * indicators.
+	 */
+	memset(&regs, 0, sizeof(regs));
+	regs.int_code = 0x1407;
+	regs.int_parm = CPU_MF_INT_SF_PRA;
+	sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
+
+	regs.psw.addr = sfr->basic.ia;
+	if (sfr->basic.T)
+		regs.psw.mask |= PSW_MASK_DAT;
+	if (sfr->basic.W)
+		regs.psw.mask |= PSW_MASK_WAIT;
+	if (sfr->basic.P)
+		regs.psw.mask |= PSW_MASK_PSTATE;
+	switch (sfr->basic.AS) {
+	case 0x0:
+		regs.psw.mask |= PSW_ASC_PRIMARY;
+		break;
+	case 0x1:
+		regs.psw.mask |= PSW_ASC_ACCREG;
+		break;
+	case 0x2:
+		regs.psw.mask |= PSW_ASC_SECONDARY;
+		break;
+	case 0x3:
+		regs.psw.mask |= PSW_ASC_HOME;
+		break;
+	}
+
+	/* The host-program-parameter (hpp) contains the sie control
+	 * block that is set by sie64a() in entry64.S.  Check if hpp
+	 * refers to a valid control block and set sde_regs flags
+	 * accordingly.  This would allow to use hpp values for other
+	 * purposes too.
+	 * For now, simply use a non-zero value as guest indicator.
+	 */
+	if (sfr->basic.hpp)
+		sde_regs->in_guest = 1;
+
+	overflow = 0;
+	if (perf_exclude_event(event, &regs, sde_regs))
+		goto out;
+	if (perf_event_overflow(event, &data, &regs)) {
+		overflow = 1;
+		event->pmu->stop(event, 0);
+	}
+	perf_event_update_userpage(event);
+out:
+	return overflow;
+}
+
+static void perf_event_count_update(struct perf_event *event, u64 count)
+{
+	local64_add(count, &event->count);
+}
+
+static int sample_format_is_valid(struct hws_combined_entry *sample,
+				   unsigned int flags)
+{
+	/* A basic-sampling data entry can only be processed if its
+	 * data-entry-format (DEF) is 0x0001.
+	 */
+	if (likely(flags & PERF_CPUM_SF_BASIC_MODE) &&
+	    sample->basic.def != 0x0001)
+		return 0;
+
+	/* The DEF of diagnostic-sampling data entries can vary.  As the
+	 * diagnostic data is just passed through, do a sanity check on
+	 * the DEF only.
+	 */
+	if ((flags & PERF_CPUM_SF_DIAG_MODE) && sample->diag.def < 0x8001)
+		return 0;
+	return 1;
+}
+
+static int sample_is_consistent(struct hws_combined_entry *sample,
+				unsigned long flags)
+{
+	/* Only the basic-sampling data entry of a potentially combined
+	 * entry is checked.  Without the basic-sampling function there is
+	 * nothing to check and the sample is reported as not consistent.
+	 */
+	if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE)))
+		return 0;
+
+	/*
+	 * Skip samples that are marked invalid or that have the wait-state
+	 * bit set, because their instruction address is not predictable.
+	 */
+	if (sample->basic.I)
+		return 0;
+
+	return !sample->basic.W;
+}
+
+/* Mark a sample slot as empty by clearing the data-entry-format fields of
+ * the portions that are active according to @flags.
+ */
+static void reset_sample_slot(struct hws_combined_entry *sample,
+			      unsigned long flags)
+{
+	if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) {
+		sample->basic.def = 0;
+	}
+
+	if (flags & PERF_CPUM_SF_DIAG_MODE) {
+		sample->diag.def = 0;
+	}
+}
+
+/* Copy a hardware sampling data entry into the raw sample record.
+ * @sfr:	Raw sample record; sfr->format selects which portions to copy
+ * @sample:	Hardware sampling data entry to copy from
+ *
+ * The basic portion is copied as a whole structure, the diagnostic portion
+ * as sfr->dsdes bytes.
+ */
+static void sfr_store_sample(struct sf_raw_sample *sfr,
+			     struct hws_combined_entry *sample)
+{
+	if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE)) {
+		sfr->basic = sample->basic;
+	}
+
+	if (sfr->format & PERF_CPUM_SF_DIAG_MODE) {
+		memcpy(&sfr->diag, &sample->diag, sfr->dsdes);
+	}
+}
+
+/* Log an unrecognized sampling data entry to the s390 debug feature.
+ * @sample:	The offending sampling data entry
+ * @te:		Trailer entry of the sample-data-block holding @sample
+ * @flags:	Sampling mode flags
+ *
+ * If diagnostic-sampling is not active, the diagnostic fields are reported
+ * as 0xFFFF / NULL instead of being read from @sample.
+ */
+static void debug_sample_entry(struct hws_combined_entry *sample,
+			       struct hws_trailer_entry *te,
+			       unsigned long flags)
+{
+	void *diag_ptr = NULL;
+	int diag_def = 0xFFFF;
+
+	if (flags & PERF_CPUM_SF_DIAG_MODE) {
+		diag_def = sample->diag.def;
+		diag_ptr = &sample->diag;
+	}
+
+	debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
+			    "sampling data entry: te->f=%i basic.def=%04x (%p)"
+			    " diag.def=%04x (%p)\n", te->f,
+			    sample->basic.def, &sample->basic,
+			    diag_def, diag_ptr);
+}
+
+/* hw_collect_samples() - Walk through a sample-data-block and collect samples
+ * @event:	The perf event
+ * @sdbt:	Sample-data-block table entry; *sdbt is the address of the
+ *		sample-data-block to process
+ * @overflow:	Event overflow counter
+ *
+ * Walks through a sample-data-block and collects sampling data entries that are
+ * then pushed to the perf event subsystem.  Depending on the sampling function,
+ * there can be either basic-sampling or combined-sampling data entries.  A
+ * combined-sampling data entry consists of a basic- and a diagnostic-sampling
+ * data entry.  The sampling function is determined by the flags in the perf
+ * event hardware structure.  The function always works with a combined-sampling
+ * data entry but ignores the diagnostic portion if it is not available.
+ *
+ * Note that the implementation focuses on basic-sampling data entries and, if
+ * such an entry is not valid, the entire combined-sampling data entry is
+ * ignored.
+ *
+ * The overflow variable counts the number of samples that have been discarded
+ * due to a perf event overflow.
+ */
+static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+			       unsigned long long *overflow)
+{
+	unsigned long flags = SAMPL_FLAGS(&event->hw);
+	struct hws_combined_entry *sample;
+	struct hws_trailer_entry *te;
+	struct sf_raw_sample *sfr;
+	size_t sample_size;
+
+	/* Prepare and initialize raw sample data */
+	sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw);
+	sfr->format = flags & PERF_CPUM_SF_MODE_MASK;
+
+	sample_size = event_sample_size(&event->hw);
+	te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+	sample = (struct hws_combined_entry *) *sdbt;
+	while ((unsigned long *) sample < (unsigned long *) te) {
+		/* Check for an empty sample */
+		if (!sample->basic.def)
+			break;
+
+		/* Update perf event period */
+		perf_event_count_update(event, SAMPL_RATE(&event->hw));
+
+		/* Check sampling data entry */
+		if (sample_format_is_valid(sample, flags)) {
+			/* If an event overflow occurred, the PMU is stopped to
+			 * throttle event delivery.  Remaining sample data is
+			 * discarded.
+			 */
+			if (!*overflow) {
+				if (sample_is_consistent(sample, flags)) {
+					/* Deliver sample data to perf */
+					sfr_store_sample(sfr, sample);
+					*overflow = perf_push_sample(event, sfr);
+				}
+			} else
+				/* Count discarded samples */
+				*overflow += 1;
+		} else {
+			debug_sample_entry(sample, te, flags);
+			/* Sample slot is not yet written or other record.
+			 *
+			 * This condition can occur if the buffer was reused
+			 * from a combined basic- and diagnostic-sampling.
+			 * If only basic-sampling is then active, entries are
+			 * written into the larger diagnostic entries.
+			 * This is typically the case for sample-data-blocks
+			 * that are not full.  Stop processing if the first
+			 * invalid format was detected.
+			 */
+			if (!te->f)
+				break;
+		}
+
+		/* Reset sample slot and advance to next sample.
+		 *
+		 * The sample size is a byte count (assumed from how the
+		 * bsdes/dsdes sizes are used elsewhere in this file), so the
+		 * pointer must be advanced byte-wise.  Adding it directly to
+		 * the structure pointer would scale the step by the structure
+		 * size and skip over most entries of the block.
+		 */
+		reset_sample_slot(sample, flags);
+		sample = (struct hws_combined_entry *)
+				((unsigned long) sample + sample_size);
+	}
+}
+
+/* hw_perf_event_update() - Process sampling buffer
+ * @event:	The perf event
+ * @flush_all:	Flag to also flush partially filled sample-data-blocks
+ *
+ * Processes the sampling buffer and creates perf event samples.
+ * The sampling buffer position is read from and saved back to the TEAR_REG
+ * register of the specified perf event.
+ *
+ * Only full sample-data-blocks are processed.  Specify the flush_all flag
+ * to also walk through partially filled sample-data-blocks.  It is ignored
+ * if PERF_CPUM_SF_FULL_BLOCKS is set.  The PERF_CPUM_SF_FULL_BLOCKS flag
+ * enforces the processing of full sample-data-blocks only (trailer entries
+ * with the block-full-indicator bit set).
+ *
+ * Sample overflows reported in the trailer entries are accumulated and folded
+ * into OVERFLOW_REG of the event at the end.
+ */
+static void hw_perf_event_update(struct perf_event *event, int flush_all)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct hws_trailer_entry *te;
+	unsigned long *sdbt;
+	unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
+	int done;
+
+	/* Full-blocks-only mode overrides a flush request */
+	if (flush_all && SDB_FULL_BLOCKS(hwc))
+		flush_all = 0;
+
+	/* Resume at the table position saved by the previous invocation */
+	sdbt = (unsigned long *) TEAR_REG(hwc);
+	done = event_overflow = sampl_overflow = num_sdb = 0;
+	while (!done) {
+		/* Get the trailer entry of the sample-data-block */
+		te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+
+		/* Leave loop if no more work to do (block full indicator).
+		 * With flush_all set, this partially filled block is still
+		 * processed once before the loop terminates.
+		 */
+		if (!te->f) {
+			done = 1;
+			if (!flush_all)
+				break;
+		}
+
+		/* Check the sample overflow count */
+		if (te->overflow)
+			/* Account sample overflows and, if a particular limit
+			 * is reached, extend the sampling buffer.
+			 * For details, see sfb_account_overflows().
+			 */
+			sampl_overflow += te->overflow;
+
+		/* Timestamps are valid for full sample-data-blocks only */
+		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
+				    "overflow=%llu timestamp=0x%llx\n",
+				    sdbt, te->overflow,
+				    (te->f) ? trailer_timestamp(te) : 0ULL);
+
+		/* Collect all samples from a single sample-data-block and
+		 * flag if an (perf) event overflow happened.  If so, the PMU
+		 * is stopped and remaining samples will be discarded.
+		 */
+		hw_collect_samples(event, sdbt, &event_overflow);
+		num_sdb++;
+
+		/* Reset trailer (using compare-double-and-swap): clear the
+		 * block-full bit, set the alert-request bit and zero the
+		 * overflow count in one atomic update, retrying if the
+		 * trailer changed in between.
+		 */
+		do {
+			te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
+			te_flags |= SDB_TE_ALERT_REQ_MASK;
+		} while (!cmpxchg_double(&te->flags, &te->overflow,
+					 te->flags, te->overflow,
+					 te_flags, 0ULL));
+
+		/* Advance to next sample-data-block, following the link to
+		 * the next table if the end of this table was reached.
+		 */
+		sdbt++;
+		if (is_link_entry(sdbt))
+			sdbt = get_next_sdbt(sdbt);
+
+		/* Update event hardware registers */
+		TEAR_REG(hwc) = (unsigned long) sdbt;
+
+		/* Stop processing sample-data if all samples of the current
+		 * sample-data-block were flushed even if it was not full.
+		 */
+		if (flush_all && done)
+			break;
+
+		/* If an event overflow happened, discard samples by
+		 * processing any remaining sample-data-blocks.
+		 */
+		if (event_overflow)
+			flush_all = 1;
+	}
+
+	/* Account sample overflows in the event hardware structure */
+	if (sampl_overflow)
+		OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
+						 sampl_overflow, 1 + num_sdb);
+	if (sampl_overflow || event_overflow)
+		debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
+				    "overflow stats: sample=%llu event=%llu\n",
+				    sampl_overflow, event_overflow);
+}
+
+/* PMU ->read() callback (see cpumf_sampling).  Intentionally empty: the
+ * event count is updated from the measurement-alert interrupt path instead.
+ */
+static void cpumsf_pmu_read(struct perf_event *event)
+{
+	/* Nothing to do ... updates are interrupt-driven */
+}
+
+/* PMU ->start() callback: switch the sampling activation controls on.
+ *
+ * Only the per-CPU control block is modified here; sampling itself starts
+ * with the next call of pmu_enable().  The event must currently be stopped
+ * and, when PERF_EF_RELOAD is passed, is expected to be up to date.
+ */
+static void cpumsf_pmu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+	/* Starting an event that is not stopped is a caller bug */
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	perf_pmu_disable(event->pmu);
+	hwc->state = 0;
+	/* Basic-sampling is always activated, diagnostic-sampling on demand */
+	cpuhw->lsctl.cs = 1;
+	if (SAMPL_DIAG_MODE(hwc))
+		cpuhw->lsctl.cd = 1;
+	perf_pmu_enable(event->pmu);
+}
+
+/* PMU ->stop() callback: switch the sampling activation controls off.
+ *
+ * Only the per-CPU control block is modified here; sampling itself stops
+ * with the next call of pmu_enable().  If PERF_EF_UPDATE is requested and the
+ * event is not yet up to date, the sampling buffer is flushed, including
+ * partially filled sample-data-blocks.
+ */
+static void cpumsf_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+	/* Nothing to do for an event that is already stopped */
+	if (hwc->state & PERF_HES_STOPPED)
+		return;
+
+	perf_pmu_disable(event->pmu);
+	cpuhw->lsctl.cs = 0;
+	cpuhw->lsctl.cd = 0;
+	hwc->state |= PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_UPDATE) {
+		if (!(hwc->state & PERF_HES_UPTODATE)) {
+			hw_perf_event_update(event, 1);
+			hwc->state |= PERF_HES_UPTODATE;
+		}
+	}
+	perf_pmu_enable(event->pmu);
+}
+
+/* PMU ->add() callback: bind @event to the sampling facility of this CPU.
+ * @event:	The perf event to add
+ * @flags:	PERF_EF_* flags; PERF_EF_START also starts the event
+ *
+ * Returns 0 on success, -EAGAIN if the per-CPU PMU is already in use or the
+ * sampling enable controls are unexpectedly set, and -EINVAL if no sampling
+ * buffer (SDB-table) is available.
+ */
+static int cpumsf_pmu_add(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+	int err;
+
+	/* Only one event can use the per-CPU sampling facility at a time */
+	if (cpuhw->flags & PMU_F_IN_USE)
+		return -EAGAIN;
+
+	if (!cpuhw->sfb.sdbt)
+		return -EINVAL;
+
+	err = 0;
+	perf_pmu_disable(event->pmu);
+
+	/* The event starts out stopped with nothing pending to flush */
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	/* Set up sampling controls.  Always program the sampling register
+	 * using the SDB-table start.  Reset TEAR_REG event hardware register
+	 * that is used by hw_perf_event_update() to store the sampling buffer
+	 * position after samples have been flushed.
+	 */
+	cpuhw->lsctl.s = 0;
+	cpuhw->lsctl.h = 1;
+	cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
+	cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
+	cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
+	hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+
+	/* Ensure sampling functions are in the disabled state.  If disabled,
+	 * switch on sampling enable control. */
+	if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) {
+		err = -EAGAIN;
+		goto out;
+	}
+	cpuhw->lsctl.es = 1;
+	if (SAMPL_DIAG_MODE(&event->hw))
+		cpuhw->lsctl.ed = 1;
+
+	/* Set in_use flag and store event */
+	cpuhw->event = event;
+	cpuhw->flags |= PMU_F_IN_USE;
+
+	if (flags & PERF_EF_START)
+		cpumsf_pmu_start(event, PERF_EF_RELOAD);
+out:
+	/* Common exit: runs for both the success and the error path */
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+	return err;
+}
+
+/* PMU ->del() callback: detach @event from the sampling facility of this CPU.
+ *
+ * Stops the event (flushing outstanding samples via PERF_EF_UPDATE), clears
+ * the sampling enable controls and releases the per-CPU in-use marker.
+ */
+static void cpumsf_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+	perf_pmu_disable(event->pmu);
+	cpumsf_pmu_stop(event, PERF_EF_UPDATE);
+
+	cpuhw->lsctl.es = 0;
+	cpuhw->lsctl.ed = 0;
+	/* NOTE(review): the in-use flag is cleared before the event pointer;
+	 * cpumf_measurement_alert() tests PMU_F_IN_USE before it dereferences
+	 * cpuhw->event, so this order looks deliberate - confirm before
+	 * reordering.
+	 */
+	cpuhw->flags &= ~PMU_F_IN_USE;
+	cpuhw->event = NULL;
+
+	perf_event_update_userpage(event);
+	perf_pmu_enable(event->pmu);
+}
+
+/* Event attributes of the sampling PMU: one for basic-sampling and one for
+ * combined basic- and diagnostic-sampling.
+ */
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
+
+/* Attributes of the "events" group.  The second slot is left empty here and
+ * is filled with the diagnostic-sampling event by init_cpum_sampling_pmu()
+ * if the facility reports authorization for it; the last NULL terminates
+ * the list.
+ */
+static struct attribute *cpumsf_pmu_events_attr[] = {
+	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
+	NULL,
+	NULL,
+};
+
+/* The event number occupies all 64 bits of the perf config field */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+/* Attributes of the "format" group (NULL-terminated) */
+static struct attribute *cpumsf_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+/* Attribute group named "events" */
+static struct attribute_group cpumsf_pmu_events_group = {
+	.name = "events",
+	.attrs = cpumsf_pmu_events_attr,
+};
+/* Attribute group named "format" */
+static struct attribute_group cpumsf_pmu_format_group = {
+	.name = "format",
+	.attrs = cpumsf_pmu_format_attr,
+};
+/* NULL-terminated list of attribute groups referenced by cpumf_sampling */
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+	&cpumsf_pmu_events_group,
+	&cpumsf_pmu_format_group,
+	NULL,
+};
+
+/* PMU description of the CPU-measurement sampling facility.  It is registered
+ * under the name "cpum_sf" by init_cpum_sampling_pmu().
+ */
+static struct pmu cpumf_sampling = {
+	/* Enable/disable the PMU as a whole */
+	.pmu_enable   = cpumsf_pmu_enable,
+	.pmu_disable  = cpumsf_pmu_disable,
+
+	/* Event setup and scheduling on/off a CPU */
+	.event_init   = cpumsf_pmu_event_init,
+	.add	      = cpumsf_pmu_add,
+	.del	      = cpumsf_pmu_del,
+
+	/* Start/stop sampling of a scheduled event; read is a no-op */
+	.start	      = cpumsf_pmu_start,
+	.stop	      = cpumsf_pmu_stop,
+	.read	      = cpumsf_pmu_read,
+
+	/* sysfs "events" and "format" attribute groups */
+	.attr_groups  = cpumsf_pmu_attr_groups,
+};
+
+/* External interrupt handler for measurement alerts.
+ * @ext_code:	External interruption code (not evaluated here)
+ * @alert:	Alert indication bits
+ * @unused:	Not used
+ *
+ * Alerts that do not concern the sampling facility, and alerts that arrive
+ * while the PMU is not reserved, are ignored.  Several alert bits may be set
+ * at the same time, so each one is tested individually.
+ */
+static void cpumf_measurement_alert(struct ext_code ext_code,
+				    unsigned int alert, unsigned long unused)
+{
+	struct cpu_hw_sf *cpuhw;
+
+	if (!(alert & CPU_MF_INT_SF_MASK))
+		return;
+	inc_irq_stat(IRQEXT_CMS);
+	cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+	/* Measurement alerts are shared and can be raised while the PMU is
+	 * not reserved; there is nothing to do for us in that case.
+	 */
+	if (!(cpuhw->flags & PMU_F_RESERVED))
+		return;
+
+	/* Program alert request: process the sampling buffer, but warn (once)
+	 * and skip processing if no event is attached to this CPU.
+	 */
+	if (alert & CPU_MF_INT_SF_PRA) {
+		if (!WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE)))
+			hw_perf_event_update(cpuhw->event, 0);
+	}
+
+	/* Report measurement alerts only for non-PRA codes */
+	if (alert != CPU_MF_INT_SF_PRA)
+		debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);
+
+	/* Sampling authorization change request: refresh the cached info */
+	if (alert & CPU_MF_INT_SF_SACA)
+		qsi(&cpuhw->qsi);
+
+	/* Loss of sample data due to high-priority machine activities */
+	if (alert & CPU_MF_INT_SF_LSDA) {
+		pr_err("Sample data was lost\n");
+		cpuhw->flags |= PMU_F_ERR_LSDA;
+		sf_disable();
+	}
+
+	/* Invalid sampling buffer entry */
+	if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
+		pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
+		       alert);
+		cpuhw->flags |= PMU_F_ERR_IBE;
+		sf_disable();
+	}
+}
+
+/* CPU hotplug notifier of the sampling PMU.
+ * @self:	Notifier block (not used)
+ * @action:	Hotplug action, possibly or'ed with CPU_TASKS_FROZEN
+ * @hcpu:	Number of the affected CPU, passed as a pointer-sized value
+ *
+ * Runs setup_pmc_cpu() on the affected CPU: with PMC_INIT when the CPU comes
+ * online or when taking it offline was aborted, and with PMC_RELEASE before
+ * the CPU goes down.
+ *
+ * Always returns NOTIFY_OK.
+ */
+static int cpumf_pmu_notifier(struct notifier_block *self,
+			      unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long) hcpu;
+	int flags;
+
+	/* Ignore the notification if no events are scheduled on the PMU.
+	 * This might be racy...
+	 */
+	if (!atomic_read(&num_events))
+		return NOTIFY_OK;
+
+	/* The CPU_TASKS_FROZEN bit is masked out, so the *_FROZEN variants
+	 * are covered by the plain action codes below.
+	 */
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_ONLINE:
+	case CPU_DOWN_FAILED:
+		/* CPU_DOWN_FAILED undoes the release from CPU_DOWN_PREPARE */
+		flags = PMC_INIT;
+		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+		break;
+	case CPU_DOWN_PREPARE:
+		flags = PMC_RELEASE;
+		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+/* Getter of the cpum_sfb_size parameter: prints the current sampling buffer
+ * limits as "min,max" into @buffer and returns the number of characters
+ * written, or -ENODEV if the sampling facility is not available.
+ */
+static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
+{
+	if (cpum_sf_avail())
+		return sprintf(buffer, "%lu,%lu",
+			       CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+
+	return -ENODEV;
+}
+
+/* Setter of the cpum_sfb_size parameter.
+ * @val:	Either "min,max" or just "max"; omitted values keep their
+ *		current setting
+ * @kp:		Kernel parameter descriptor (not used)
+ *
+ * Returns 0 on success, -ENODEV if the sampling facility is not available,
+ * -EINVAL for an empty, malformed or out-of-range value, or the error
+ * reported by kstrtoul().
+ */
+static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
+{
+	unsigned long min, max;
+	int rc;
+
+	if (!cpum_sf_avail())
+		return -ENODEV;
+	if (!val || !strlen(val))
+		return -EINVAL;
+
+	/* Start from the current limits so that "max" alone keeps "min" */
+	min = CPUM_SF_MIN_SDB;
+	max = CPUM_SF_MAX_SDB;
+	if (strchr(val, ',')) {
+		rc = 0;
+		if (sscanf(val, "%lu,%lu", &min, &max) != 2)
+			rc = -EINVAL;
+	} else {
+		rc = kstrtoul(val, 10, &max);
+	}
+
+	/* A failed range check takes precedence over a parse error */
+	if (min < 2 || min >= max || max > get_num_physpages())
+		return -EINVAL;
+	if (rc)
+		return rc;
+
+	sfb_set_limits(min, max);
+	pr_info("The sampling buffer limits have changed to: "
+		"min=%lu max=%lu (diag=x%lu)\n",
+		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
+	return 0;
+}
+
+/* Type check hook for the custom "sfb_size" parameter type; the parameter
+ * variable is checked against void.
+ */
+#define param_check_sfb_size(name, p) __param_check(name, p, void)
+/* Set/get operations of the "sfb_size" parameter type */
+static struct kernel_param_ops param_ops_sfb_size = {
+	.set = param_set_sfb_size,
+	.get = param_get_sfb_size,
+};
+
+/* Reason codes reported when the sampling PMU cannot be initialized */
+#define RS_INIT_FAILURE_QSI	  0x0001
+#define RS_INIT_FAILURE_BSDES	  0x0002
+#define RS_INIT_FAILURE_ALRT	  0x0003
+#define RS_INIT_FAILURE_PERF	  0x0004
+
+/* Report an initialization failure together with its RS_INIT_FAILURE_* code */
+static void __init pr_cpumsf_err(unsigned int reason)
+{
+	pr_err("Sampling facility support for perf is not available: reason=%04x\n",
+	       reason);
+}
+
+/* Initialize the sampling facility PMU.
+ *
+ * Queries the sampling facility, registers the s390dbf debug area, the
+ * measurement-alert interrupt handler, the "cpum_sf" PMU and the CPU hotplug
+ * notifier.  A failure to register the debug area is reported but not
+ * treated as fatal.  On any later failure, everything registered so far is
+ * released again.
+ *
+ * Returns 0 on success, -ENODEV if the facility is not available or cannot
+ * be queried, -EINVAL for an unexpected basic-sampling entry size, or the
+ * error of the failing registration.
+ */
+static int __init init_cpum_sampling_pmu(void)
+{
+	struct hws_qsi_info_block si;
+	int err;
+
+	if (!cpum_sf_avail())
+		return -ENODEV;
+
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si)) {
+		pr_cpumsf_err(RS_INIT_FAILURE_QSI);
+		return -ENODEV;
+	}
+
+	/* The code relies on the layout of struct hws_basic_entry */
+	if (si.bsdes != sizeof(struct hws_basic_entry)) {
+		pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
+		return -EINVAL;
+	}
+
+	/* Diagnostic-sampling is authorized: recompute the buffer limits and
+	 * publish the diagnostic event in the spare "events" attribute slot.
+	 */
+	if (si.ad) {
+		sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+		cpumsf_pmu_events_attr[1] =
+			CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
+	}
+
+	/* Debug logging is best effort: continue without it on failure, but
+	 * do not register a view for a debug area that does not exist.
+	 */
+	sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
+	if (sfdbg)
+		debug_register_view(sfdbg, &debug_sprintf_view);
+	else
+		pr_err("Registering for s390dbf failed\n");
+
+	err = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+				    cpumf_measurement_alert);
+	if (err) {
+		pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
+		goto out_dbf;
+	}
+
+	err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
+	if (err) {
+		pr_cpumsf_err(RS_INIT_FAILURE_PERF);
+		goto out_irq;
+	}
+	perf_cpu_notifier(cpumf_pmu_notifier);
+	return 0;
+
+out_irq:
+	unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
+				cpumf_measurement_alert);
+out_dbf:
+	/* Do not leak the debug area if the PMU could not be set up */
+	if (sfdbg) {
+		debug_unregister(sfdbg);
+		sfdbg = NULL;
+	}
+	return err;
+}
+arch_initcall(init_cpum_sampling_pmu);
+core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
new file mode 100644
index 000000000..61595c1f0
--- /dev/null
+++ b/arch/s390/kernel/perf_event.c
@@ -0,0 +1,324 @@
+/*
+ * Performance event support for s390x
+ *
+ *  Copyright IBM Corp. 2012, 2013
+ *  Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT	"perf"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/kvm_host.h>
+#include <linux/percpu.h>
+#include <linux/export.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <asm/irq.h>
+#include <asm/cpu_mf.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sysinfo.h>
+
+/* Return a human-readable name of the PMU: the CPU-MF name if the counter or
+ * the sampling facility is available, the generic "pmu" otherwise.
+ */
+const char *perf_pmu_name(void)
+{
+	if (!cpum_cf_avail() && !cpum_sf_avail())
+		return "pmu";
+
+	return "CPU-Measurement Facilities (CPU-MF)";
+}
+EXPORT_SYMBOL(perf_pmu_name);
+
+/* Return the number of counters provided by the available CPU-measurement
+ * facilities (counter facility plus sampling facility).
+ */
+int perf_num_counters(void)
+{
+	int total;
+
+	total = cpum_cf_avail() ? PERF_CPUM_CF_MAX_CTR : 0;
+	if (cpum_sf_avail())
+		total += PERF_CPUM_SF_MAX_CTR;
+
+	return total;
+}
+EXPORT_SYMBOL(perf_num_counters);
+
+/* Fetch the SIE control block pointer from the first "empty1" slot of the
+ * stack frame that register 15 of @regs points to.  Returns NULL if that
+ * register is zero.
+ */
+static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
+{
+	struct stack_frame *frame;
+
+	frame = (struct stack_frame *) regs->gprs[15];
+	if (frame)
+		return (struct kvm_s390_sie_block *) frame->empty1[0];
+
+	return NULL;
+}
+
+/* Tell whether @regs was captured while a guest was running: the context
+ * must not be user mode and the instruction pointer must equal the address
+ * of sie_exit.  Always false if KVM is not enabled.
+ */
+static bool is_in_guest(struct pt_regs *regs)
+{
+#if IS_ENABLED(CONFIG_KVM)
+	if (!user_mode(regs))
+		return instruction_pointer(regs) == (unsigned long) &sie_exit;
+#endif
+	return false;
+}
+
+/* Return the problem-state bit of the guest PSW mask (non-zero means the
+ * guest was in user mode).
+ */
+static unsigned long guest_is_user_mode(struct pt_regs *regs)
+{
+	struct kvm_s390_sie_block *scb = sie_block(regs);
+
+	return scb->gpsw.mask & PSW_MASK_PSTATE;
+}
+
+/* Return the instruction address taken from the guest PSW */
+static unsigned long instruction_pointer_guest(struct pt_regs *regs)
+{
+	struct kvm_s390_sie_block *scb = sie_block(regs);
+
+	return scb->gpsw.addr & PSW_ADDR_INSN;
+}
+
+/* Return the instruction pointer to report for a sample: the guest address
+ * if @regs was captured while a guest was running, the host address
+ * otherwise.
+ */
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	if (is_in_guest(regs))
+		return instruction_pointer_guest(regs);
+
+	return instruction_pointer(regs);
+}
+
+/* Return the perf misc flags for a guest context: guest user or guest
+ * kernel, depending on the guest PSW.
+ */
+static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
+{
+	if (guest_is_user_mode(regs))
+		return PERF_RECORD_MISC_GUEST_USER;
+
+	return PERF_RECORD_MISC_GUEST_KERNEL;
+}
+
+/* Return the perf misc flags for a pt_regs structure created by the cpum_sf
+ * PMU.  The guest indication is read from the perf_sf_sde_regs data that
+ * overlays regs->int_parm_long.
+ */
+static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
+{
+	struct perf_sf_sde_regs *sde_regs;
+
+	sde_regs = (struct perf_sf_sde_regs *) &regs->int_parm_long;
+	if (sde_regs->in_guest) {
+		if (user_mode(regs))
+			return PERF_RECORD_MISC_GUEST_USER;
+		return PERF_RECORD_MISC_GUEST_KERNEL;
+	}
+
+	if (user_mode(regs))
+		return PERF_RECORD_MISC_USER;
+	return PERF_RECORD_MISC_KERNEL;
+}
+
+/* Return the PERF_RECORD_MISC_* flags that describe the context of @regs. */
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	/* A pt_regs structure created by the cpum_sf PMU is recognized by its
+	 * interruption code and parameter together with a zero register 15.
+	 * The misc flags can then be taken directly from the sampling data.
+	 */
+	if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA &&
+	    !regs->gprs[15])
+		return perf_misc_flags_sf(regs);
+
+	/* Otherwise derive the flags from the regular pt_regs content */
+	if (is_in_guest(regs))
+		return perf_misc_guest_flags(regs);
+
+	if (user_mode(regs))
+		return PERF_RECORD_MISC_USER;
+
+	return PERF_RECORD_MISC_KERNEL;
+}
+
+/* Print the counter facility state of the current CPU to the kernel log.
+ * Nothing is printed if the facility information cannot be queried.
+ */
+static void print_debug_cf(void)
+{
+	struct cpumf_ctr_info cf_info;
+	int cpu = smp_processor_id();
+
+	memset(&cf_info, 0, sizeof(cf_info));
+	if (qctri(&cf_info))
+		return;
+
+	pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
+		cpu, cf_info.cfvn, cf_info.csvn,
+		cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl);
+}
+
+/* Print the sampling facility state of the current CPU to the kernel log:
+ * a summary line followed by one line per authorized sampling function.
+ * Nothing is printed if the facility information cannot be queried.
+ */
+static void print_debug_sf(void)
+{
+	struct hws_qsi_info_block si;
+	int cpu = smp_processor_id();
+
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si) != 0)
+		return;
+
+	pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n",
+		cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate,
+		si.cpu_speed);
+
+	if (si.as) {
+		pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i"
+			" bsdes=%i tear=%016lx dear=%016lx\n", cpu,
+			si.as, si.es, si.cs, si.bsdes, si.tear, si.dear);
+	}
+	if (si.ad) {
+		pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i"
+			" dsdes=%i tear=%016lx dear=%016lx\n", cpu,
+			si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear);
+	}
+}
+
+/* Print the state of the available CPU-measurement facilities of the current
+ * CPU.  Interrupts are disabled while the information is gathered.
+ */
+void perf_event_print_debug(void)
+{
+	unsigned long irq_flags;
+
+	local_irq_save(irq_flags);
+	if (cpum_cf_avail()) {
+		print_debug_cf();
+	}
+	if (cpum_sf_avail()) {
+		print_debug_sf();
+	}
+	local_irq_restore(irq_flags);
+}
+
+/* Service level infrastructure */
+
+/* Print the version and authorization of the counter facility to @m.
+ * Nothing is printed if the facility information cannot be queried.
+ */
+static void sl_print_counter(struct seq_file *m)
+{
+	struct cpumf_ctr_info ci;
+
+	memset(&ci, 0, sizeof(ci));
+	if (!qctri(&ci))
+		seq_printf(m, "CPU-MF: Counter facility: version=%u.%u "
+			   "authorization=%04x\n",
+			   ci.cfvn, ci.csvn, ci.auth_ctl);
+}
+
+/* Print the sampling rates and the authorized sampling modes of the sampling
+ * facility to @m.  Nothing is printed if the facility information cannot be
+ * queried or if no sampling function is authorized.
+ */
+static void sl_print_sampling(struct seq_file *m)
+{
+	struct hws_qsi_info_block si;
+
+	memset(&si, 0, sizeof(si));
+	if (qsi(&si) || (!si.as && !si.ad))
+		return;
+
+	seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu"
+		   " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate,
+		   si.cpu_speed);
+	if (si.as) {
+		seq_printf(m, "CPU-MF: Sampling facility: mode=basic"
+			   " sample_size=%u\n", si.bsdes);
+	}
+	if (si.ad) {
+		seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic"
+			   " sample_size=%u\n", si.dsdes);
+	}
+}
+
+/* seq_print callback of the perf service level: reports every available
+ * CPU-measurement facility.
+ */
+static void service_level_perf_print(struct seq_file *m,
+				     struct service_level *sl)
+{
+	if (cpum_cf_avail()) {
+		sl_print_counter(m);
+	}
+	if (cpum_sf_avail()) {
+		sl_print_sampling(m);
+	}
+}
+
+/* Service level entry whose output is produced by service_level_perf_print() */
+static struct service_level service_level_perf = {
+	.seq_print = service_level_perf_print,
+};
+
+/* Register the perf service level entry at boot; returns the result of
+ * register_service_level().
+ */
+static int __init service_level_perf_register(void)
+{
+	return register_service_level(&service_level_perf);
+}
+arch_initcall(service_level_perf_register);
+
+/* See also arch/s390/kernel/traps.c */
+
+/* Walk the stack frames located between @low and @high and record one
+ * callchain entry per frame.
+ * @entry:	Callchain to append to
+ * @sp:		Stack pointer to start from
+ * @low:	Lower bound of the stack area
+ * @high:	Upper bound of the stack area
+ *
+ * Returns the first stack pointer value that lies outside of the given area,
+ * so that the caller can continue the walk on another stack.
+ */
+static unsigned long __store_trace(struct perf_callchain_entry *entry,
+				   unsigned long sp,
+				   unsigned long low, unsigned long high)
+{
+	struct stack_frame *sf;
+	struct pt_regs *regs;
+
+	while (1) {
+		sp = sp & PSW_ADDR_INSN;
+		/* Stop as soon as a whole frame no longer fits into the area */
+		if (sp < low || sp > high - sizeof(*sf))
+			return sp;
+		sf = (struct stack_frame *) sp;
+		/* gprs[8] is presumably the saved return address - TODO
+		 * confirm against the layout of struct stack_frame.
+		 */
+		perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+		/* Follow the backchain. */
+		while (1) {
+			/* Frames must be strictly ascending; raising the lower
+			 * bound to the current frame rejects loops.
+			 */
+			low = sp;
+			sp = sf->back_chain & PSW_ADDR_INSN;
+			if (!sp)
+				break;
+			if (sp <= low || sp > high - sizeof(*sf))
+				return sp;
+			sf = (struct stack_frame *) sp;
+			perf_callchain_store(entry,
+					     sf->gprs[8] & PSW_ADDR_INSN);
+		}
+		/* Zero backchain detected, check for interrupt frame. */
+		sp = (unsigned long) (sf + 1);
+		if (sp <= low || sp > high - sizeof(*regs))
+			return sp;
+		regs = (struct pt_regs *) sp;
+		/* NOTE(review): this stores the return address of the last
+		 * frame a second time; the PSW address saved in regs is not
+		 * recorded.  Confirm whether that is intended.
+		 */
+		perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+		/* Continue with the stack pointer saved in the pt_regs */
+		low = sp;
+		sp = regs->gprs[15];
+	}
+}
+
+/* Record the kernel callchain for @regs in @entry.
+ *
+ * Nothing is recorded for user mode contexts or if there is no stack frame
+ * with a backchain to start from.  The walk begins on the async stack and
+ * continues on the stack of the current thread.
+ */
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+			   struct pt_regs *regs)
+{
+	struct stack_frame *frame;
+	unsigned long sp;
+
+	if (user_mode(regs))
+		return;
+
+	frame = (struct stack_frame *) regs->gprs[15];
+	if (!frame || !frame->back_chain)
+		return;
+
+	/* First pass: async stack */
+	sp = __store_trace(entry, frame->back_chain,
+			   S390_lowcore.async_stack - ASYNC_SIZE,
+			   S390_lowcore.async_stack);
+
+	/* Second pass: thread stack, resuming where the first pass stopped */
+	__store_trace(entry, sp, S390_lowcore.thread_info,
+		      S390_lowcore.thread_info + THREAD_SIZE);
+}
+
+/* Perf definitions for PMU event attributes in sysfs */
+
+/* sysfs show callback for PMU event attributes: writes the event id and the
+ * attribute name to @page and returns the number of characters written.
+ */
+ssize_t cpumf_events_sysfs_show(struct device *dev,
+				struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *ev_attr =
+		container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sprintf(page, "event=0x%04llx,name=%s\n",
+		       ev_attr->id, attr->attr.name);
+}
+
+/* Reserve/release functions for sharing perf hardware */
+static DEFINE_SPINLOCK(perf_hw_owner_lock);
+static void *perf_sampling_owner;
+
+/* Reserve the sampling facility for the caller.
+ *
+ * The return address of the caller is remembered as the owner.  Returns 0 on
+ * success or -EBUSY if the facility is already reserved.
+ */
+int perf_reserve_sampling(void)
+{
+	int err = 0;
+
+	spin_lock(&perf_hw_owner_lock);
+	if (!perf_sampling_owner) {
+		perf_sampling_owner = __builtin_return_address(0);
+	} else {
+		pr_warn("The sampling facility is already reserved by %p\n",
+			perf_sampling_owner);
+		err = -EBUSY;
+	}
+	spin_unlock(&perf_hw_owner_lock);
+
+	return err;
+}
+EXPORT_SYMBOL(perf_reserve_sampling);
+
+/* Release the reservation of the sampling facility.  Warns if the facility
+ * was not reserved at all.
+ */
+void perf_release_sampling(void)
+{
+	spin_lock(&perf_hw_owner_lock);
+
+	WARN_ON(!perf_sampling_owner);
+	perf_sampling_owner = NULL;
+
+	spin_unlock(&perf_hw_owner_lock);
+}
+EXPORT_SYMBOL(perf_release_sampling);
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
new file mode 100644
index 000000000..036aa01d0
--- /dev/null
+++ b/arch/s390/kernel/pgm_check.S
@@ -0,0 +1,146 @@
+/*
+ *    Program check table.
+ *
+ *    Copyright IBM Corp. 2012
+ */
+
+#include <linux/linkage.h>
+
+/* Emit one table slot: a .long holding the address of @handler */
+#define PGM_CHECK(handler)	.long handler
+/* Table slot for interruption codes without a dedicated handler */
+#define PGM_CHECK_DEFAULT	PGM_CHECK(default_trap_handler)
+
+/*
+ * The program check table contains exactly 128 (0x00-0x7f) entries. Each
+ * line defines the function to be called for the program check interruption
+ * code given in the trailing comment of that line.
+ */
+.section .rodata, "a"
+ENTRY(pgm_check_table)
+PGM_CHECK_DEFAULT			/* 00 */
+PGM_CHECK(illegal_op)			/* 01 */
+PGM_CHECK(privileged_op)		/* 02 */
+PGM_CHECK(execute_exception)		/* 03 */
+PGM_CHECK(do_protection_exception)	/* 04 */
+PGM_CHECK(addressing_exception)		/* 05 */
+PGM_CHECK(specification_exception)	/* 06 */
+PGM_CHECK(data_exception)		/* 07 */
+PGM_CHECK(overflow_exception)		/* 08 */
+PGM_CHECK(divide_exception)		/* 09 */
+PGM_CHECK(overflow_exception)		/* 0a */
+PGM_CHECK(divide_exception)		/* 0b */
+PGM_CHECK(hfp_overflow_exception)	/* 0c */
+PGM_CHECK(hfp_underflow_exception)	/* 0d */
+PGM_CHECK(hfp_significance_exception)	/* 0e */
+PGM_CHECK(hfp_divide_exception)		/* 0f */
+PGM_CHECK(do_dat_exception)		/* 10 */
+PGM_CHECK(do_dat_exception)		/* 11 */
+PGM_CHECK(translation_exception)	/* 12 */
+PGM_CHECK(special_op_exception)		/* 13 */
+PGM_CHECK_DEFAULT			/* 14 */
+PGM_CHECK(operand_exception)		/* 15 */
+PGM_CHECK_DEFAULT			/* 16 */
+PGM_CHECK_DEFAULT			/* 17 */
+PGM_CHECK(transaction_exception)	/* 18 */
+PGM_CHECK_DEFAULT			/* 19 */
+PGM_CHECK_DEFAULT			/* 1a */
+PGM_CHECK(vector_exception)		/* 1b */
+PGM_CHECK(space_switch_exception)	/* 1c */
+PGM_CHECK(hfp_sqrt_exception)		/* 1d */
+PGM_CHECK_DEFAULT			/* 1e */
+PGM_CHECK_DEFAULT			/* 1f */
+PGM_CHECK_DEFAULT			/* 20 */
+PGM_CHECK_DEFAULT			/* 21 */
+PGM_CHECK_DEFAULT			/* 22 */
+PGM_CHECK_DEFAULT			/* 23 */
+PGM_CHECK_DEFAULT			/* 24 */
+PGM_CHECK_DEFAULT			/* 25 */
+PGM_CHECK_DEFAULT			/* 26 */
+PGM_CHECK_DEFAULT			/* 27 */
+PGM_CHECK_DEFAULT			/* 28 */
+PGM_CHECK_DEFAULT			/* 29 */
+PGM_CHECK_DEFAULT			/* 2a */
+PGM_CHECK_DEFAULT			/* 2b */
+PGM_CHECK_DEFAULT			/* 2c */
+PGM_CHECK_DEFAULT			/* 2d */
+PGM_CHECK_DEFAULT			/* 2e */
+PGM_CHECK_DEFAULT			/* 2f */
+PGM_CHECK_DEFAULT			/* 30 */
+PGM_CHECK_DEFAULT			/* 31 */
+PGM_CHECK_DEFAULT			/* 32 */
+PGM_CHECK_DEFAULT			/* 33 */
+PGM_CHECK_DEFAULT			/* 34 */
+PGM_CHECK_DEFAULT			/* 35 */
+PGM_CHECK_DEFAULT			/* 36 */
+PGM_CHECK_DEFAULT			/* 37 */
+PGM_CHECK(do_dat_exception)		/* 38 */
+PGM_CHECK(do_dat_exception)		/* 39 */
+PGM_CHECK(do_dat_exception)		/* 3a */
+PGM_CHECK(do_dat_exception)		/* 3b */
+PGM_CHECK_DEFAULT			/* 3c */
+PGM_CHECK_DEFAULT			/* 3d */
+PGM_CHECK_DEFAULT			/* 3e */
+PGM_CHECK_DEFAULT			/* 3f */
+PGM_CHECK_DEFAULT			/* 40 */
+PGM_CHECK_DEFAULT			/* 41 */
+PGM_CHECK_DEFAULT			/* 42 */
+PGM_CHECK_DEFAULT			/* 43 */
+PGM_CHECK_DEFAULT			/* 44 */
+PGM_CHECK_DEFAULT			/* 45 */
+PGM_CHECK_DEFAULT			/* 46 */
+PGM_CHECK_DEFAULT			/* 47 */
+PGM_CHECK_DEFAULT			/* 48 */
+PGM_CHECK_DEFAULT			/* 49 */
+PGM_CHECK_DEFAULT			/* 4a */
+PGM_CHECK_DEFAULT			/* 4b */
+PGM_CHECK_DEFAULT			/* 4c */
+PGM_CHECK_DEFAULT			/* 4d */
+PGM_CHECK_DEFAULT			/* 4e */
+PGM_CHECK_DEFAULT			/* 4f */
+PGM_CHECK_DEFAULT			/* 50 */
+PGM_CHECK_DEFAULT			/* 51 */
+PGM_CHECK_DEFAULT			/* 52 */
+PGM_CHECK_DEFAULT			/* 53 */
+PGM_CHECK_DEFAULT			/* 54 */
+PGM_CHECK_DEFAULT			/* 55 */
+PGM_CHECK_DEFAULT			/* 56 */
+PGM_CHECK_DEFAULT			/* 57 */
+PGM_CHECK_DEFAULT			/* 58 */
+PGM_CHECK_DEFAULT			/* 59 */
+PGM_CHECK_DEFAULT			/* 5a */
+PGM_CHECK_DEFAULT			/* 5b */
+PGM_CHECK_DEFAULT			/* 5c */
+PGM_CHECK_DEFAULT			/* 5d */
+PGM_CHECK_DEFAULT			/* 5e */
+PGM_CHECK_DEFAULT			/* 5f */
+PGM_CHECK_DEFAULT			/* 60 */
+PGM_CHECK_DEFAULT			/* 61 */
+PGM_CHECK_DEFAULT			/* 62 */
+PGM_CHECK_DEFAULT			/* 63 */
+PGM_CHECK_DEFAULT			/* 64 */
+PGM_CHECK_DEFAULT			/* 65 */
+PGM_CHECK_DEFAULT			/* 66 */
+PGM_CHECK_DEFAULT			/* 67 */
+PGM_CHECK_DEFAULT			/* 68 */
+PGM_CHECK_DEFAULT			/* 69 */
+PGM_CHECK_DEFAULT			/* 6a */
+PGM_CHECK_DEFAULT			/* 6b */
+PGM_CHECK_DEFAULT			/* 6c */
+PGM_CHECK_DEFAULT			/* 6d */
+PGM_CHECK_DEFAULT			/* 6e */
+PGM_CHECK_DEFAULT			/* 6f */
+PGM_CHECK_DEFAULT			/* 70 */
+PGM_CHECK_DEFAULT			/* 71 */
+PGM_CHECK_DEFAULT			/* 72 */
+PGM_CHECK_DEFAULT			/* 73 */
+PGM_CHECK_DEFAULT			/* 74 */
+PGM_CHECK_DEFAULT			/* 75 */
+PGM_CHECK_DEFAULT			/* 76 */
+PGM_CHECK_DEFAULT			/* 77 */
+PGM_CHECK_DEFAULT			/* 78 */
+PGM_CHECK_DEFAULT			/* 79 */
+PGM_CHECK_DEFAULT			/* 7a */
+PGM_CHECK_DEFAULT			/* 7b */
+PGM_CHECK_DEFAULT			/* 7c */
+PGM_CHECK_DEFAULT			/* 7d */
+PGM_CHECK_DEFAULT			/* 7e */
+PGM_CHECK_DEFAULT			/* 7f */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
new file mode 100644
index 000000000..dc5edc29b
--- /dev/null
+++ b/arch/s390/kernel/process.c
@@ -0,0 +1,226 @@
+/*
+ * This file handles the architecture dependent parts of process handling.
+ *
+ *    Copyright IBM Corp. 1999, 2009
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Hartmut Penner <hp@de.ibm.com>,
+ *		 Denis Joseph Barrow,
+ */
+
+#include <linux/compiler.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/elfcore.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/tick.h>
+#include <linux/personality.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <linux/kprobes.h>
+#include <linux/random.h>
+#include <linux/module.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/vtimer.h>
+#include <asm/exec.h>
+#include <asm/irq.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include <asm/switch_to.h>
+#include <asm/runtime_instr.h>
+#include "entry.h"
+
+/*
+ * Declared with the explicit assembler symbol name "ret_from_fork".
+ * copy_thread() stores its address as the return point in the fake
+ * stack frame of every new task.
+ */
+asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
+
+/*
+ * Return the saved PC of a blocked thread; used in kernel/sched.
+ *
+ * resume in entry.S does not build a stack frame of its own, it only
+ * stores the registers %r6-%r15 into the frame handed over by schedule.
+ * To report the caller of schedule the backchain is followed once, which
+ * leads to the frame in which schedule() stores its return address.
+ *
+ * Returns 0 if @tsk is NULL, has no stack page, or if one of the two
+ * frame pointers is not above the stack page and at or below the
+ * pt_regs of the task.
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+	struct stack_frame *bottom, *top, *frame;
+
+	if (!tsk)
+		return 0;
+	bottom = task_stack_page(tsk);
+	if (!bottom)
+		return 0;
+	top = (struct stack_frame *) task_pt_regs(tsk);
+
+	/* Frame the registers were stored into when the task was switched out. */
+	frame = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN);
+	if (!(frame > bottom && frame <= top))
+		return 0;
+
+	/* One step up the backchain. */
+	frame = (struct stack_frame *) (frame->back_chain & PSW_ADDR_INSN);
+	if (!(frame > bottom && frame <= top))
+		return 0;
+
+	return frame->gprs[8];
+}
+
+/*
+ * Defined outside of this file; copy_thread() uses its address as the
+ * initial psw.addr of kernel threads.
+ */
+extern void kernel_thread_starter(void);
+
+/*
+ * Free current thread data structures etc.
+ *
+ * The only architecture specific work done here is the call to
+ * exit_thread_runtime_instr().
+ */
+void exit_thread(void)
+{
+	exit_thread_runtime_instr();
+}
+
+/*
+ * Empty: no architecture specific state is touched here.
+ */
+void flush_thread(void)
+{
+}
+
+/*
+ * Empty: nothing architecture specific is done for @dead_task here.
+ */
+void release_thread(struct task_struct *dead_task)
+{
+}
+
+/*
+ * Release the architecture specific allocation hanging off @tsk: the
+ * thread.vxrs area, if one was ever allocated.
+ *
+ * kfree() accepts a NULL pointer, so tasks without a vxrs area need no
+ * separate check.
+ */
+void arch_release_task_struct(struct task_struct *tsk)
+{
+	kfree(tsk->thread.vxrs);
+}
+
+/*
+ * Set up the kernel stack and the thread state of the new task @p.
+ *
+ * @clone_flags: clone flags; only CLONE_SETTLS is evaluated here
+ * @new_stackp:  for kernel threads the function to run (passed on in
+ *		 gprs[9]); otherwise, if non-zero, the stack pointer
+ *		 (gprs[15]) of the child
+ * @arg:	 argument for the kernel thread function (gprs[10]);
+ *		 not used for other tasks
+ * @p:		 the task being set up
+ *
+ * A fake stack frame is placed directly below the pt_regs of @p with
+ * ret_from_fork as its return point.
+ *
+ * Always returns 0.
+ */
+int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
+		unsigned long arg, struct task_struct *p)
+{
+	struct thread_info *ti;
+	/* Stack frame immediately followed by the pt_regs of the new task. */
+	struct fake_frame
+	{
+		struct stack_frame sf;
+		struct pt_regs childregs;
+	} *frame;
+
+	frame = container_of(task_pt_regs(p), struct fake_frame, childregs);
+	p->thread.ksp = (unsigned long) frame;
+	/* Save access registers to new thread structure. */
+	save_access_regs(&p->thread.acrs[0]);
+	/* start new process with ar4 pointing to the correct address space */
+	p->thread.mm_segment = get_fs();
+	/* Don't copy debug registers */
+	memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
+	memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
+	clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
+	/* Initialize per thread user and system timer values */
+	ti = task_thread_info(p);
+	ti->user_timer = 0;
+	ti->system_timer = 0;
+
+	frame->sf.back_chain = 0;
+	/* new return point is ret_from_fork */
+	frame->sf.gprs[8] = (unsigned long) ret_from_fork;
+	/* fake return stack for resume(), don't go back to schedule */
+	frame->sf.gprs[9] = (unsigned long) frame;
+
+	/* Kernel threads get a freshly built register set, not a copy. */
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		/* kernel thread */
+		memset(&frame->childregs, 0, sizeof(struct pt_regs));
+		frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
+				PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+		frame->childregs.psw.addr = PSW_ADDR_AMODE |
+				(unsigned long) kernel_thread_starter;
+		frame->childregs.gprs[9] = new_stackp; /* function */
+		frame->childregs.gprs[10] = arg;
+		frame->childregs.gprs[11] = (unsigned long) do_exit;
+		frame->childregs.orig_gpr2 = -1;
+
+		return 0;
+	}
+	/* All other tasks start from a copy of the pt_regs of current. */
+	frame->childregs = *current_pt_regs();
+	frame->childregs.gprs[2] = 0;	/* child returns 0 on fork. */
+	frame->childregs.flags = 0;
+	if (new_stackp)
+		frame->childregs.gprs[15] = new_stackp;
+
+	/* Don't copy runtime instrumentation info */
+	p->thread.ri_cb = NULL;
+	p->thread.ri_signum = 0;
+	frame->childregs.psw.mask &= ~PSW_MASK_RI;
+
+	/* Save the fpu registers to new thread structure. */
+	save_fp_ctl(&p->thread.fp_regs.fpc);
+	save_fp_regs(p->thread.fp_regs.fprs);
+	p->thread.fp_regs.pad = 0;
+	/* The new task starts without a vxrs area. */
+	p->thread.vxrs = NULL;
+	/*
+	 * Set a new TLS ? The value is taken from gprs[6] of the copied
+	 * registers: compat tasks store it in acrs[0], all others split
+	 * it into acrs[0] (upper 32 bit) and acrs[1] (lower 32 bit).
+	 */
+	if (clone_flags & CLONE_SETTLS) {
+		unsigned long tls = frame->childregs.gprs[6];
+		if (is_compat_task()) {
+			p->thread.acrs[0] = (unsigned int)tls;
+		} else {
+			p->thread.acrs[0] = (unsigned int)(tls >> 32);
+			p->thread.acrs[1] = (unsigned int)tls;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Reset the floating point control of current: the copy kept in
+ * thread.fp_regs.fpc is cleared and the sfpc instruction is issued
+ * with a zero operand.
+ */
+asmlinkage void execve_tail(void)
+{
+	current->thread.fp_regs.fpc = 0;
+	asm volatile("sfpc %0,%0" : : "d" (0));
+}
+
+/*
+ * fill in the FPU structure for a core dump.
+ *
+ * @regs:   not used
+ * @fpregs: filled by save_fp_ctl() (fpc) and save_fp_regs() (fprs)
+ *
+ * Always returns 1.
+ */
+int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
+{
+	save_fp_ctl(&fpregs->fpc);
+	save_fp_regs(fpregs->fprs);
+	return 1;
+}
+EXPORT_SYMBOL(dump_fpu);
+
+/*
+ * Find the address @p is waiting at.
+ *
+ * Starting at the saved kernel stack pointer of @p the backchain is
+ * followed for at most 16 frames. The first return address that does
+ * not lie within the scheduler functions is returned.
+ *
+ * Returns 0 if @p is NULL, is current, is in state TASK_RUNNING, has no
+ * stack page, if a frame pointer leaves the valid stack range or if no
+ * suitable return address is found.
+ */
+unsigned long get_wchan(struct task_struct *p)
+{
+	struct stack_frame *bottom, *top, *frame;
+	unsigned long caller;
+	int remaining = 16;
+
+	if (!p || p == current)
+		return 0;
+	if (p->state == TASK_RUNNING || !task_stack_page(p))
+		return 0;
+
+	bottom = task_stack_page(p);
+	top = (struct stack_frame *) task_pt_regs(p);
+	frame = (struct stack_frame *) (p->thread.ksp & PSW_ADDR_INSN);
+	if (!(frame > bottom && frame <= top))
+		return 0;
+
+	while (remaining-- > 0) {
+		frame = (struct stack_frame *)
+			(frame->back_chain & PSW_ADDR_INSN);
+		if (!(frame > bottom && frame <= top))
+			return 0;
+		caller = frame->gprs[8] & PSW_ADDR_INSN;
+		if (!in_sched_functions(caller))
+			return caller;
+	}
+	return 0;
+}
+
+/*
+ * Return @sp aligned down to 16 bytes. Unless current has
+ * ADDR_NO_RANDOMIZE set in its personality or randomize_va_space is
+ * zero, a random offset below PAGE_SIZE is subtracted first.
+ */
+unsigned long arch_align_stack(unsigned long sp)
+{
+	if (current->personality & ADDR_NO_RANDOMIZE)
+		return sp & ~0xf;
+	if (randomize_va_space)
+		sp -= get_random_int() & ~PAGE_MASK;
+	return sp & ~0xf;
+}
+
+/*
+ * Random, page aligned offset for the brk start:
+ * 8MB for 32bit, 1GB for 64bit.
+ */
+static inline unsigned long brk_rnd(void)
+{
+	unsigned long pages_mask;
+
+	pages_mask = is_32bit_task() ? 0x7ffUL : 0x3ffffUL;
+	return (get_random_int() & pages_mask) << PAGE_SHIFT;
+}
+
+/*
+ * Return a randomized, page aligned brk for @mm. The result is never
+ * below mm->brk: if the randomized value is not larger (e.g. because
+ * the addition wrapped), mm->brk itself is returned.
+ */
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	unsigned long randomized = PAGE_ALIGN(mm->brk + brk_rnd());
+
+	if (randomized <= mm->brk)
+		return mm->brk;
+	return randomized;
+}
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
new file mode 100644
index 000000000..dc488e13b
--- /dev/null
+++ b/arch/s390/kernel/processor.c
@@ -0,0 +1,103 @@
+/*
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/delay.h>
+#include <linux/cpu.h>
+#include <asm/elf.h>
+#include <asm/lowcore.h>
+#include <asm/param.h>
+#include <asm/smp.h>
+
+/* Per CPU id, filled in by cpu_init() and printed by show_cpuinfo(). */
+static DEFINE_PER_CPU(struct cpuid, cpu_id);
+
+/*
+ * Relax the CPU in a busy loop. If smp_cpu_mtid is zero and the machine
+ * has diagnose 0x44, that diagnose is issued; a compiler barrier follows
+ * in every case.
+ *
+ * NOTE(review): diag 0x44 is presumably the "voluntary time slice end"
+ * hint to the hypervisor - confirm against the diagnose documentation.
+ */
+void notrace cpu_relax(void)
+{
+	if (!smp_cpu_mtid && MACHINE_HAS_DIAG44)
+		asm volatile("diag 0,0,0x44");
+	barrier();
+}
+EXPORT_SYMBOL(cpu_relax);
+
+/*
+ * cpu_init - initializes state that is per-CPU.
+ *
+ * Stores the CPU id in the cpu_id variable of this CPU, takes a
+ * reference on init_mm and makes it the active_mm of current. current
+ * must not have an mm of its own (BUG_ON otherwise).
+ */
+void cpu_init(void)
+{
+	struct cpuid *id = this_cpu_ptr(&cpu_id);
+
+	get_cpu_id(id);
+	atomic_inc(&init_mm.mm_count);
+	current->active_mm = &init_mm;
+	BUG_ON(current->mm);
+	enter_lazy_tlb(&init_mm, current);
+}
+
+/*
+ * show_cpuinfo - Get information on one CPU for use by procfs.
+ * @m: seq_file to print into
+ * @v: iterator value from c_start()/c_next(): the CPU number plus one
+ *
+ * For CPU 0 the machine wide header (vendor, number of online CPUs,
+ * bogomips, feature list, cache information) is printed first. The
+ * per CPU line is printed only if the CPU is online.
+ *
+ * Always returns 0.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	/* Feature names, indexed by the bit number within elf_hwcap. */
+	static const char * const hwcap_str[] = {
+		"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
+		"edat", "etf3eh", "highgprs", "te", "vx"
+	};
+	unsigned long n = (unsigned long) v - 1;
+	int i;
+
+	if (!n) {
+		s390_adjust_jiffies();
+		seq_printf(m, "vendor_id       : IBM/S390\n"
+			   "# processors    : %i\n"
+			   "bogomips per cpu: %lu.%02lu\n",
+			   num_online_cpus(), loops_per_jiffy/(500000/HZ),
+			   (loops_per_jiffy/(5000/HZ))%100);
+		seq_puts(m, "features\t: ");
+		for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
+			if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
+				seq_printf(m, "%s ", hwcap_str[i]);
+		seq_puts(m, "\n");
+		show_cacheinfo(m);
+	}
+	get_online_cpus();
+	if (cpu_online(n)) {
+		struct cpuid *id = &per_cpu(cpu_id, n);
+		/* n is an unsigned long, hence %lu. */
+		seq_printf(m, "processor %lu: "
+			   "version = %02X,  "
+			   "identification = %06X,  "
+			   "machine = %04X\n",
+			   n, id->version, id->ident, id->machine);
+	}
+	put_online_cpus();
+	return 0;
+}
+
+/*
+ * seq_file start callback: map position *pos to the iterator value
+ * "CPU number plus one" (so that CPU 0 is not confused with NULL), or
+ * NULL once *pos has reached nr_cpu_ids.
+ */
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	if (*pos >= nr_cpu_ids)
+		return NULL;
+	return (void *)((unsigned long) *pos + 1);
+}
+
+/*
+ * seq_file next callback: advance *pos by one and return the iterator
+ * value c_start() yields for the new position.
+ */
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	*pos += 1;
+	return c_start(m, pos);
+}
+
+/*
+ * seq_file stop callback: empty, c_start()/c_next() leave nothing to
+ * clean up.
+ */
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+/*
+ * seq_file operations for the cpuinfo output: iterates over CPU numbers
+ * below nr_cpu_ids and prints each one with show_cpuinfo().
+ */
+const struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= show_cpuinfo,
+};
+
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
new file mode 100644
index 000000000..d363c9c32
--- /dev/null
+++ b/arch/s390/kernel/ptrace.c
@@ -0,0 +1,1533 @@
+/*
+ *  Ptrace user space interface.
+ *
+ *    Copyright IBM Corp. 1999, 2010
+ *    Author(s): Denis Joseph Barrow
+ *               Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/security.h>
+#include <linux/audit.h>
+#include <linux/signal.h>
+#include <linux/elf.h>
+#include <linux/regset.h>
+#include <linux/tracehook.h>
+#include <linux/seccomp.h>
+#include <linux/compat.h>
+#include <trace/syscall.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/switch_to.h>
+#include "entry.h"
+
+#ifdef CONFIG_COMPAT
+#include "compat_ptrace.h"
+#endif
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+/*
+ * Bring control registers 0, 2 and 9-11 and the PER bit in the user PSW
+ * of @task in line with the settings stored in task->thread.
+ *
+ *  - CR0: the transactional execution bit is set unless
+ *    PER_FLAG_NO_TE is set; the vector extension bit is set only if
+ *    the task has a vxrs area.
+ *  - CR2: the two low order bits reflect PER_FLAG_TE_ABORT_RAND and
+ *    PER_FLAG_TE_ABORT_RAND_TEND.
+ *  - CR9-11: the user specified PER set, overridden by the single step
+ *    settings if TIF_SINGLE_STEP or TIF_UPROBE_SINGLESTEP is set.
+ *
+ * A control register is loaded only if its new value differs from the
+ * value that was stored from it.
+ */
+void update_cr_regs(struct task_struct *task)
+{
+	struct pt_regs *regs = task_pt_regs(task);
+	struct thread_struct *thread = &task->thread;
+	struct per_regs old, new;
+
+	/* Take care of the enable/disable of transactional execution. */
+	if (MACHINE_HAS_TE || MACHINE_HAS_VX) {
+		unsigned long cr, cr_new;
+
+		__ctl_store(cr, 0, 0);
+		cr_new = cr;
+		if (MACHINE_HAS_TE) {
+			/* Set or clear transaction execution TXC bit 8. */
+			cr_new |= (1UL << 55);
+			if (task->thread.per_flags & PER_FLAG_NO_TE)
+				cr_new &= ~(1UL << 55);
+		}
+		if (MACHINE_HAS_VX) {
+			/* Enable/disable of vector extension */
+			cr_new &= ~(1UL << 17);
+			if (task->thread.vxrs)
+				cr_new |= (1UL << 17);
+		}
+		if (cr_new != cr)
+			__ctl_load(cr_new, 0, 0);
+		if (MACHINE_HAS_TE) {
+			/* Set/clear transaction execution TDC bits 62/63. */
+			__ctl_store(cr, 2, 2);
+			cr_new = cr & ~3UL;
+			if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+				if (task->thread.per_flags &
+				    PER_FLAG_TE_ABORT_RAND_TEND)
+					cr_new |= 1UL;
+				else
+					cr_new |= 2UL;
+			}
+			if (cr_new != cr)
+				__ctl_load(cr_new, 2, 2);
+		}
+	}
+	/* Copy user specified PER registers */
+	new.control = thread->per_user.control;
+	new.start = thread->per_user.start;
+	new.end = thread->per_user.end;
+
+	/* merge TIF_SINGLE_STEP into user specified PER registers. */
+	if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) ||
+	    test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) {
+		/* Block stepping uses branch events, otherwise ifetch. */
+		if (test_tsk_thread_flag(task, TIF_BLOCK_STEP))
+			new.control |= PER_EVENT_BRANCH;
+		else
+			new.control |= PER_EVENT_IFETCH;
+		new.control |= PER_CONTROL_SUSPENSION;
+		new.control |= PER_EVENT_TRANSACTION_END;
+		/* uprobe single stepping gets ifetch events in any case. */
+		if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
+			new.control |= PER_EVENT_IFETCH;
+		/* Stepping covers the complete address range. */
+		new.start = 0;
+		new.end = PSW_ADDR_INSN;
+	}
+
+	/* Take care of the PER enablement bit in the PSW. */
+	if (!(new.control & PER_EVENT_MASK)) {
+		/* No PER event requested: disable PER, CR9-11 stay as is. */
+		regs->psw.mask &= ~PSW_MASK_PER;
+		return;
+	}
+	regs->psw.mask |= PSW_MASK_PER;
+	__ctl_store(old, 9, 11);
+	if (memcmp(&new, &old, sizeof(struct per_regs)) != 0)
+		__ctl_load(new, 9, 11);
+}
+
+/*
+ * Request single stepping for @task: TIF_BLOCK_STEP is cleared and
+ * TIF_SINGLE_STEP is set. update_cr_regs() translates this combination
+ * into an instruction fetch PER event.
+ */
+void user_enable_single_step(struct task_struct *task)
+{
+	clear_tsk_thread_flag(task, TIF_BLOCK_STEP);
+	set_tsk_thread_flag(task, TIF_SINGLE_STEP);
+}
+
+/*
+ * Turn off single and block stepping for @task by clearing both
+ * TIF_BLOCK_STEP and TIF_SINGLE_STEP.
+ */
+void user_disable_single_step(struct task_struct *task)
+{
+	clear_tsk_thread_flag(task, TIF_BLOCK_STEP);
+	clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
+}
+
+/*
+ * Request block stepping for @task: both TIF_SINGLE_STEP and
+ * TIF_BLOCK_STEP are set. update_cr_regs() translates this combination
+ * into a branch PER event.
+ */
+void user_enable_block_step(struct task_struct *task)
+{
+	set_tsk_thread_flag(task, TIF_SINGLE_STEP);
+	set_tsk_thread_flag(task, TIF_BLOCK_STEP);
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching.
+ *
+ * Resets everything debugging related in @task: the user specified PER
+ * set, the last PER event, the single step flag, the pending PER trap
+ * flag in pt_regs and the per_flags.
+ */
+void ptrace_disable(struct task_struct *task)
+{
+	struct thread_struct *thread = &task->thread;
+
+	memset(&thread->per_user, 0, sizeof(thread->per_user));
+	memset(&thread->per_event, 0, sizeof(thread->per_event));
+	clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
+	clear_pt_regs_flag(task_pt_regs(task), PIF_PER_TRAP);
+	thread->per_flags = 0;
+}
+
+/*
+ * Alignment mask for offsets into the user area: peek_user() and
+ * poke_user() reject offsets with any of these bits set, except within
+ * the access register range where a mask of 3 is used instead.
+ */
+#define __ADDR_MASK 7
+
+/*
+ * Read one field of the per_info part of the user area.
+ * @child: the traced task
+ * @addr:  offset relative to the start of struct per_struct_kernel
+ *
+ * Returns the value of the field at @addr, or 0 if @addr does not match
+ * one of the fields handled here.
+ *
+ * NOTE(review): test_thread_flag() is called without a task argument,
+ * unlike test_tsk_thread_flag(task, ...) in update_cr_regs(); presumably
+ * it looks at the calling thread rather than at @child - confirm that
+ * this is intended.
+ */
+static inline unsigned long __peek_user_per(struct task_struct *child,
+					    addr_t addr)
+{
+	struct per_struct_kernel *per = NULL;
+
+	/* Control bits of the active per set. */
+	if (addr == (addr_t) &per->cr9) {
+		if (test_thread_flag(TIF_SINGLE_STEP))
+			return PER_EVENT_IFETCH;
+		return child->thread.per_user.control;
+	}
+	/* Start address of the active per set. */
+	if (addr == (addr_t) &per->cr10) {
+		if (test_thread_flag(TIF_SINGLE_STEP))
+			return 0;
+		return child->thread.per_user.start;
+	}
+	/* End address of the active per set. */
+	if (addr == (addr_t) &per->cr11) {
+		if (test_thread_flag(TIF_SINGLE_STEP))
+			return PSW_ADDR_INSN;
+		return child->thread.per_user.end;
+	}
+	/* Single-step bit. */
+	if (addr == (addr_t) &per->bits) {
+		if (test_thread_flag(TIF_SINGLE_STEP))
+			return 1UL << (BITS_PER_LONG - 1);
+		return 0;
+	}
+	/* Start address of the user specified per set. */
+	if (addr == (addr_t) &per->starting_addr)
+		return child->thread.per_user.start;
+	/* End address of the user specified per set. */
+	if (addr == (addr_t) &per->ending_addr)
+		return child->thread.per_user.end;
+	/* PER code, ATMID and AI of the last PER trap */
+	if (addr == (addr_t) &per->perc_atmid)
+		return (unsigned long)
+			child->thread.per_event.cause << (BITS_PER_LONG - 16);
+	/* Address of the last PER trap */
+	if (addr == (addr_t) &per->address)
+		return child->thread.per_event.address;
+	/* Access id of the last PER trap */
+	if (addr == (addr_t) &per->access_id)
+		return (unsigned long)
+			child->thread.per_event.paid << (BITS_PER_LONG - 8);
+	return 0;
+}
+
+/*
+ * Read the word at offset addr from the user area of a process. The
+ * trouble here is that the information is littered over different
+ * locations. The process registers are found on the kernel stack,
+ * the floating point stuff and the trace settings are stored in
+ * the task structure. In addition the different structures in
+ * struct user contain pad bytes that should be read as zeroes.
+ * Lovely...
+ *
+ * @child: the traced task
+ * @addr:  offset into struct user; alignment and range are checked by
+ *	   the caller (see peek_user())
+ *
+ * Returns the word found at @addr; offsets behind regs.per_info and
+ * the padding between orig_gpr2 and fp_regs read as 0.
+ */
+static unsigned long __peek_user(struct task_struct *child, addr_t addr)
+{
+	/* NULL based pointer, only used to compute field offsets. */
+	struct user *dummy = NULL;
+	addr_t offset, tmp;
+
+	if (addr < (addr_t) &dummy->regs.acrs) {
+		/*
+		 * psw and gprs are stored on the stack
+		 */
+		tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr);
+		if (addr == (addr_t) &dummy->regs.psw.mask) {
+			/* Return a clean psw mask. */
+			tmp &= PSW_MASK_USER | PSW_MASK_RI;
+			tmp |= PSW_USER_BITS;
+		}
+
+	} else if (addr < (addr_t) &dummy->regs.orig_gpr2) {
+		/*
+		 * access registers are stored in the thread structure
+		 */
+		offset = addr - (addr_t) &dummy->regs.acrs;
+		/*
+		 * Very special case: old & broken 64 bit gdb reading
+		 * from acrs[15]. Result is a 64 bit value. Read the
+		 * 32 bit acrs[15] value and shift it by 32. Sick...
+		 */
+		if (addr == (addr_t) &dummy->regs.acrs[15])
+			tmp = ((unsigned long) child->thread.acrs[15]) << 32;
+		else
+			tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset);
+
+	} else if (addr == (addr_t) &dummy->regs.orig_gpr2) {
+		/*
+		 * orig_gpr2 is stored on the kernel stack
+		 */
+		tmp = (addr_t) task_pt_regs(child)->orig_gpr2;
+
+	} else if (addr < (addr_t) &dummy->regs.fp_regs) {
+		/*
+		 * prevent reads of padding hole between
+		 * orig_gpr2 and fp_regs on s390.
+		 */
+		tmp = 0;
+
+	} else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) {
+		/*
+		 * floating point control reg. is in the thread structure
+		 */
+		tmp = child->thread.fp_regs.fpc;
+		/* The 32 bit fpc is returned in the upper half of the word. */
+		tmp <<= BITS_PER_LONG - 32;
+
+	} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
+		/*
+		 * floating point regs. are either in child->thread.fp_regs
+		 * or the child->thread.vxrs array
+		 */
+		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
+		/* Entries of the vxrs array are twice as far apart. */
+		if (child->thread.vxrs)
+			tmp = *(addr_t *)
+			       ((addr_t) child->thread.vxrs + 2*offset);
+		else
+			tmp = *(addr_t *)
+			       ((addr_t) &child->thread.fp_regs.fprs + offset);
+
+	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
+		/*
+		 * Handle access to the per_info structure.
+		 */
+		addr -= (addr_t) &dummy->regs.per_info;
+		tmp = __peek_user_per(child, addr);
+
+	} else
+		tmp = 0;
+
+	return tmp;
+}
+
+/*
+ * Validate @addr, read the word at that offset of the user area of
+ * @child and store it at user address @data.
+ *
+ * Returns -EIO for a misaligned or out of range offset, otherwise the
+ * result of put_user().
+ */
+static int
+peek_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+	struct user *layout = NULL;
+	addr_t align = __ADDR_MASK;
+	addr_t value;
+
+	/*
+	 * Stupid gdb peeks/pokes the access registers in 64 bit with
+	 * an alignment of 4. Programmers from hell...
+	 */
+	if (addr >= (addr_t) &layout->regs.acrs &&
+	    addr < (addr_t) &layout->regs.orig_gpr2)
+		align = 3;
+
+	if (addr & align)
+		return -EIO;
+	if (addr > sizeof(struct user) - __ADDR_MASK)
+		return -EIO;
+
+	value = __peek_user(child, addr);
+	return put_user(value, (addr_t __user *) data);
+}
+
+/*
+ * Write one field of the per_info part of the user area.
+ * @child: the traced task
+ * @addr:  offset relative to the start of struct per_struct_kernel
+ * @data:  value to store
+ *
+ * There are only three fields in the per_info struct that the
+ * debugger user can write to:
+ * 1) cr9: a new PER event mask
+ * 2) starting_addr: a new starting address to use with the PER
+ *    event mask
+ * 3) ending_addr: a new ending address to use with the PER event
+ *    mask
+ * The user specified PER event mask and the start and end addresses
+ * are used only if single stepping is not in effect.
+ * Writes to any other field in per_info are ignored.
+ */
+static inline void __poke_user_per(struct task_struct *child,
+				   addr_t addr, addr_t data)
+{
+	struct per_struct_kernel *per = NULL;
+	struct thread_struct *thread = &child->thread;
+
+	if (addr == (addr_t) &per->cr9) {
+		/* PER event mask of the user specified per set. */
+		thread->per_user.control =
+			data & (PER_EVENT_MASK | PER_CONTROL_MASK);
+		return;
+	}
+	if (addr == (addr_t) &per->starting_addr) {
+		/* Starting address of the user specified per set. */
+		thread->per_user.start = data;
+		return;
+	}
+	if (addr == (addr_t) &per->ending_addr) {
+		/* Ending address of the user specified per set. */
+		thread->per_user.end = data;
+	}
+}
+
+/*
+ * Write a word to the user area of a process at location addr. This
+ * operation has an additional problem compared to peek_user:
+ * stores to the program status word and to the floating point
+ * control register need to be checked for validity.
+ *
+ * @child: the traced task
+ * @addr:  offset into struct user; alignment and range are checked by
+ *	   the caller (see poke_user())
+ * @data:  the word to store
+ *
+ * Returns 0 on success (this includes writes that are ignored) and
+ * -EINVAL for an invalid psw mask or floating point control value.
+ */
+static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+	/* NULL based pointer, only used to compute field offsets. */
+	struct user *dummy = NULL;
+	addr_t offset;
+
+	if (addr < (addr_t) &dummy->regs.acrs) {
+		/*
+		 * psw and gprs are stored on the stack
+		 */
+		if (addr == (addr_t) &dummy->regs.psw.mask) {
+			unsigned long mask = PSW_MASK_USER;
+
+			/* The RI bit may only be changed for RI tasks. */
+			mask |= is_ri_task(child) ? PSW_MASK_RI : 0;
+			if ((data ^ PSW_USER_BITS) & ~mask)
+				/* Invalid psw mask. */
+				return -EINVAL;
+			if ((data & PSW_MASK_ASC) == PSW_ASC_HOME)
+				/* Invalid address-space-control bits */
+				return -EINVAL;
+			if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA))
+				/* Invalid addressing mode bits */
+				return -EINVAL;
+		}
+		*(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data;
+
+	} else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
+		/*
+		 * access registers are stored in the thread structure
+		 */
+		offset = addr - (addr_t) &dummy->regs.acrs;
+		/*
+		 * Very special case: old & broken 64 bit gdb writing
+		 * to acrs[15] with a 64 bit value. Ignore the lower
+		 * half of the value and write the upper 32 bit to
+		 * acrs[15]. Sick...
+		 */
+		if (addr == (addr_t) &dummy->regs.acrs[15])
+			child->thread.acrs[15] = (unsigned int) (data >> 32);
+		else
+			*(addr_t *)((addr_t) &child->thread.acrs + offset) = data;
+
+	} else if (addr == (addr_t) &dummy->regs.orig_gpr2) {
+		/*
+		 * orig_gpr2 is stored on the kernel stack
+		 */
+		task_pt_regs(child)->orig_gpr2 = data;
+
+	} else if (addr < (addr_t) &dummy->regs.fp_regs) {
+		/*
+		 * prevent writes of padding hole between
+		 * orig_gpr2 and fp_regs on s390.
+		 */
+		return 0;
+
+	} else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) {
+		/*
+		 * floating point control reg. is in the thread structure;
+		 * the new value is taken from the upper 32 bit of data,
+		 * the lower 32 bit have to be zero.
+		 */
+		if ((unsigned int) data != 0 ||
+		    test_fp_ctl(data >> (BITS_PER_LONG - 32)))
+			return -EINVAL;
+		child->thread.fp_regs.fpc = data >> (BITS_PER_LONG - 32);
+
+	} else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
+		/*
+		 * floating point regs. are either in child->thread.fp_regs
+		 * or the child->thread.vxrs array
+		 */
+		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
+		/* Entries of the vxrs array are twice as far apart. */
+		if (child->thread.vxrs)
+			*(addr_t *)((addr_t)
+				child->thread.vxrs + 2*offset) = data;
+		else
+			*(addr_t *)((addr_t)
+				&child->thread.fp_regs.fprs + offset) = data;
+
+	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
+		/*
+		 * Handle access to the per_info structure.
+		 */
+		addr -= (addr_t) &dummy->regs.per_info;
+		__poke_user_per(child, addr, data);
+
+	}
+
+	/* Writes behind regs.per_info are silently ignored. */
+	return 0;
+}
+
+/*
+ * Validate @addr and write the word @data at that offset of the user
+ * area of @child.
+ *
+ * Returns -EIO for a misaligned or out of range offset, otherwise the
+ * result of __poke_user().
+ */
+static int poke_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+	struct user *layout = NULL;
+	addr_t align = __ADDR_MASK;
+
+	/*
+	 * Stupid gdb peeks/pokes the access registers in 64 bit with
+	 * an alignment of 4. Programmers from hell indeed...
+	 */
+	if (addr >= (addr_t) &layout->regs.acrs &&
+	    addr < (addr_t) &layout->regs.orig_gpr2)
+		align = 3;
+
+	if (addr & align)
+		return -EIO;
+	if (addr > sizeof(struct user) - __ADDR_MASK)
+		return -EIO;
+
+	return __poke_user(child, addr, data);
+}
+
+/*
+ * arch_ptrace - architecture specific part of the ptrace requests.
+ * @child:   the traced task
+ * @request: the ptrace request
+ * @addr:    request specific address argument
+ * @data:    request specific data argument
+ *
+ * Handles the user area requests (PTRACE_PEEKUSR, PTRACE_POKEUSR and
+ * their _AREA variants), PTRACE_GET_LAST_BREAK and the transactional
+ * execution controls. All other requests are handed to
+ * ptrace_request().
+ *
+ * Returns 0 on success or a negative error code: -EFAULT if user memory
+ * cannot be accessed, -EIO for an invalid user area offset or if the
+ * requested transactional execution control is not available, -EINVAL
+ * for an invalid value.
+ */
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
+{
+	ptrace_area parea;
+	int copied, ret;
+
+	switch (request) {
+	case PTRACE_PEEKUSR:
+		/* read the word at location addr in the USER area. */
+		return peek_user(child, addr, data);
+
+	case PTRACE_POKEUSR:
+		/* write the word at location addr in the USER area */
+		return poke_user(child, addr, data);
+
+	case PTRACE_PEEKUSR_AREA:
+	case PTRACE_POKEUSR_AREA:
+		/*
+		 * addr points to a ptrace_area in user space that describes
+		 * the transfer: offset into the user area, buffer address
+		 * in the calling process and length in bytes.
+		 */
+		if (copy_from_user(&parea, (void __force __user *) addr,
+							sizeof(parea)))
+			return -EFAULT;
+		addr = parea.kernel_addr;
+		data = parea.process_addr;
+		copied = 0;
+		/* Transfer word by word, stop at the first error. */
+		while (copied < parea.len) {
+			if (request == PTRACE_PEEKUSR_AREA)
+				ret = peek_user(child, addr, data);
+			else {
+				addr_t utmp;
+				if (get_user(utmp,
+					     (addr_t __force __user *) data))
+					return -EFAULT;
+				ret = poke_user(child, addr, utmp);
+			}
+			if (ret)
+				return ret;
+			addr += sizeof(unsigned long);
+			data += sizeof(unsigned long);
+			copied += sizeof(unsigned long);
+		}
+		return 0;
+	case PTRACE_GET_LAST_BREAK:
+		/* Report a failing store to the caller instead of success. */
+		return put_user(task_thread_info(child)->last_break,
+				(unsigned long __user *) data);
+	case PTRACE_ENABLE_TE:
+		if (!MACHINE_HAS_TE)
+			return -EIO;
+		child->thread.per_flags &= ~PER_FLAG_NO_TE;
+		return 0;
+	case PTRACE_DISABLE_TE:
+		if (!MACHINE_HAS_TE)
+			return -EIO;
+		child->thread.per_flags |= PER_FLAG_NO_TE;
+		child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+		return 0;
+	case PTRACE_TE_ABORT_RAND:
+		if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE))
+			return -EIO;
+		/*
+		 * 0: random aborts off, 1: random aborts with the TEND
+		 * variant, 2: random aborts without it.
+		 */
+		switch (data) {
+		case 0UL:
+			child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+			break;
+		case 1UL:
+			child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+			child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND;
+			break;
+		case 2UL:
+			child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+			child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND;
+			break;
+		default:
+			return -EINVAL;
+		}
+		return 0;
+	default:
+		/* Removing high order bit from addr (only for 31 bit). */
+		addr &= PSW_ADDR_INSN;
+		return ptrace_request(child, request, addr, data);
+	}
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Now the fun part starts... a 31 bit program running in the
+ * 31 bit emulation tracing another program. PTRACE_PEEKTEXT,
+ * PTRACE_PEEKDATA, PTRACE_POKETEXT and PTRACE_POKEDATA are easy
+ * to handle, the difference to the 64 bit versions of the requests
+ * is that the access is done in multiples of 4 byte instead of
+ * 8 bytes (sizeof(unsigned long) on 31/64 bit).
+ * The ugly parts are PTRACE_PEEKUSR, PTRACE_PEEKUSR_AREA,
+ * PTRACE_POKEUSR and PTRACE_POKEUSR_AREA. If the traced program
+ * is a 31 bit program too, the content of struct user can be
+ * emulated. A 31 bit program peeking into the struct user of
+ * a 64 bit program is a no-no.
+ */
+
+/*
+ * Same as __peek_user_per but for a 31 bit program.
+ * @child: the traced task
+ * @addr:  offset relative to the start of struct compat_per_struct_kernel
+ *
+ * Returns the 32 bit value of the field at @addr, or 0 if @addr does
+ * not match one of the fields handled here.
+ *
+ * NOTE(review): test_thread_flag() is called without a task argument,
+ * unlike test_tsk_thread_flag(task, ...) in update_cr_regs(); presumably
+ * it looks at the calling thread rather than at @child - confirm that
+ * this is intended.
+ */
+static inline __u32 __peek_user_per_compat(struct task_struct *child,
+					   addr_t addr)
+{
+	struct compat_per_struct_kernel *per32 = NULL;
+
+	/* Control bits of the active per set. */
+	if (addr == (addr_t) &per32->cr9) {
+		if (test_thread_flag(TIF_SINGLE_STEP))
+			return PER_EVENT_IFETCH;
+		return child->thread.per_user.control;
+	}
+	/* Start address of the active per set. */
+	if (addr == (addr_t) &per32->cr10) {
+		if (test_thread_flag(TIF_SINGLE_STEP))
+			return 0;
+		return child->thread.per_user.start;
+	}
+	/* End address of the active per set. */
+	if (addr == (addr_t) &per32->cr11) {
+		if (test_thread_flag(TIF_SINGLE_STEP))
+			return PSW32_ADDR_INSN;
+		return child->thread.per_user.end;
+	}
+	/* Single-step bit. */
+	if (addr == (addr_t) &per32->bits) {
+		if (test_thread_flag(TIF_SINGLE_STEP))
+			return 0x80000000;
+		return 0;
+	}
+	/* Start address of the user specified per set. */
+	if (addr == (addr_t) &per32->starting_addr)
+		return (__u32) child->thread.per_user.start;
+	/* End address of the user specified per set. */
+	if (addr == (addr_t) &per32->ending_addr)
+		return (__u32) child->thread.per_user.end;
+	/* PER code, ATMID and AI of the last PER trap */
+	if (addr == (addr_t) &per32->perc_atmid)
+		return (__u32) child->thread.per_event.cause << 16;
+	/* Address of the last PER trap */
+	if (addr == (addr_t) &per32->address)
+		return (__u32) child->thread.per_event.address;
+	/* Access id of the last PER trap */
+	if (addr == (addr_t) &per32->access_id)
+		return (__u32) child->thread.per_event.paid << 24;
+	return 0;
+}
+
+/*
+ * Same as __peek_user but for a 31 bit program.
+ *
+ * @child: the traced task
+ * @addr:  offset into struct compat_user; alignment and range are
+ *	   checked by the caller (see peek_user_compat())
+ *
+ * Returns the 32 bit word found at @addr; offsets behind regs.per_info
+ * and the padding between orig_gpr2 and fp_regs read as 0.
+ */
+static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
+{
+	/* NULL based pointer, only used to compute field offsets. */
+	struct compat_user *dummy32 = NULL;
+	addr_t offset;
+	__u32 tmp;
+
+	if (addr < (addr_t) &dummy32->regs.acrs) {
+		struct pt_regs *regs = task_pt_regs(child);
+		/*
+		 * psw and gprs are stored on the stack
+		 */
+		if (addr == (addr_t) &dummy32->regs.psw.mask) {
+			/* Fake a 31 bit psw mask. */
+			tmp = (__u32)(regs->psw.mask >> 32);
+			tmp &= PSW32_MASK_USER | PSW32_MASK_RI;
+			tmp |= PSW32_USER_BITS;
+		} else if (addr == (addr_t) &dummy32->regs.psw.addr) {
+			/* Fake a 31 bit psw address. */
+			tmp = (__u32) regs->psw.addr |
+				(__u32)(regs->psw.mask & PSW_MASK_BA);
+		} else {
+			/*
+			 * gpr 0-15: the 64 bit slots are twice as far apart
+			 * as the 31 bit ones, the value is the second
+			 * 4 bytes of a slot.
+			 */
+			tmp = *(__u32 *)((addr_t) &regs->psw + addr*2 + 4);
+		}
+	} else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
+		/*
+		 * access registers are stored in the thread structure
+		 */
+		offset = addr - (addr_t) &dummy32->regs.acrs;
+		tmp = *(__u32*)((addr_t) &child->thread.acrs + offset);
+
+	} else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) {
+		/*
+		 * orig_gpr2 is stored on the kernel stack; the second
+		 * 4 bytes of the 64 bit value are returned.
+		 */
+		tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4);
+
+	} else if (addr < (addr_t) &dummy32->regs.fp_regs) {
+		/*
+		 * prevent reads of padding hole between
+		 * orig_gpr2 and fp_regs on s390.
+		 */
+		tmp = 0;
+
+	} else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) {
+		/*
+		 * floating point control reg. is in the thread structure
+		 */
+		tmp = child->thread.fp_regs.fpc;
+
+	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
+		/*
+		 * floating point regs. are either in child->thread.fp_regs
+		 * or the child->thread.vxrs array
+		 */
+		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
+		/* Entries of the vxrs array are twice as far apart. */
+		if (child->thread.vxrs)
+			tmp = *(__u32 *)
+			       ((addr_t) child->thread.vxrs + 2*offset);
+		else
+			tmp = *(__u32 *)
+			       ((addr_t) &child->thread.fp_regs.fprs + offset);
+
+	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
+		/*
+		 * Handle access to the per_info structure.
+		 */
+		addr -= (addr_t) &dummy32->regs.per_info;
+		tmp = __peek_user_per_compat(child, addr);
+
+	} else
+		tmp = 0;
+
+	return tmp;
+}
+
+/*
+ * Read one 32 bit word of the 31 bit user area of @child and store it
+ * at user address @data.
+ *
+ * Returns -EIO if the caller is not a compat task, if @addr is not
+ * 4 byte aligned or if it lies outside of struct compat_user;
+ * otherwise the result of put_user().
+ */
+static int peek_user_compat(struct task_struct *child,
+			    addr_t addr, addr_t data)
+{
+	__u32 tmp;
+
+	/*
+	 * @addr is an offset into the 31 bit layout, so it has to be
+	 * checked against struct compat_user, like poke_user_compat()
+	 * does, and not against the 64 bit struct user.
+	 */
+	if (!is_compat_task() || (addr & 3) ||
+	    addr > sizeof(struct compat_user) - 3)
+		return -EIO;
+
+	tmp = __peek_user_compat(child, addr);
+	return put_user(tmp, (__u32 __user *) data);
+}
+
+/*
+ * Same as __poke_user_per but for a 31 bit program.
+ * @child: the traced task
+ * @addr:  offset relative to the start of struct compat_per_struct_kernel
+ * @data:  32 bit value to store
+ *
+ * Only cr9 (PER event mask), starting_addr and ending_addr can be
+ * written, writes to any other offset are ignored.
+ */
+static inline void __poke_user_per_compat(struct task_struct *child,
+					  addr_t addr, __u32 data)
+{
+	struct compat_per_struct_kernel *per32 = NULL;
+	struct thread_struct *thread = &child->thread;
+
+	if (addr == (addr_t) &per32->cr9) {
+		/* PER event mask of the user specified per set. */
+		thread->per_user.control =
+			data & (PER_EVENT_MASK | PER_CONTROL_MASK);
+		return;
+	}
+	if (addr == (addr_t) &per32->starting_addr) {
+		/* Starting address of the user specified per set. */
+		thread->per_user.start = data;
+		return;
+	}
+	if (addr == (addr_t) &per32->ending_addr) {
+		/* Ending address of the user specified per set. */
+		thread->per_user.end = data;
+	}
+}
+
+/*
+ * Same as __poke_user but for a 31 bit program.
+ *
+ * @child: the traced task
+ * @addr:  offset into struct compat_user; alignment and range are
+ *	   checked by the caller (see poke_user_compat())
+ * @data:  value to store, only the lower 32 bit are used
+ *
+ * Returns 0 on success (this includes writes that are ignored) and
+ * -EINVAL for an invalid psw mask or floating point control value.
+ */
+static int __poke_user_compat(struct task_struct *child,
+			      addr_t addr, addr_t data)
+{
+	/* NULL based pointer, only used to compute field offsets. */
+	struct compat_user *dummy32 = NULL;
+	__u32 tmp = (__u32) data;
+	addr_t offset;
+
+	if (addr < (addr_t) &dummy32->regs.acrs) {
+		struct pt_regs *regs = task_pt_regs(child);
+		/*
+		 * psw and gprs are stored on the stack
+		 */
+		if (addr == (addr_t) &dummy32->regs.psw.mask) {
+			__u32 mask = PSW32_MASK_USER;
+
+			/* The RI bit may only be changed for RI tasks. */
+			mask |= is_ri_task(child) ? PSW32_MASK_RI : 0;
+			/* Build a 64 bit psw mask from 31 bit mask. */
+			if ((tmp ^ PSW32_USER_BITS) & ~mask)
+				/* Invalid psw mask. */
+				return -EINVAL;
+			if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME)
+				/* Invalid address-space-control bits */
+				return -EINVAL;
+			/* The BA bit of the current 64 bit mask is kept. */
+			regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
+				(regs->psw.mask & PSW_MASK_BA) |
+				(__u64)(tmp & mask) << 32;
+		} else if (addr == (addr_t) &dummy32->regs.psw.addr) {
+			/* Build a 64 bit psw address from 31 bit address. */
+			regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN;
+			/* Transfer 31 bit amode bit to psw mask. */
+			regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) |
+				(__u64)(tmp & PSW32_ADDR_AMODE);
+		} else {
+			/*
+			 * gpr 0-15: the 64 bit slots are twice as far apart
+			 * as the 31 bit ones, the value goes into the
+			 * second 4 bytes of a slot.
+			 */
+			*(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
+		}
+	} else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
+		/*
+		 * access registers are stored in the thread structure
+		 */
+		offset = addr - (addr_t) &dummy32->regs.acrs;
+		*(__u32*)((addr_t) &child->thread.acrs + offset) = tmp;
+
+	} else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) {
+		/*
+		 * orig_gpr2 is stored on the kernel stack; the second
+		 * 4 bytes of the 64 bit value are written.
+		 */
+		*(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp;
+
+	} else if (addr < (addr_t) &dummy32->regs.fp_regs) {
+		/*
+		 * prevent writes of padding hole between
+		 * orig_gpr2 and fp_regs on s390.
+		 */
+		return 0;
+
+	} else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) {
+		/*
+		 * floating point control reg. is in the thread structure
+		 */
+		if (test_fp_ctl(tmp))
+			return -EINVAL;
+		child->thread.fp_regs.fpc = data;
+
+	} else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
+		/*
+		 * floating point regs. are either in child->thread.fp_regs
+		 * or the child->thread.vxrs array
+		 */
+		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
+		/* Entries of the vxrs array are twice as far apart. */
+		if (child->thread.vxrs)
+			*(__u32 *)((addr_t)
+				child->thread.vxrs + 2*offset) = tmp;
+		else
+			*(__u32 *)((addr_t)
+				&child->thread.fp_regs.fprs + offset) = tmp;
+
+	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
+		/*
+		 * Handle access to the per_info structure.
+		 */
+		addr -= (addr_t) &dummy32->regs.per_info;
+		__poke_user_per_compat(child, addr, data);
+	}
+
+	/* Writes behind regs.per_info are silently ignored. */
+	return 0;
+}
+
+/*
+ * Entry point for a POKEUSR style write from a compat tracer: sanity check
+ * the user area offset, then hand the write to __poke_user_compat().
+ * Returns -EIO if the caller is not a compat task, the offset is not word
+ * aligned or the offset lies beyond struct compat_user.
+ */
+static int poke_user_compat(struct task_struct *child,
+			    addr_t addr, addr_t data)
+{
+	/* The compat layout is only used when is_compat_task() holds. */
+	if (!is_compat_task())
+		return -EIO;
+	/* Offsets must be a multiple of four bytes. */
+	if (addr & 3)
+		return -EIO;
+	/* The addressed word has to lie inside struct compat_user. */
+	if (addr > sizeof(struct compat_user) - 3)
+		return -EIO;
+
+	return __poke_user_compat(child, addr, data);
+}
+
+/*
+ * ptrace requests specific to 31 bit (compat) tracers.
+ *
+ * PTRACE_PEEKUSR/PTRACE_POKEUSR access one word of the compat user area.
+ * The *_AREA variants read a compat_ptrace_area descriptor from @caddr and
+ * transfer parea.len bytes word by word between the user area offset
+ * parea.kernel_addr and the tracer buffer parea.process_addr.
+ * PTRACE_GET_LAST_BREAK stores the last_break value of @child at @cdata.
+ * Every other request is forwarded to compat_ptrace_request().
+ *
+ * Returns the result of the invoked helper, or -EFAULT if a user pointer
+ * cannot be accessed.
+ */
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+			compat_ulong_t caddr, compat_ulong_t cdata)
+{
+	unsigned long addr = caddr;
+	unsigned long data = cdata;
+	compat_ptrace_area parea;
+	int copied, ret;
+
+	switch (request) {
+	case PTRACE_PEEKUSR:
+		/* read the word at location addr in the USER area. */
+		return peek_user_compat(child, addr, data);
+
+	case PTRACE_POKEUSR:
+		/* write the word at location addr in the USER area */
+		return poke_user_compat(child, addr, data);
+
+	case PTRACE_PEEKUSR_AREA:
+	case PTRACE_POKEUSR_AREA:
+		if (copy_from_user(&parea, (void __force __user *) addr,
+							sizeof(parea)))
+			return -EFAULT;
+		addr = parea.kernel_addr;
+		data = parea.process_addr;
+		copied = 0;
+		/* Transfer one 32 bit word per iteration, stop on first error. */
+		while (copied < parea.len) {
+			if (request == PTRACE_PEEKUSR_AREA)
+				ret = peek_user_compat(child, addr, data);
+			else {
+				__u32 utmp;
+				if (get_user(utmp,
+					     (__u32 __force __user *) data))
+					return -EFAULT;
+				ret = poke_user_compat(child, addr, utmp);
+			}
+			if (ret)
+				return ret;
+			addr += sizeof(unsigned int);
+			data += sizeof(unsigned int);
+			copied += sizeof(unsigned int);
+		}
+		return 0;
+	case PTRACE_GET_LAST_BREAK:
+		/*
+		 * Propagate the put_user() result so that a bad tracer
+		 * pointer is reported as -EFAULT instead of success.
+		 */
+		return put_user(task_thread_info(child)->last_break,
+				(unsigned int __user *) data);
+	}
+	return compat_ptrace_request(child, request, addr, data);
+}
+#endif
+
+/*
+ * Syscall entry hook for seccomp, ptrace, tracepoints and audit.
+ * Returns the system call number found in gprs[2], or -1 if the system
+ * call has to be skipped.
+ */
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
+{
+	int skip = 0;
+
+	/*
+	 * The secure computing check comes first. A seccomp failure must
+	 * not expose any additional code, so leave right away.
+	 */
+	if (secure_computing())
+		return -1;
+
+	/*
+	 * The sysc_tracesys code in entry.S stored the system
+	 * call number to gprs[2].
+	 */
+	if (test_thread_flag(TIF_SYSCALL_TRACE)) {
+		if (tracehook_report_syscall_entry(regs) ||
+		    regs->gprs[2] >= NR_syscalls) {
+			/*
+			 * Either tracing vetoed the system call or the
+			 * debugger stored an invalid system call number.
+			 * Skip the system call and the system call restart
+			 * handling.
+			 */
+			clear_pt_regs_flag(regs, PIF_SYSCALL);
+			skip = 1;
+		}
+	}
+
+	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+		trace_sys_enter(regs, regs->gprs[2]);
+
+	audit_syscall_entry(regs->gprs[2], regs->orig_gpr2,
+			    regs->gprs[3], regs->gprs[4],
+			    regs->gprs[5]);
+
+	return skip ? -1 : regs->gprs[2];
+}
+
+/*
+ * Syscall exit hook: audit first, then the sys_exit tracepoint, then the
+ * ptrace exit report. gprs[2] is passed to the tracepoint as the result.
+ */
+asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
+{
+	audit_syscall_exit(regs);
+
+	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) {
+		/* Tracepoint consumers get the value currently in gprs[2]. */
+		trace_sys_exit(regs, regs->gprs[2]);
+	}
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE)) {
+		/* Second argument 0: this is not a single-step report. */
+		tracehook_report_syscall_exit(regs, 0);
+	}
+}
+
+/*
+ * user_regset definitions.
+ */
+
+/*
+ * regset .get handler for the general register set.
+ * Copies @count bytes starting at user area offset @pos, one long at a
+ * time via __peek_user(), into @kbuf if set, otherwise into @ubuf.
+ * Returns 0, or -EFAULT if the user buffer cannot be written.
+ */
+static int s390_regs_get(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 void *kbuf, void __user *ubuf)
+{
+	/* For the running task refresh the saved access registers first. */
+	if (target == current)
+		save_access_regs(target->thread.acrs);
+
+	if (kbuf) {
+		unsigned long *dst = kbuf;
+
+		for (; count > 0; count -= sizeof(*dst), pos += sizeof(*dst))
+			*dst++ = __peek_user(target, pos);
+		return 0;
+	}
+
+	{
+		unsigned long __user *udst = ubuf;
+
+		for (; count > 0; count -= sizeof(*udst), pos += sizeof(*udst)) {
+			if (__put_user(__peek_user(target, pos), udst))
+				return -EFAULT;
+			udst++;
+		}
+	}
+	return 0;
+}
+
+/*
+ * regset .set handler for the general register set.
+ * Writes @count bytes starting at user area offset @pos, one long at a
+ * time via __poke_user(), taking the values from @kbuf if set, otherwise
+ * from @ubuf. Stops at the first error and returns it; returns 0 on
+ * success.
+ */
+static int s390_regs_set(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	int rc = 0;
+
+	if (target == current)
+		save_access_regs(target->thread.acrs);
+
+	if (kbuf) {
+		const unsigned long *src = kbuf;
+
+		while (count > 0) {
+			rc = __poke_user(target, pos, *src++);
+			if (rc)
+				break;
+			count -= sizeof(*src);
+			pos += sizeof(*src);
+		}
+	} else {
+		const unsigned long  __user *usrc = ubuf;
+
+		while (count > 0) {
+			unsigned long word;
+
+			rc = __get_user(word, usrc);
+			if (rc)
+				break;
+			rc = __poke_user(target, pos, word);
+			if (rc)
+				break;
+			usrc++;
+			count -= sizeof(*usrc);
+			pos += sizeof(*usrc);
+		}
+	}
+
+	/* Only reload the access registers if everything was accepted. */
+	if (rc == 0 && target == current)
+		restore_access_regs(target->thread.acrs);
+
+	return rc;
+}
+
+/*
+ * regset .get handler for the floating point registers.
+ * Brings target->thread.fp_regs up to date (from the CPU for the current
+ * task, from the vxrs array for another task that has one) and copies the
+ * requested range out of it.
+ */
+static int s390_fpregs_get(struct task_struct *target,
+			   const struct user_regset *regset, unsigned int pos,
+			   unsigned int count, void *kbuf, void __user *ubuf)
+{
+	struct thread_struct *thread = &target->thread;
+
+	if (target == current) {
+		save_fp_ctl(&thread->fp_regs.fpc);
+		save_fp_regs(thread->fp_regs.fprs);
+	} else if (thread->vxrs) {
+		int n;
+
+		/* The fpr image is the leading freg_t of each low vxrs entry. */
+		for (n = 0; n < __NUM_VXRS_LOW; n++)
+			thread->fp_regs.fprs[n] = *(freg_t *)&thread->vxrs[n];
+	}
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &thread->fp_regs, 0, -1);
+}
+
+/*
+ * regset .set handler for the floating point registers.
+ * A new FPC value is validated with test_fp_ctl() before it is stored and
+ * the padding word following it must be zero. Returns 0 on success,
+ * -EINVAL for a rejected FPC or the error of user_regset_copyin().
+ */
+static int s390_fpregs_set(struct task_struct *target,
+			   const struct user_regset *regset, unsigned int pos,
+			   unsigned int count, const void *kbuf,
+			   const void __user *ubuf)
+{
+	int rc;
+
+	if (target == current) {
+		save_fp_ctl(&target->thread.fp_regs.fpc);
+		save_fp_regs(target->thread.fp_regs.fprs);
+	}
+
+	/* The range in front of fprs holds the FPC: validate before use. */
+	if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
+		u32 ufpc[2] = { target->thread.fp_regs.fpc, 0 };
+
+		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
+					0, offsetof(s390_fp_regs, fprs));
+		if (rc)
+			return rc;
+		if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
+			return -EINVAL;
+		target->thread.fp_regs.fpc = ufpc[0];
+	}
+
+	/* Whatever is left goes straight into the fprs array. */
+	if (count > 0) {
+		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					target->thread.fp_regs.fprs,
+					offsetof(s390_fp_regs, fprs), -1);
+		if (rc)
+			return rc;
+	}
+
+	if (target == current) {
+		restore_fp_ctl(&target->thread.fp_regs.fpc);
+		restore_fp_regs(target->thread.fp_regs.fprs);
+	} else if (target->thread.vxrs) {
+		int n;
+
+		/* Mirror the new fprs into the leading part of the vxrs. */
+		for (n = 0; n < __NUM_VXRS_LOW; n++)
+			*(freg_t *)&target->thread.vxrs[n] =
+				target->thread.fp_regs.fprs[n];
+	}
+
+	return 0;
+}
+
+/*
+ * regset .get handler for NT_S390_LAST_BREAK: hands out the last_break
+ * value kept in the thread_info of @target. Nothing is copied for a zero
+ * @count. Returns 0, or -EFAULT if the user buffer cannot be written.
+ */
+static int s390_last_break_get(struct task_struct *target,
+			       const struct user_regset *regset,
+			       unsigned int pos, unsigned int count,
+			       void *kbuf, void __user *ubuf)
+{
+	if (count == 0)
+		return 0;
+
+	if (kbuf) {
+		*(unsigned long *) kbuf = task_thread_info(target)->last_break;
+		return 0;
+	}
+
+	if (__put_user(task_thread_info(target)->last_break,
+		       (unsigned long __user *) ubuf))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * regset .set handler for NT_S390_LAST_BREAK: the value cannot be changed,
+ * the request is accepted and ignored (always returns 0).
+ */
+static int s390_last_break_set(struct task_struct *target,
+			       const struct user_regset *regset,
+			       unsigned int pos, unsigned int count,
+			       const void *kbuf, const void __user *ubuf)
+{
+	return 0;
+}
+
+/*
+ * regset .get handler for NT_S390_TDB: copies out the 256 byte trap_tdb
+ * buffer of @target. Returns -ENODATA unless bit 0x200 is set in the
+ * int_code of the task's pt_regs.
+ */
+static int s390_tdb_get(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf)
+{
+	if (!(task_pt_regs(target)->int_code & 0x200))
+		return -ENODATA;
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   target->thread.trap_tdb, 0, 256);
+}
+
+/*
+ * regset .set handler for NT_S390_TDB: writes are accepted and ignored
+ * (always returns 0).
+ */
+static int s390_tdb_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
+{
+	return 0;
+}
+
+/*
+ * regset .get handler for NT_S390_VXRS_LOW: hands out the second 64 bit
+ * word of each of the first __NUM_VXRS_LOW vector registers. A task
+ * without a vxrs array reads as all zeroes. Returns -ENODEV if the
+ * machine has no vector facility.
+ */
+static int s390_vxrs_low_get(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     void *kbuf, void __user *ubuf)
+{
+	__u64 low[__NUM_VXRS_LOW];
+	int n;
+
+	if (!MACHINE_HAS_VX)
+		return -ENODEV;
+
+	if (!target->thread.vxrs) {
+		memset(low, 0, sizeof(low));
+	} else {
+		if (target == current)
+			save_vx_regs(target->thread.vxrs);
+		for (n = 0; n < __NUM_VXRS_LOW; n++) {
+			__u64 *words = (__u64 *)(target->thread.vxrs + n);
+
+			low[n] = words[1];
+		}
+	}
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, low, 0, -1);
+}
+
+/*
+ * regset .set handler for NT_S390_VXRS_LOW: replaces the second 64 bit
+ * word of the first __NUM_VXRS_LOW vector registers with the supplied
+ * values. The vxrs array is allocated on demand.
+ *
+ * Returns 0 on success, -ENODEV without vector facility, or the error of
+ * alloc_vector_registers() / user_regset_copyin().
+ */
+static int s390_vxrs_low_set(struct task_struct *target,
+			     const struct user_regset *regset,
+			     unsigned int pos, unsigned int count,
+			     const void *kbuf, const void __user *ubuf)
+{
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i, rc;
+
+	if (!MACHINE_HAS_VX)
+		return -ENODEV;
+	if (!target->thread.vxrs) {
+		rc = alloc_vector_registers(target);
+		if (rc)
+			return rc;
+	} else if (target == current)
+		save_vx_regs(target->thread.vxrs);
+
+	/*
+	 * Seed the staging buffer with the current register contents.
+	 * user_regset_copyin() only fills the range selected by pos/count,
+	 * so without this a partial write would store uninitialised stack
+	 * data into the registers that were not part of the request.
+	 */
+	for (i = 0; i < __NUM_VXRS_LOW; i++)
+		vxrs[i] = *((__u64 *)(target->thread.vxrs + i) + 1);
+
+	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+	if (rc == 0) {
+		for (i = 0; i < __NUM_VXRS_LOW; i++)
+			*((__u64 *)(target->thread.vxrs + i) + 1) = vxrs[i];
+		if (target == current)
+			restore_vx_regs(target->thread.vxrs);
+	}
+
+	return rc;
+}
+
+/*
+ * regset .get handler for NT_S390_VXRS_HIGH: hands out the
+ * __NUM_VXRS_HIGH vector registers that follow the low ones in the vxrs
+ * array. A task without a vxrs array reads as all zeroes. Returns -ENODEV
+ * if the machine has no vector facility.
+ */
+static int s390_vxrs_high_get(struct task_struct *target,
+			      const struct user_regset *regset,
+			      unsigned int pos, unsigned int count,
+			      void *kbuf, void __user *ubuf)
+{
+	__vector128 high[__NUM_VXRS_HIGH];
+
+	if (!MACHINE_HAS_VX)
+		return -ENODEV;
+
+	if (!target->thread.vxrs) {
+		memset(high, 0, sizeof(high));
+	} else {
+		if (target == current)
+			save_vx_regs(target->thread.vxrs);
+		memcpy(high, &target->thread.vxrs[__NUM_VXRS_LOW],
+		       sizeof(high));
+	}
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, high, 0, -1);
+}
+
+/*
+ * regset .set handler for NT_S390_VXRS_HIGH: copies the supplied data
+ * directly into the vxrs array behind the first __NUM_VXRS_LOW entries.
+ * The array is allocated on demand. Returns 0 on success, -ENODEV without
+ * vector facility, or the error of the allocation / copy.
+ */
+static int s390_vxrs_high_set(struct task_struct *target,
+			      const struct user_regset *regset,
+			      unsigned int pos, unsigned int count,
+			      const void *kbuf, const void __user *ubuf)
+{
+	int rc;
+
+	if (!MACHINE_HAS_VX)
+		return -ENODEV;
+
+	if (target->thread.vxrs) {
+		/* Make the in-memory copy current before modifying it. */
+		if (target == current)
+			save_vx_regs(target->thread.vxrs);
+	} else {
+		rc = alloc_vector_registers(target);
+		if (rc)
+			return rc;
+	}
+
+	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				&target->thread.vxrs[__NUM_VXRS_LOW], 0, -1);
+	if (rc)
+		return rc;
+
+	if (target == current)
+		restore_vx_regs(target->thread.vxrs);
+	return 0;
+}
+
+/*
+ * regset .get handler for NT_S390_SYSTEM_CALL: copies out the system_call
+ * word stored in the thread_info of @target.
+ */
+static int s390_system_call_get(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				void *kbuf, void __user *ubuf)
+{
+	struct thread_info *ti = task_thread_info(target);
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &ti->system_call, 0, sizeof(unsigned int));
+}
+
+/*
+ * regset .set handler for NT_S390_SYSTEM_CALL: overwrites the system_call
+ * word stored in the thread_info of @target.
+ */
+static int s390_system_call_set(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				const void *kbuf, const void __user *ubuf)
+{
+	struct thread_info *ti = task_thread_info(target);
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &ti->system_call, 0, sizeof(unsigned int));
+}
+
+/*
+ * Register sets exported for 64 bit tasks. Each entry names the core dump
+ * note type, the number (.n), size and alignment of its elements and the
+ * handlers used to read and write it.
+ */
+static const struct user_regset s390_regsets[] = {
+	{
+		/* psw, gprs, acrs, orig_gpr2 - accessed in units of long */
+		.core_note_type = NT_PRSTATUS,
+		.n = sizeof(s390_regs) / sizeof(long),
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.get = s390_regs_get,
+		.set = s390_regs_set,
+	},
+	{
+		/* floating point control and floating point registers */
+		.core_note_type = NT_PRFPREG,
+		.n = sizeof(s390_fp_regs) / sizeof(long),
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.get = s390_fpregs_get,
+		.set = s390_fpregs_set,
+	},
+	{
+		/* system_call word of the thread_info */
+		.core_note_type = NT_S390_SYSTEM_CALL,
+		.n = 1,
+		.size = sizeof(unsigned int),
+		.align = sizeof(unsigned int),
+		.get = s390_system_call_get,
+		.set = s390_system_call_set,
+	},
+	{
+		/* last_break value; writes are ignored */
+		.core_note_type = NT_S390_LAST_BREAK,
+		.n = 1,
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.get = s390_last_break_get,
+		.set = s390_last_break_set,
+	},
+	{
+		/* 256 byte trap_tdb buffer; writes are ignored */
+		.core_note_type = NT_S390_TDB,
+		.n = 1,
+		.size = 256,
+		.align = 1,
+		.get = s390_tdb_get,
+		.set = s390_tdb_set,
+	},
+	{
+		/* second 64 bit word of the first __NUM_VXRS_LOW vxrs */
+		.core_note_type = NT_S390_VXRS_LOW,
+		.n = __NUM_VXRS_LOW,
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.get = s390_vxrs_low_get,
+		.set = s390_vxrs_low_set,
+	},
+	{
+		/* the vxrs that follow the first __NUM_VXRS_LOW entries */
+		.core_note_type = NT_S390_VXRS_HIGH,
+		.n = __NUM_VXRS_HIGH,
+		.size = sizeof(__vector128),
+		.align = sizeof(__vector128),
+		.get = s390_vxrs_high_get,
+		.set = s390_vxrs_high_set,
+	},
+};
+
+/* regset view returned by task_user_regset_view() for non-31-bit tasks. */
+static const struct user_regset_view user_s390_view = {
+	.name = UTS_MACHINE,
+	.e_machine = EM_S390,
+	.regsets = s390_regsets,
+	.n = ARRAY_SIZE(s390_regsets)
+};
+
+#ifdef CONFIG_COMPAT
+/*
+ * regset .get handler for the general register set of a 31 bit task.
+ * Copies @count bytes starting at compat user area offset @pos, one
+ * compat_ulong_t at a time via __peek_user_compat(), into @kbuf if set,
+ * otherwise into @ubuf. Returns 0, or -EFAULT on a faulting user buffer.
+ */
+static int s390_compat_regs_get(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				void *kbuf, void __user *ubuf)
+{
+	if (target == current)
+		save_access_regs(target->thread.acrs);
+
+	if (kbuf) {
+		compat_ulong_t *dst = kbuf;
+
+		for (; count > 0; count -= sizeof(*dst), pos += sizeof(*dst))
+			*dst++ = __peek_user_compat(target, pos);
+		return 0;
+	}
+
+	{
+		compat_ulong_t __user *udst = ubuf;
+
+		for (; count > 0; count -= sizeof(*udst), pos += sizeof(*udst)) {
+			if (__put_user(__peek_user_compat(target, pos), udst))
+				return -EFAULT;
+			udst++;
+		}
+	}
+	return 0;
+}
+
+/*
+ * regset .set handler for the general register set of a 31 bit task.
+ * Writes @count bytes starting at compat user area offset @pos, one
+ * compat_ulong_t at a time via __poke_user_compat(). Stops at the first
+ * error and returns it; returns 0 on success.
+ */
+static int s390_compat_regs_set(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				const void *kbuf, const void __user *ubuf)
+{
+	int rc = 0;
+
+	if (target == current)
+		save_access_regs(target->thread.acrs);
+
+	if (kbuf) {
+		const compat_ulong_t *src = kbuf;
+
+		while (count > 0) {
+			rc = __poke_user_compat(target, pos, *src++);
+			if (rc)
+				break;
+			count -= sizeof(*src);
+			pos += sizeof(*src);
+		}
+	} else {
+		const compat_ulong_t  __user *usrc = ubuf;
+
+		while (count > 0) {
+			compat_ulong_t word;
+
+			rc = __get_user(word, usrc);
+			if (rc)
+				break;
+			rc = __poke_user_compat(target, pos, word);
+			if (rc)
+				break;
+			usrc++;
+			count -= sizeof(*usrc);
+			pos += sizeof(*usrc);
+		}
+	}
+
+	/* Only reload the access registers if everything was accepted. */
+	if (rc == 0 && target == current)
+		restore_access_regs(target->thread.acrs);
+
+	return rc;
+}
+
+/*
+ * regset .get handler for NT_S390_HIGH_GPRS: hands out the first 32 bit
+ * word of each 64 bit gpr in pt_regs, starting with register number
+ * @pos / sizeof(compat_ulong_t). Returns 0, or -EFAULT on a faulting user
+ * buffer.
+ */
+static int s390_compat_regs_high_get(struct task_struct *target,
+				     const struct user_regset *regset,
+				     unsigned int pos, unsigned int count,
+				     void *kbuf, void __user *ubuf)
+{
+	unsigned int first = pos / sizeof(compat_ulong_t);
+	compat_ulong_t *src;
+
+	src = (compat_ulong_t *) &task_pt_regs(target)->gprs[first];
+
+	if (kbuf) {
+		compat_ulong_t *dst = kbuf;
+
+		/* src advances by two words: one full 64 bit register. */
+		for (; count > 0; count -= sizeof(*dst), src += 2)
+			*dst++ = *src;
+		return 0;
+	}
+
+	{
+		compat_ulong_t __user *udst = ubuf;
+
+		for (; count > 0; count -= sizeof(*udst), src += 2) {
+			if (__put_user(*src, udst))
+				return -EFAULT;
+			udst++;
+		}
+	}
+	return 0;
+}
+
+/*
+ * regset .set handler for NT_S390_HIGH_GPRS: stores the supplied 32 bit
+ * values into the first 32 bit word of consecutive 64 bit gprs in
+ * pt_regs, starting with register number @pos / sizeof(compat_ulong_t).
+ *
+ * Returns 0 on success or the error of __get_user().
+ */
+static int s390_compat_regs_high_set(struct task_struct *target,
+				     const struct user_regset *regset,
+				     unsigned int pos, unsigned int count,
+				     const void *kbuf, const void __user *ubuf)
+{
+	compat_ulong_t *gprs_high;
+	int rc = 0;
+
+	gprs_high = (compat_ulong_t *)
+		&task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
+	if (kbuf) {
+		const compat_ulong_t *k = kbuf;
+		while (count > 0) {
+			*gprs_high = *k++;
+			/*
+			 * Step to the high word of the next register. This
+			 * has to advance the pointer, not add 2 to the
+			 * value that was just stored.
+			 */
+			gprs_high += 2;
+			count -= sizeof(*k);
+		}
+	} else {
+		const compat_ulong_t  __user *u = ubuf;
+		while (count > 0 && !rc) {
+			compat_ulong_t word;
+			rc = __get_user(word, u++);
+			if (rc)
+				break;
+			*gprs_high = word;
+			/* Advance the pointer, see above. */
+			gprs_high += 2;
+			count -= sizeof(*u);
+		}
+	}
+
+	return rc;
+}
+
+/*
+ * regset .get handler for NT_S390_LAST_BREAK of a 31 bit task: the
+ * last_break value is truncated to compat_ulong_t and then stored as an
+ * unsigned long. Nothing is copied for a zero @count. Returns 0, or
+ * -EFAULT on a faulting user buffer.
+ */
+static int s390_compat_last_break_get(struct task_struct *target,
+				      const struct user_regset *regset,
+				      unsigned int pos, unsigned int count,
+				      void *kbuf, void __user *ubuf)
+{
+	compat_ulong_t last_break;
+
+	if (count == 0)
+		return 0;
+
+	last_break = task_thread_info(target)->last_break;
+	if (kbuf) {
+		*(unsigned long *) kbuf = last_break;
+		return 0;
+	}
+
+	if (__put_user(last_break, (unsigned long __user *) ubuf))
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * regset .set handler for NT_S390_LAST_BREAK of a 31 bit task: writes are
+ * accepted and ignored (always returns 0).
+ */
+static int s390_compat_last_break_set(struct task_struct *target,
+				      const struct user_regset *regset,
+				      unsigned int pos, unsigned int count,
+				      const void *kbuf, const void __user *ubuf)
+{
+	return 0;
+}
+
+/*
+ * Register sets exported for 31 bit (compat) tasks. The layout mirrors
+ * s390_regsets with compat sized elements, plus NT_S390_HIGH_GPRS for the
+ * upper register halves.
+ */
+static const struct user_regset s390_compat_regsets[] = {
+	{
+		/* compat psw, gprs, acrs, orig_gpr2 */
+		.core_note_type = NT_PRSTATUS,
+		.n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
+		.size = sizeof(compat_long_t),
+		.align = sizeof(compat_long_t),
+		.get = s390_compat_regs_get,
+		.set = s390_compat_regs_set,
+	},
+	{
+		/* same handlers as the 64 bit view, compat sized elements */
+		.core_note_type = NT_PRFPREG,
+		.n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
+		.size = sizeof(compat_long_t),
+		.align = sizeof(compat_long_t),
+		.get = s390_fpregs_get,
+		.set = s390_fpregs_set,
+	},
+	{
+		/* system_call word of the thread_info */
+		.core_note_type = NT_S390_SYSTEM_CALL,
+		.n = 1,
+		.size = sizeof(compat_uint_t),
+		.align = sizeof(compat_uint_t),
+		.get = s390_system_call_get,
+		.set = s390_system_call_set,
+	},
+	{
+		/*
+		 * NOTE(review): element size is sizeof(long), not a compat
+		 * type; the getter stores an unsigned long accordingly.
+		 */
+		.core_note_type = NT_S390_LAST_BREAK,
+		.n = 1,
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.get = s390_compat_last_break_get,
+		.set = s390_compat_last_break_set,
+	},
+	{
+		/* 256 byte trap_tdb buffer; writes are ignored */
+		.core_note_type = NT_S390_TDB,
+		.n = 1,
+		.size = 256,
+		.align = 1,
+		.get = s390_tdb_get,
+		.set = s390_tdb_set,
+	},
+	{
+		/* second 64 bit word of the first __NUM_VXRS_LOW vxrs */
+		.core_note_type = NT_S390_VXRS_LOW,
+		.n = __NUM_VXRS_LOW,
+		.size = sizeof(__u64),
+		.align = sizeof(__u64),
+		.get = s390_vxrs_low_get,
+		.set = s390_vxrs_low_set,
+	},
+	{
+		/* the vxrs that follow the first __NUM_VXRS_LOW entries */
+		.core_note_type = NT_S390_VXRS_HIGH,
+		.n = __NUM_VXRS_HIGH,
+		.size = sizeof(__vector128),
+		.align = sizeof(__vector128),
+		.get = s390_vxrs_high_get,
+		.set = s390_vxrs_high_set,
+	},
+	{
+		/* first 32 bit word of each 64 bit gpr */
+		.core_note_type = NT_S390_HIGH_GPRS,
+		.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
+		.size = sizeof(compat_long_t),
+		.align = sizeof(compat_long_t),
+		.get = s390_compat_regs_high_get,
+		.set = s390_compat_regs_high_set,
+	},
+};
+
+/* regset view returned by task_user_regset_view() for TIF_31BIT tasks. */
+static const struct user_regset_view user_s390_compat_view = {
+	.name = "s390",
+	.e_machine = EM_S390,
+	.regsets = s390_compat_regsets,
+	.n = ARRAY_SIZE(s390_compat_regsets)
+};
+#endif
+
+/*
+ * Select the regset view for @task: the compat view for tasks flagged
+ * TIF_31BIT (only with CONFIG_COMPAT), the native view otherwise.
+ */
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	const struct user_regset_view *view = &user_s390_view;
+
+#ifdef CONFIG_COMPAT
+	if (test_tsk_thread_flag(task, TIF_31BIT))
+		view = &user_s390_compat_view;
+#endif
+	return view;
+}
+
+/*
+ * Names of the general purpose registers, indexed by register number.
+ * Both the strings and the pointer table itself are read-only.
+ */
+static const char * const gpr_names[NUM_GPRS] = {
+	"r0", "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+	"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+};
+
+/*
+ * Return the content of general purpose register number @offset taken
+ * from @regs, or 0 if @offset is not a valid register number.
+ */
+unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
+{
+	return offset < NUM_GPRS ? regs->gprs[offset] : 0;
+}
+
+/*
+ * Translate a register name of the form "r<decimal number>" into the
+ * register number. Returns -EINVAL if @name is NULL, does not start with
+ * 'r', is not followed by a number kstrtoul() accepts, or the number is
+ * not below NUM_GPRS.
+ */
+int regs_query_register_offset(const char *name)
+{
+	unsigned long regno;
+
+	if (name == NULL)
+		return -EINVAL;
+	if (name[0] != 'r')
+		return -EINVAL;
+	if (kstrtoul(&name[1], 10, &regno) != 0 || regno >= NUM_GPRS)
+		return -EINVAL;
+
+	return regno;
+}
+
+/*
+ * Return the name of general purpose register number @offset, or NULL if
+ * @offset is not a valid register number.
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+	return offset < NUM_GPRS ? gpr_names[offset] : NULL;
+}
+
+/*
+ * Check whether @addr lies in the same THREAD_SIZE aligned area as the
+ * kernel stack pointer of @regs. Returns non-zero if it does.
+ */
+static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+	unsigned long diff = addr ^ kernel_stack_pointer(regs);
+
+	/* Same area <=> all bits above the THREAD_SIZE offset agree. */
+	return (diff & ~(THREAD_SIZE - 1)) == 0;
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:pt_regs which contains kernel stack pointer.
+ * @n:stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns the @n th entry of the kernel stack
+ * which is specified by @regs. If the @n th entry is NOT in the kernel
+ * stack, this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+	unsigned long addr = kernel_stack_pointer(regs) + n * sizeof(long);
+
+	return regs_within_kernel_stack(regs, addr) ?
+		*(unsigned long *)addr : 0;
+}
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
new file mode 100644
index 000000000..52aab0bd8
--- /dev/null
+++ b/arch/s390/kernel/reipl.S
@@ -0,0 +1,155 @@
+/*
+ *    Copyright IBM Corp 2000, 2011
+ *    Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>,
+ *		 Denis Joseph Barrow,
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/sigp.h>
+
+#
+# store_status
+#
+# Prerequisites to run this function:
+# - Prefix register is set to zero
+# - Original prefix register is stored in "dump_prefix_page"
+# - Lowcore protection is off
+#
+# Stores the register state into the lowcore save areas addressed
+# relative to SAVE_AREA_BASE and returns via %r14.
+# Clobbers %r1, %r2 and %r3.
+#
+ENTRY(store_status)
+	/* Save register one and load save area base */
+	stg	%r1,__LC_SAVE_AREA_RESTART
+	lghi	%r1,SAVE_AREA_BASE
+	/* General purpose registers */
+	stmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* %r1 held the base above: patch its slot with the saved value */
+	lg	%r2,__LC_SAVE_AREA_RESTART
+	stg	%r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1)
+	/* Control registers */
+	stctg	%c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* Access registers */
+	stam	%a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* Floating point registers */
+	std	%f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	std	%f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* Floating point control register */
+	stfpc	__LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* CPU timer */
+	stpt	__LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1)
+	/* Saved prefix register */
+	larl	%r2,dump_prefix_page
+	mvc	__LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2)
+	/* Clock comparator - seven bytes */
+	/* stckc stores 8 bytes to .Lclkcmp, bytes 1-7 of it are copied */
+	larl	%r2,.Lclkcmp
+	stckc	0(%r2)
+	mvc	__LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2)
+	/* Program status word */
+	epsw	%r2,%r3
+	st	%r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1)
+	st	%r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1)
+	/* The address part of the saved PSW is store_status itself */
+	larl	%r2,store_status
+	stg	%r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1)
+	br	%r14
+
+	/* Scratch doubleword used as the stckc target above */
+	.section .bss
+	.align	8
+.Lclkcmp:	.quad	0x0000000000000000
+	.previous
+
+#
+# do_reipl_asm
+# Parameter: r2 = schid of reipl device
+#
+# There is no return to the caller: every path ends in lpsw or lpswe.
+# %r13 is the base register for all data accessed relative to .Lpg0.
+#
+
+ENTRY(do_reipl_asm)
+		basr	%r13,0
+.Lpg0:		lpswe	.Lnewpsw-.Lpg0(%r13)
+.Lpg1:		brasl	%r14,store_status
+
+		lctlg	%c6,%c6,.Lall-.Lpg0(%r13)
+		lgr	%r1,%r2
+		/* a program check in the code below continues at .Lecs */
+		mvc	__LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13)
+		stsch	.Lschib-.Lpg0(%r13)
+		oi	.Lschib+5-.Lpg0(%r13),0x84
+.Lecs:		xi	.Lschib+27-.Lpg0(%r13),0x01
+		msch	.Lschib-.Lpg0(%r13)
+		/* start the channel program, up to five attempts */
+		lghi	%r0,5
+.Lssch:		ssch	.Liplorb-.Lpg0(%r13)
+		jz	.L001
+		brct	%r0,.Lssch
+		bas	%r14,.Ldisab-.Lpg0(%r13)
+		/* wait for an I/O interrupt, the handler PSW points to .Lcont */
+.L001:		mvc	__LC_IO_NEW_PSW(16),.Lionew-.Lpg0(%r13)
+.Ltpi:		lpswe	.Lwaitpsw-.Lpg0(%r13)
+		/* keep waiting unless subchannel id and int parm match */
+.Lcont:		c	%r1,__LC_SUBCHANNEL_ID
+		jnz	.Ltpi
+		clc	__LC_IO_INT_PARM(4),.Liplorb-.Lpg0(%r13)
+		jnz	.Ltpi
+		tsch	.Liplirb-.Lpg0(%r13)
+		/* any of the tested irb status bits set: give up via .Ldisab */
+		tm	.Liplirb+9-.Lpg0(%r13),0xbf
+		jz	.L002
+		bas	%r14,.Ldisab-.Lpg0(%r13)
+.L002:		tm	.Liplirb+8-.Lpg0(%r13),0xf3
+		jz	.L003
+		bas	%r14,.Ldisab-.Lpg0(%r13)
+.L003:		st	%r1,__LC_SUBCHANNEL_ID
+		lhi	%r1,0		 # mode 0 = esa
+		slr	%r0,%r0		 # set cpuid to zero
+		sigp	%r1,%r0,SIGP_SET_ARCHITECTURE # switch to esa mode
+		lpsw	0
+		/* error exit: load .Ldispsw with the bas return address in it */
+.Ldisab:	sll	%r14,1
+		srl	%r14,1		 # need to kill hi bit to avoid specification exceptions.
+		st	%r14,.Ldispsw+12-.Lpg0(%r13)
+		lpswe	.Ldispsw-.Lpg0(%r13)
+		.align	8
+.Lall:		.quad	0x00000000ff000000
+		.align	16
+/*
+ * These addresses have to be 31 bit otherwise
+ * the sigp will throw a specification exception
+ * when switching to ESA mode as bit 31 would be set
+ * in the ESA psw.
+ * Bit 31 of the addresses has to be 0 for the
+ * 31bit lpswe instruction, a fact they appear to have
+ * omitted from the pop.
+ */
+.Lnewpsw:	.quad	0x0000000080000000
+		.quad	.Lpg1
+.Lpcnew:	.quad	0x0000000080000000
+		.quad	.Lecs
+.Lionew:	.quad	0x0000000080000000
+		.quad	.Lcont
+.Lwaitpsw:	.quad	0x0202000080000000
+		.quad	.Ltpi
+.Ldispsw:	.quad	0x0002000080000000
+		.quad	0x0000000000000000
+/* channel program and operation request block passed to ssch */
+.Liplccws:	.long	0x02000000,0x60000018
+		.long	0x08000008,0x20000001
+.Liplorb:	.long	0x0049504c,0x0040ff80
+		.long	0x00000000+.Liplccws
+/* buffer used by stsch/msch above */
+.Lschib:	.long	0x00000000,0x00000000
+		.long	0x00000000,0x00000000
+		.long	0x00000000,0x00000000
+		.long	0x00000000,0x00000000
+		.long	0x00000000,0x00000000
+		.long	0x00000000,0x00000000
+/* buffer filled by tsch above */
+.Liplirb:	.long	0x00000000,0x00000000
+		.long	0x00000000,0x00000000
+		.long	0x00000000,0x00000000
+		.long	0x00000000,0x00000000
+		.long	0x00000000,0x00000000
+		.long	0x00000000,0x00000000
+		.long	0x00000000,0x00000000
+		.long	0x00000000,0x00000000
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
new file mode 100644
index 000000000..cfac28330
--- /dev/null
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -0,0 +1,121 @@
+/*
+ * Copyright IBM Corp. 2005
+ *
+ * Author(s): Rolf Adelsberger,
+ *	      Heiko Carstens <heiko.carstens@de.ibm.com>
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/sigp.h>
+
+/*
+ * moves the new kernel to its destination...
+ * %r2 = pointer to first kimage_entry_t
+ * %r3 = start address - where to jump to after the job is done...
+ *
+ * %r5 will be used as temp. storage
+ * %r6 holds the destination address
+ * %r7 = PAGE_SIZE
+ * %r8 holds the source address
+ * %r9 = PAGE_SIZE
+ *
+ * 0xf000 is a page_mask
+ *
+ * The low bits of each entry select its type, as tested in the loop
+ * below: 0x1 destination page, 0x2 indirection page, 0x4 done,
+ * 0x8 source page. Entries with none of these bits are skipped.
+ */
+
+	.text
+ENTRY(relocate_kernel)
+		basr	%r13,0		# base address
+	.base:
+		stnsm	sys_msk-.base(%r13),0xfb	# disable DAT
+		/* save control and general registers for after the diag */
+		stctg	%c0,%c15,ctlregs-.base(%r13)
+		stmg	%r0,%r15,gprregs-.base(%r13)
+		/* new PSW at 0x1d0 with .back_pgm as its address part */
+		lghi	%r0,3
+		sllg	%r0,%r0,31
+		stg	%r0,0x1d0(%r0)
+		la	%r0,.back_pgm-.base(%r13)
+		stg	%r0,0x1d8(%r0)
+		/* PSW at address 0 that continues at .back */
+		la	%r1,load_psw-.base(%r13)
+		mvc	0(8,%r0),0(%r1)
+		la	%r0,.back-.base(%r13)
+		st	%r0,4(%r0)
+		oi	4(%r0),0x80
+		lghi	%r0,0
+		diag	%r0,%r0,0x308
+	.back:
+		/* reached when the diag 0x308 above was carried out */
+		lhi	%r1,1		# mode 1 = esame
+		sigp	%r1,%r0,SIGP_SET_ARCHITECTURE # switch to esame mode
+		sam64			# switch to 64 bit addressing mode
+		basr	%r13,0
+	.back_base:
+		/* remember that diag 0x308 is usable, see .done */
+		oi	have_diag308-.back_base(%r13),0x01
+		lctlg	%c0,%c15,ctlregs-.back_base(%r13)
+		lmg	%r0,%r15,gprregs-.back_base(%r13)
+		j	.top
+	.back_pgm:
+		/* target of the PSW stored at 0x1d0: only restore the gprs */
+		lmg	%r0,%r15,gprregs-.base(%r13)
+	.top:
+		/* both lengths are reloaded on every pass of the loop */
+		lghi	%r7,4096	# load PAGE_SIZE in r7
+		lghi	%r9,4096	# load PAGE_SIZE in r9
+		lg	%r5,0(%r2)	# read another word for indirection page
+		aghi	%r2,8		# increment pointer
+		tml	%r5,0x1		# is it a destination page?
+		je	.indir_check	# NO, goto "indir_check"
+		lgr	%r6,%r5		# r6 = r5
+		nill	%r6,0xf000	# mask it out and...
+		j	.top		# ...next iteration
+	.indir_check:
+		tml	%r5,0x2		# is it a indirection page?
+		je	.done_test	# NO, goto "done_test"
+		nill	%r5,0xf000	# YES, mask out,
+		lgr	%r2,%r5		# move it into the right register,
+		j	.top		# and read next...
+	.done_test:
+		tml	%r5,0x4		# is it the done indicator?
+		je	.source_test	# NO! Well, then it should be the source indicator...
+		j	.done		# ok, lets finish it here...
+	.source_test:
+		tml	%r5,0x8		# it should be a source indicator...
+		je	.top		# NO, ignore it...
+		lgr	%r8,%r5		# r8 = r5
+		nill	%r8,0xf000	# masking
+		/* jo repeats the mvcle until the whole page has been copied */
+	0:	mvcle	%r6,%r8,0x0	# copy PAGE_SIZE bytes from r8 to r6 - pad with 0
+		jo	0b
+		j	.top
+	.done:
+		sgr	%r0,%r0		# clear register r0
+		la	%r4,load_psw-.base(%r13)	# load psw-address into the register
+		o	%r3,4(%r4)	# or load address into psw
+		st	%r3,4(%r4)
+		mvc	0(8,%r0),0(%r4)	# copy psw to absolute address 0
+		/* diag 0x308 is only issued if .back_base set have_diag308 */
+		tm	have_diag308-.base(%r13),0x01
+		jno	.no_diag308
+		diag	%r0,%r0,0x308
+	.no_diag308:
+		sam31			# 31 bit mode
+		sr	%r1,%r1		# erase register r1
+		sr	%r2,%r2		# erase register r2
+		sigp	%r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero
+		lpsw	0		# hopefully start new kernel...
+
+		/* data area, addressed relative to %r13 */
+		.align	8
+	load_psw:
+		.long	0x00080000,0x80000000
+	sys_msk:
+		.quad	0
+	ctlregs:
+		.rept	16
+		.quad	0
+		.endr
+	gprregs:
+		.rept	16
+		.quad	0
+		.endr
+	have_diag308:
+		.byte	0
+		.align	8
+	relocate_kernel_end:
+	.align 8
+	/* size in bytes of the code and data between the two labels */
+	.globl	relocate_kernel_len
+	relocate_kernel_len:
+		.quad	relocate_kernel_end - relocate_kernel
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
new file mode 100644
index 000000000..26b4ae96f
--- /dev/null
+++ b/arch/s390/kernel/runtime_instr.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright IBM Corp. 2012
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <asm/runtime_instr.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+
+/*
+ * Empty control block to disable RI by loading it. It is never written in
+ * this file, so it keeps its zero initialization as a file scope object.
+ */
+struct runtime_instr_cb runtime_instr_empty_cb;
+
+/*
+ * Returns non-zero if facility bit 64 is available; this file treats that
+ * as "runtime instrumentation is usable on this machine".
+ */
+static int runtime_instr_avail(void)
+{
+	return test_facility(64);
+}
+
+/*
+ * Switch runtime instrumentation off for the current task by loading the
+ * empty control block and clearing the RI bit in the user PSW.
+ */
+static void disable_runtime_instr(void)
+{
+	load_runtime_instr_cb(&runtime_instr_empty_cb);
+
+	/*
+	 * The RI bit has to go from the PSW as well: if the user did not
+	 * switch off RI before the system call, the process would otherwise
+	 * get a specification exception.
+	 */
+	task_pt_regs(current)->psw.mask &= ~PSW_MASK_RI;
+}
+
+/*
+ * Fill in the settings used for every freshly started control block.
+ * Callers in this file hand in a zeroed @cb, so only non-zero fields are
+ * set here.
+ */
+static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+	cb->buf_limit = 0xfff;
+	cb->int_requested = 1;
+	/* problem state controls (meaning presumed from the names) */
+	cb->pstate = 1;
+	cb->pstate_set_buf = 1;
+	cb->pstate_sample = 1;
+	cb->pstate_collect = 1;
+	cb->key = PAGE_DEFAULT_KEY;
+	/* mark the block as valid last */
+	cb->valid = 1;
+}
+
+/*
+ * Tear down runtime instrumentation of the current task: disable it,
+ * free the control block and reset the bookkeeping in the thread
+ * structure. Does nothing if the task has no control block.
+ */
+void exit_thread_runtime_instr(void)
+{
+	struct thread_struct *thread = &current->thread;
+
+	if (thread->ri_cb == NULL)
+		return;
+
+	disable_runtime_instr();
+	kfree(thread->ri_cb);
+	thread->ri_signum = 0;
+	thread->ri_cb = NULL;
+}
+
+/*
+ * External interrupt handler for measurement alerts that concern runtime
+ * instrumentation. Queues the signal registered by the current task, with
+ * si_int set to ENOBUFS (buffer full) or ECANCELED (halted). Alerts with
+ * any other reason are dropped.
+ */
+static void runtime_instr_int_handler(struct ext_code ext_code,
+				unsigned int param32, unsigned long param64)
+{
+	struct siginfo info;
+	int reason;
+
+	/* Not a runtime instrumentation alert. */
+	if ((param32 & CPU_MF_INT_RI_MASK) == 0)
+		return;
+
+	inc_irq_stat(IRQEXT_CMR);
+
+	/* The current task does not use runtime instrumentation. */
+	if (current->thread.ri_cb == NULL)
+		return;
+
+	/* The syscall only accepts real-time signals, anything else is a bug. */
+	if (current->thread.ri_signum < SIGRTMIN ||
+	    current->thread.ri_signum > SIGRTMAX) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	if (param32 & CPU_MF_INT_RI_BUF_FULL)
+		reason = ENOBUFS;
+	else if (param32 & CPU_MF_INT_RI_HALTED)
+		reason = ECANCELED;
+	else
+		return; /* unknown reason */
+
+	memset(&info, 0, sizeof(info));
+	info.si_signo = current->thread.ri_signum;
+	info.si_code = SI_QUEUE;
+	info.si_int = reason;
+
+	send_sig_info(current->thread.ri_signum, &info, current);
+}
+
+/*
+ * s390_runtime_instr system call.
+ * @command: S390_RUNTIME_INSTR_START or S390_RUNTIME_INSTR_STOP
+ * @signum:  real-time signal to queue on alerts (START only)
+ *
+ * Returns 0 on success, -EOPNOTSUPP without the facility, -EINVAL for an
+ * unknown command or a signal outside SIGRTMIN..SIGRTMAX, -ENOMEM if the
+ * control block cannot be allocated.
+ */
+SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
+{
+	struct runtime_instr_cb *cb;
+
+	if (!runtime_instr_avail())
+		return -EOPNOTSUPP;
+
+	if (command == S390_RUNTIME_INSTR_STOP) {
+		preempt_disable();
+		exit_thread_runtime_instr();
+		preempt_enable();
+		return 0;
+	}
+
+	if (command != S390_RUNTIME_INSTR_START)
+		return -EINVAL;
+	if (signum < SIGRTMIN || signum > SIGRTMAX)
+		return -EINVAL;
+
+	/* Reuse an existing control block, otherwise get a zeroed one. */
+	cb = current->thread.ri_cb;
+	if (cb) {
+		memset(cb, 0, sizeof(*cb));
+	} else {
+		cb = kzalloc(sizeof(*cb), GFP_KERNEL);
+		if (!cb)
+			return -ENOMEM;
+	}
+
+	init_runtime_instr_cb(cb);
+	current->thread.ri_signum = signum;
+
+	/* now load the control block to make it available */
+	preempt_disable();
+	current->thread.ri_cb = cb;
+	load_runtime_instr_cb(cb);
+	preempt_enable();
+	return 0;
+}
+
+/*
+ * Hook up the measurement alert interrupt handler if the facility is
+ * available. Returns 0 on success or without the facility, otherwise the
+ * error of register_external_irq().
+ */
+static int __init runtime_instr_init(void)
+{
+	int rc;
+
+	if (!runtime_instr_avail())
+		return 0;
+
+	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+	rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+				   runtime_instr_int_handler);
+	if (rc) {
+		/* Undo the subclass registration on failure. */
+		irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+		return rc;
+	}
+
+	pr_info("Runtime instrumentation facility initialized\n");
+	return 0;
+}
+device_initcall(runtime_instr_init);
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
new file mode 100644
index 000000000..9f6046793
--- /dev/null
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -0,0 +1,13 @@
+#include <linux/module.h>
+#include <linux/kvm_host.h>
+#include <asm/ftrace.h>
+
+/* _mcount is only exported when the function tracer is configured. */
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(_mcount);
+#endif
+/* sie64a/sie_exit are exported when KVM is built in or built as a module. */
+#if IS_ENABLED(CONFIG_KVM)
+EXPORT_SYMBOL(sie64a);
+EXPORT_SYMBOL(sie_exit);
+#endif
+/* memcpy and memset are exported unconditionally. */
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
new file mode 100644
index 000000000..43c3169ea
--- /dev/null
+++ b/arch/s390/kernel/sclp.S
@@ -0,0 +1,351 @@
+/*
+ * Mini SCLP driver.
+ *
+ * Copyright IBM Corp. 2004, 2009
+ *
+ *   Author(s):	Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>,
+ *		Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/irq.h>
+
+LC_EXT_NEW_PSW		= 0x58			# addr of ext int handler
+LC_EXT_NEW_PSW_64	= 0x1b0			# addr of ext int handler 64 bit
+LC_EXT_INT_PARAM	= 0x80			# addr of ext int parameter
+LC_EXT_INT_CODE		= 0x86			# addr of ext int code
+LC_AR_MODE_ID		= 0xa3
+
+#
+# Subroutine which waits synchronously until either an external interruption
+# or a timeout occurs.
+#
+# Parameters:
+#   R2	= 0 for no timeout, non-zero for timeout in (approximated) seconds
+#
+# Returns:
+#   R2	= 0 on interrupt, 2 on timeout
+#   R3	= external interruption parameter if R2=0
+#
+
+_sclp_wait_int:
+	stm	%r6,%r15,24(%r15)		# save registers
+	basr	%r13,0				# get base register
+.LbaseS1:
+	ahi	%r15,-96			# create stack frame
+	# Start with the 31-bit external new PSW slot and handler PSW; the
+	# 64-bit variants are selected below when the mask-1 bit of
+	# LC_AR_MODE_ID is set.
+	la	%r8,LC_EXT_NEW_PSW		# register int handler
+	la	%r9,.LextpswS1-.LbaseS1(%r13)
+	tm	LC_AR_MODE_ID,1
+	jno	.Lesa1
+	la	%r8,LC_EXT_NEW_PSW_64		# register int handler 64 bit
+	la	%r9,.LextpswS1_64-.LbaseS1(%r13)
+.Lesa1:
+	# Save 16 bytes of the current external new PSW, then install ours.
+	mvc	.LoldpswS1-.LbaseS1(16,%r13),0(%r8)
+	mvc	0(16,%r8),0(%r9)
+	epsw	%r6,%r7				# set current addressing mode
+	nill	%r6,0x1				# in new psw (31 or 64 bit mode)
+	nilh	%r7,0x8000
+	stm	%r6,%r7,0(%r8)
+	lhi	%r6,0x0200			# cr mask for ext int (cr0.54)
+	ltr	%r2,%r2
+	jz	.LsetctS1
+	# Timeout requested: also enable the clock comparator interrupt and
+	# program the comparator from the current clock plus R2.
+	ahi	%r6,0x0800			# cr mask for clock int (cr0.52)
+	stck	.LtimeS1-.LbaseS1(%r13)		# initiate timeout
+	al	%r2,.LtimeS1-.LbaseS1(%r13)
+	st	%r2,.LtimeS1-.LbaseS1(%r13)
+	sckc	.LtimeS1-.LbaseS1(%r13)
+
+.LsetctS1:
+	# %r0 keeps the original CR0 word; it is written back to .LctlS1
+	# after the modified value was loaded, so the lctl at .LtimeoutS1
+	# restores the caller's setting.
+	stctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)	# enable required interrupts
+	l	%r0,.LctlS1-.LbaseS1(%r13)
+	lhi	%r1,~(0x200 | 0x800)		# clear old values
+	nr	%r1,%r0
+	or	%r1,%r6				# set new value
+	st	%r1,.LctlS1-.LbaseS1(%r13)
+	lctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)
+	st	%r0,.LctlS1-.LbaseS1(%r13)
+	lhi	%r2,2				# return code for timeout
+.LloopS1:
+	lpsw	.LwaitpswS1-.LbaseS1(%r13)	# wait until interrupt
+.LwaitS1:
+	# Reached through the external new PSW installed above; interruption
+	# codes other than clock comparator or service signal loop back into
+	# the wait.
+	lh	%r7,LC_EXT_INT_CODE
+	chi	%r7,EXT_IRQ_CLK_COMP		# timeout?
+	je	.LtimeoutS1
+	chi	%r7,EXT_IRQ_SERVICE_SIG		# service int?
+	jne	.LloopS1
+	sr	%r2,%r2
+	l	%r3,LC_EXT_INT_PARAM
+.LtimeoutS1:
+	lctl	%c0,%c0,.LctlS1-.LbaseS1(%r13)	# restore interrupt setting
+	# restore old handler
+	mvc	0(16,%r8),.LoldpswS1-.LbaseS1(%r13)
+	lm	%r6,%r15,120(%r15)		# restore registers
+	br	%r14				# return to caller
+
+	.align	8
+.LoldpswS1:
+	.long	0, 0, 0, 0			# old ext int PSW
+.LextpswS1:
+	.long	0x00080000, 0x80000000+.LwaitS1	# PSW to handle ext int
+.LextpswS1_64:
+	.quad	0, .LwaitS1			# PSW to handle ext int, 64 bit
+.LwaitpswS1:
+	.long	0x010a0000, 0x00000000+.LloopS1	# PSW to wait for ext int
+.LtimeS1:
+	.quad	0				# current time
+.LctlS1:
+	.long	0				# CT0 contents
+
+#
+# Subroutine to synchronously issue a service call.
+#
+# Parameters:
+#   R2	= command word
+#   R3	= sccb address
+#
+# Returns:
+#   R2	= 0 on success, 1 on failure
+#   R3	= sccb response code if R2 = 0
+#
+
+_sclp_servc:
+	stm	%r6,%r15,24(%r15)		# save registers
+	ahi	%r15,-96			# create stack frame
+	lr	%r6,%r2				# save command word
+	lr	%r7,%r3				# save sccb address
+.LretryS2:
+	lhi	%r2,1				# error return code
+	.insn	rre,0xb2200000,%r6,%r7		# servc
+	brc	1,.LendS2			# exit if not operational
+	brc	8,.LnotbusyS2			# go on if not busy
+	# Busy: wait for an interrupt with R2 = 0 (no timeout), then reissue
+	# the service call.
+	sr	%r2,%r2				# wait until no longer busy
+	bras	%r14,_sclp_wait_int
+	j	.LretryS2			# retry
+.LnotbusyS2:
+	sr	%r2,%r2				# wait until result
+	bras	%r14,_sclp_wait_int
+	# Report success and return the halfword at offset 6 of the sccb,
+	# which the routine header describes as the response code.
+	sr	%r2,%r2
+	lh	%r3,6(%r7)
+.LendS2:
+	# The caller's %r14 (overwritten by the bras calls above) comes back
+	# with the register restore.
+	lm	%r6,%r15,120(%r15)		# restore registers
+	br	%r14
+
+#
+# Subroutine to set up the SCLP interface.
+#
+# Parameters:
+#   R2	= 0 to activate, non-zero to deactivate
+#
+# Returns:
+#   R2	= 0 on success, non-zero on failure
+#
+
+_sclp_setup:
+	stm	%r6,%r15,24(%r15)		# save registers
+	ahi	%r15,-96			# create stack frame
+	basr	%r13,0				# get base register
+.LbaseS3:
+	# Copy the write-mask sccb template into the work area.
+	l	%r6,.LsccbS0-.LbaseS3(%r13)	# prepare init mask sccb
+	mvc	0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13)
+	ltr	%r2,%r2				# initialization?
+	jz	.LdoinitS3			# go ahead
+	# clear masks
+	xc	.LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6)
+.LdoinitS3:
+	l	%r2,.LwritemaskS3-.LbaseS3(%r13)# get command word
+	lr	%r3,%r6				# get sccb address
+	bras	%r14,_sclp_servc		# issue service call
+	ltr	%r2,%r2				# servc successful?
+	jnz	.LerrorS3
+	chi	%r3,0x20			# write mask successful?
+	jne	.LerrorS3
+	# check masks: each requested mask ANDed with the corresponding
+	# word returned in the sccb must still equal the requested mask
+	la	%r2,.LinitmaskS3-.LinitsccbS3(%r6)
+	l	%r1,0(%r2)			# receive mask ok?
+	n	%r1,12(%r2)
+	cl	%r1,0(%r2)
+	jne	.LerrorS3
+	l	%r1,4(%r2)			# send mask ok?
+	n	%r1,8(%r2)
+	cl	%r1,4(%r2)
+	# Load the success return code without touching the condition code
+	# of the compare above. "sr" would set the condition code for a zero
+	# result and make the branch below unconditional, so a rejected send
+	# mask was never reported.
+	lhi	%r2,0
+	je	.LendS3
+.LerrorS3:
+	lhi	%r2,1				# error return code
+.LendS3:
+	lm	%r6,%r15,120(%r15)		# restore registers
+	br	%r14
+.LwritemaskS3:
+	.long	0x00780005			# SCLP command for write mask
+.LinitsccbS3:
+	.word	.LinitendS3-.LinitsccbS3
+	.byte	0,0,0,0
+	.word	0
+	.word	0
+	.word	4
+.LinitmaskS3:
+	.long	0x80000000
+	.long	0x40000000
+	.long	0
+	.long	0
+.LinitendS3:
+
+#
+# Subroutine which prints a given text to the SCLP console.
+#
+# Parameters:
+#   R2	= address of nil-terminated ASCII text
+#
+# Returns:
+#   R2	= 0 on success, 1 on failure
+#
+
+_sclp_print:
+	stm	%r6,%r15,24(%r15)		# save registers
+	ahi	%r15,-96			# create stack frame
+	basr	%r13,0				# get base register
+.LbaseS4:
+	# Copy the write-data sccb template (everything up to the first mto)
+	# into the work area; mtos are appended behind it, one per text line.
+	l	%r8,.LsccbS0-.LbaseS4(%r13)	# prepare write data sccb
+	mvc	0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13)
+	la	%r7,.LmtoS4-.LwritesccbS4(%r8)	# current mto addr
+	sr	%r0,%r0
+	l	%r10,.Lascebc-.LbaseS4(%r13)	# address of translation table
+.LinitmtoS4:
+	# initialize mto
+	mvc	0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13)
+	lhi	%r6,.LmtoendS4-.LmtoS4		# current mto length
+.LloopS4:
+	# %r0 was cleared above and ic only replaces its low byte, so the
+	# tests below see just the character value.
+	ic	%r0,0(%r2)			# get character
+	ahi	%r2,1
+	ltr	%r0,%r0				# end of string?
+	jz	.LfinalizemtoS4
+	chi	%r0,0x0a			# end of line (NL)?
+	jz	.LfinalizemtoS4
+	stc	%r0,0(%r6,%r7)			# copy to mto
+	la	%r11,0(%r6,%r7)
+	tr	0(1,%r11),0(%r10)		# translate to EBCDIC
+	ahi	%r6,1
+	j	.LloopS4
+.LfinalizemtoS4:
+	# Store the final mto length and add it to the mdb, evbuf and sccb
+	# length fields.
+	sth	%r6,0(%r7)			# update mto length
+	lh	%r9,.LmdbS4-.LwritesccbS4(%r8)	# update mdb length
+	ar	%r9,%r6
+	sth	%r9,.LmdbS4-.LwritesccbS4(%r8)
+	lh	%r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length
+	ar	%r9,%r6
+	sth	%r9,.LevbufS4-.LwritesccbS4(%r8)
+	lh	%r9,0(%r8)			# update sccb length
+	ar	%r9,%r6
+	sth	%r9,0(%r8)
+	ar	%r7,%r6				# update current mto address
+	# A newline (non-zero %r0) starts another mto; the terminating nil
+	# falls through to the service call.
+	ltr	%r0,%r0				# more characters?
+	jnz	.LinitmtoS4
+	l	%r2,.LwritedataS4-.LbaseS4(%r13)# write data
+	lr	%r3,%r8
+	bras	%r14,_sclp_servc
+	ltr	%r2,%r2				# servc successful?
+	jnz	.LendS4
+	chi	%r3,0x20			# write data successful?
+	je	.LendS4
+	lhi	%r2,1				# error return code
+.LendS4:
+	lm	%r6,%r15,120(%r15)		# restore registers
+	br	%r14
+
+#
+# Function which prints a given text to the SCLP console.
+#
+# Parameters:
+#   R2	= address of nil-terminated ASCII text
+#
+# Returns:
+#   R2	= 0 on success, 1 on failure
+#
+
+ENTRY(_sclp_print_early)
+	stm	%r6,%r15,24(%r15)		# save registers
+	ahi	%r15,-96			# create stack frame
+	# When the mask-1 bit of LC_AR_MODE_ID is set, additionally save the
+	# upper register halves in an extra 80 byte area.
+	tm	LC_AR_MODE_ID,1
+	jno	.Lesa2
+	ahi	%r15,-80
+	stmh	%r6,%r15,96(%r15)		# store upper register halves
+.Lesa2:
+	lr	%r10,%r2			# save string pointer
+	lhi	%r2,0
+	bras	%r14,_sclp_setup		# enable console
+	ltr	%r2,%r2
+	jnz	.LendS5
+	lr	%r2,%r10
+	bras	%r14,_sclp_print		# print string
+	# NOTE: a failing print skips the "disable console" call below.
+	ltr	%r2,%r2
+	jnz	.LendS5
+	lhi	%r2,1
+	bras	%r14,_sclp_setup		# disable console
+.LendS5:
+	tm	LC_AR_MODE_ID,1
+	jno	.Lesa3
+	lgfr	%r2,%r2				# sign extend return value
+	lmh	%r6,%r15,96(%r15)		# restore upper register halves
+	ahi	%r15,80
+.Lesa3:
+	lm	%r6,%r15,120(%r15)		# restore registers
+	br	%r14
+
+.LwritedataS4:
+	.long	0x00760005			# SCLP command for write data
+.LwritesccbS4:
+	# sccb
+	.word	.LmtoS4-.LwritesccbS4
+	.byte	0
+	.byte	0,0,0
+	.word	0
+
+	# evbuf
+.LevbufS4:
+	.word	.LmtoS4-.LevbufS4
+	.byte	0x02
+	.byte	0
+	.word	0
+
+.LmdbS4:
+	# mdb
+	.word	.LmtoS4-.LmdbS4
+	.word	1
+	.long	0xd4c4c240
+	.long	1
+
+	# go
+.LgoS4:
+	.word	.LmtoS4-.LgoS4
+	.word	1
+	.long	0
+	.byte	0,0,0,0,0,0,0,0
+	.byte	0,0,0
+	.byte	0
+	.byte	0,0,0,0,0,0,0
+	.byte	0
+	.word	0
+	.byte	0,0,0,0,0,0,0,0,0,0
+	.byte	0,0,0,0,0,0,0,0
+	.byte	0,0,0,0,0,0,0,0
+
+.LmtoS4:
+	.word	.LmtoendS4-.LmtoS4
+	.word	4
+	.word	0x1000
+	.byte	0
+	.byte	0,0,0
+.LmtoendS4:
+
+	# Global constants
+.LsccbS0:
+	.long	_sclp_work_area
+.Lascebc:
+	.long	_ascebc
+
+.section .data,"aw",@progbits
+	.balign 4096
+_sclp_work_area:
+	.fill	4096
+.previous
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
new file mode 100644
index 000000000..7262fe438
--- /dev/null
+++ b/arch/s390/kernel/setup.c
@@ -0,0 +1,892 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2012
+ *    Author(s): Hartmut Penner (hp@de.ibm.com),
+ *               Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ *  Derived from "arch/i386/kernel/setup.c"
+ *    Copyright (C) 1995, Linus Torvalds
+ */
+
+/*
+ * This file handles the architecture-dependent parts of initialization
+ */
+
+#define KMSG_COMPONENT "setup"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/random.h>
+#include <linux/user.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/bootmem.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+#include <linux/kernel_stat.h>
+#include <linux/device.h>
+#include <linux/notifier.h>
+#include <linux/pfn.h>
+#include <linux/ctype.h>
+#include <linux/reboot.h>
+#include <linux/topology.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+#include <linux/memory.h>
+#include <linux/compat.h>
+
+#include <asm/ipl.h>
+#include <asm/facility.h>
+#include <asm/smp.h>
+#include <asm/mmu_context.h>
+#include <asm/cpcmd.h>
+#include <asm/lowcore.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/sections.h>
+#include <asm/ebcdic.h>
+#include <asm/kvm_virtio.h>
+#include <asm/diag.h>
+#include <asm/os_info.h>
+#include <asm/sclp.h>
+#include <asm/sysinfo.h>
+#include "entry.h"
+
+/*
+ * Machine setup..
+ */
+unsigned int console_mode = 0;
+EXPORT_SYMBOL(console_mode);
+
+unsigned int console_devno = -1;
+EXPORT_SYMBOL(console_devno);
+
+unsigned int console_irq = -1;
+EXPORT_SYMBOL(console_irq);
+
+unsigned long elf_hwcap = 0;
+char elf_platform[ELF_PLATFORM_SIZE];
+
+int __initdata memory_end_set;
+unsigned long __initdata memory_end;
+unsigned long __initdata max_physmem_end;
+
+unsigned long VMALLOC_START;
+EXPORT_SYMBOL(VMALLOC_START);
+
+unsigned long VMALLOC_END;
+EXPORT_SYMBOL(VMALLOC_END);
+
+struct page *vmemmap;
+EXPORT_SYMBOL(vmemmap);
+
+unsigned long MODULES_VADDR;
+unsigned long MODULES_END;
+
+/* An array with a pointer to the lowcore of every CPU. */
+struct _lowcore *lowcore_ptr[NR_CPUS];
+EXPORT_SYMBOL(lowcore_ptr);
+
+/*
+ * This is set up by the setup-routine at boot-time
+ * for S390 need to find out, what we have to setup
+ * using address 0x10400 ...
+ */
+
+#include <asm/setup.h>
+
+/*
+ * condev= and conmode= setup parameter.
+ */
+
+/*
+ * Handle the "condev=" parameter: a device number in the range 0..65535
+ * becomes console_devno and resets console_irq to -1; other values are
+ * ignored. Always returns 1.
+ */
+static int __init condev_setup(char *str)
+{
+	int devno = simple_strtoul(str, &str, 0);
+
+	if (devno < 0 || devno >= 65536)
+		return 1;
+
+	console_devno = devno;
+	console_irq = -1;
+	return 1;
+}
+
+__setup("condev=", condev_setup);
+
+/*
+ * Register the preferred console that matches the machine type and the
+ * selected console mode.
+ */
+static void __init set_preferred_console(void)
+{
+	if (MACHINE_IS_KVM) {
+		if (sclp_has_vt220()) {
+			add_preferred_console("ttyS", 1, NULL);
+			return;
+		}
+		if (sclp_has_linemode()) {
+			add_preferred_console("ttyS", 0, NULL);
+			return;
+		}
+		add_preferred_console("hvc", 0, NULL);
+		return;
+	}
+
+	if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) {
+		add_preferred_console("ttyS", 0, NULL);
+		return;
+	}
+
+	if (CONSOLE_IS_3270)
+		add_preferred_console("tty3270", 0, NULL);
+}
+
+/*
+ * Handle the "conmode=" parameter. The value must match one of "hwc",
+ * "sclp", "3215" or "3270" exactly, and the matching console driver must be
+ * configured, for the console mode to change. The preferred console is
+ * re-evaluated in any case. Always returns 1.
+ */
+static int __init conmode_setup(char *str)
+{
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+	if (!strcmp(str, "hwc") || !strcmp(str, "sclp")) {
+		SET_CONSOLE_SCLP;
+	}
+#endif
+#if defined(CONFIG_TN3215_CONSOLE)
+	if (!strcmp(str, "3215")) {
+		SET_CONSOLE_3215;
+	}
+#endif
+#if defined(CONFIG_TN3270_CONSOLE)
+	if (!strcmp(str, "3270")) {
+		SET_CONSOLE_3270;
+	}
+#endif
+	set_preferred_console();
+	return 1;
+}
+
+__setup("conmode=", conmode_setup);
+
+/*
+ * Select the default console mode.
+ *
+ * As a z/VM guest the console device number and subchannel are parsed from
+ * the "QUERY CONSOLE" response and the console mode from the "QUERY TERM"
+ * response; otherwise SCLP is used when it is configured.
+ */
+static void __init conmode_default(void)
+{
+	char query_buffer[1024];
+	char *ptr;
+
+	if (MACHINE_IS_VM) {
+		cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
+		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
+		ptr = strstr(query_buffer, "SUBCHANNEL =");
+		/*
+		 * Do not dereference a failed search: without a subchannel
+		 * in the response console_irq keeps its current value.
+		 */
+		if (ptr)
+			console_irq = simple_strtoul(ptr + 13, NULL, 16);
+		cpcmd("QUERY TERM", query_buffer, 1024, NULL);
+		ptr = strstr(query_buffer, "CONMODE");
+		/*
+		 * Set the conmode to 3215 so that the device recognition
+		 * will set the cu_type of the console to 3215. If the
+		 * conmode is 3270 and we don't set it back then both
+		 * 3215 and the 3270 driver will try to access the console
+		 * device (3215 as console and 3270 as normal tty).
+		 */
+		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
+		if (ptr == NULL) {
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+			SET_CONSOLE_SCLP;
+#endif
+			return;
+		}
+		/* Prefer the reported mode, fall back to what is configured. */
+		if (strncmp(ptr + 8, "3270", 4) == 0) {
+#if defined(CONFIG_TN3270_CONSOLE)
+			SET_CONSOLE_3270;
+#elif defined(CONFIG_TN3215_CONSOLE)
+			SET_CONSOLE_3215;
+#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+			SET_CONSOLE_SCLP;
+#endif
+		} else if (strncmp(ptr + 8, "3215", 4) == 0) {
+#if defined(CONFIG_TN3215_CONSOLE)
+			SET_CONSOLE_3215;
+#elif defined(CONFIG_TN3270_CONSOLE)
+			SET_CONSOLE_3270;
+#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+			SET_CONSOLE_SCLP;
+#endif
+		}
+	} else {
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+		SET_CONSOLE_SCLP;
+#endif
+	}
+}
+
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * For an FCP dump IPL without OLDMEM_BASE: append the cio_ignore setting to
+ * the boot command line and lower the console log level to 2.
+ */
+static void __init setup_zfcpdump(void)
+{
+	if (ipl_info.type == IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) {
+		strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
+		console_loglevel = 2;
+	}
+}
+#else
+static inline void setup_zfcpdump(void) {}
+#endif /* CONFIG_CRASH_DUMP */
+
+/*
+ * Reboot, halt and power_off stubs. They just call _machine_restart,
+ * _machine_halt or _machine_power_off.
+ */
+
+/*
+ * Restart the machine via _machine_restart(). The console is unblanked first,
+ * but only when called from enabled, non-atomic context or when
+ * bust_spinlocks (oops_in_progress) cleared the way for us.
+ */
+void machine_restart(char *command)
+{
+	int may_unblank;
+
+	may_unblank = !(in_interrupt() || in_atomic()) || oops_in_progress;
+	if (may_unblank)
+		console_unblank();
+	_machine_restart(command);
+}
+
+/*
+ * Halt the machine via _machine_halt(). The console is unblanked first, but
+ * only outside of interrupt context or when bust_spinlocks
+ * (oops_in_progress) cleared the way for us.
+ */
+void machine_halt(void)
+{
+	int may_unblank = !in_interrupt() || oops_in_progress;
+
+	if (may_unblank)
+		console_unblank();
+	_machine_halt();
+}
+
+/*
+ * Power off the machine via _machine_power_off(). The console is unblanked
+ * first, but only outside of interrupt context or when bust_spinlocks
+ * (oops_in_progress) cleared the way for us.
+ */
+void machine_power_off(void)
+{
+	int may_unblank = !in_interrupt() || oops_in_progress;
+
+	if (may_unblank)
+		console_unblank();
+	_machine_power_off();
+}
+
+/*
+ * Dummy power off function.
+ */
+void (*pm_power_off)(void) = machine_power_off;
+EXPORT_SYMBOL_GPL(pm_power_off);
+
+/*
+ * Handle the "mem=" early parameter: record the requested memory limit,
+ * rounded down to a page boundary, in memory_end and flag it as explicitly
+ * set.
+ *
+ * Returns 0 on success or -EINVAL when no value was supplied.
+ */
+static int __init early_parse_mem(char *p)
+{
+	/* Reject a missing value, as parse_vmalloc() does. */
+	if (!p)
+		return -EINVAL;
+	memory_end = memparse(p, &p);
+	memory_end &= PAGE_MASK;
+	memory_end_set = 1;
+	return 0;
+}
+early_param("mem", early_parse_mem);
+
+/*
+ * Handle the "vmalloc=" early parameter: store the requested size, rounded
+ * up to a full page, in VMALLOC_END. Returns -EINVAL without a value.
+ */
+static int __init parse_vmalloc(char *arg)
+{
+	unsigned long long size;
+
+	if (!arg)
+		return -EINVAL;
+
+	size = memparse(arg, &arg);
+	VMALLOC_END = (size + PAGE_SIZE - 1) & PAGE_MASK;
+	return 0;
+}
+early_param("vmalloc", parse_vmalloc);
+
+void *restart_stack __attribute__((__section__(".data")));
+
+/*
+ * Allocate and initialize the lowcore of the boot CPU: new PSWs for all
+ * interruption classes, kernel/async/panic stacks, values carried over from
+ * the current lowcore (S390_lowcore) and the PSW restart setup. Finally the
+ * new lowcore is activated via set_prefix() and stored in lowcore_ptr[0].
+ */
+static void __init setup_lowcore(void)
+{
+	struct _lowcore *lc;
+
+	/*
+	 * Setup lowcore for boot cpu
+	 */
+	BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096);
+	lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
+	/* New PSWs: one mask/handler address pair per interruption class. */
+	lc->restart_psw.mask = PSW_KERNEL_BITS;
+	lc->restart_psw.addr =
+		PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
+	lc->external_new_psw.mask = PSW_KERNEL_BITS |
+		PSW_MASK_DAT | PSW_MASK_MCHECK;
+	lc->external_new_psw.addr =
+		PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
+	lc->svc_new_psw.mask = PSW_KERNEL_BITS |
+		PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+	lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
+	lc->program_new_psw.mask = PSW_KERNEL_BITS |
+		PSW_MASK_DAT | PSW_MASK_MCHECK;
+	lc->program_new_psw.addr =
+		PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
+	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
+	lc->mcck_new_psw.addr =
+		PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
+	lc->io_new_psw.mask = PSW_KERNEL_BITS |
+		PSW_MASK_DAT | PSW_MASK_MCHECK;
+	lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
+	lc->clock_comparator = -1ULL;
+	/*
+	 * Stack pointers start at the top of each stack area, leaving room
+	 * for one stack frame and one struct pt_regs.
+	 */
+	lc->kernel_stack = ((unsigned long) &init_thread_union)
+		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+	lc->async_stack = (unsigned long)
+		__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0)
+		+ ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+	lc->panic_stack = (unsigned long)
+		__alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0)
+		+ PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+	lc->current_task = (unsigned long) init_thread_union.thread_info.task;
+	lc->thread_info = (unsigned long) &init_thread_union;
+	/* Carry over machine flags and facility lists from the current lowcore. */
+	lc->machine_flags = S390_lowcore.machine_flags;
+	lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
+	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
+	       MAX_FACILITY_BIT/8);
+	if (MACHINE_HAS_VX)
+		lc->vector_save_area_addr =
+			(unsigned long) &lc->vector_save_area;
+	lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
+	/* Carry over the timer values from the current lowcore. */
+	lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
+	lc->async_enter_timer = S390_lowcore.async_enter_timer;
+	lc->exit_timer = S390_lowcore.exit_timer;
+	lc->user_timer = S390_lowcore.user_timer;
+	lc->system_timer = S390_lowcore.system_timer;
+	lc->steal_timer = S390_lowcore.steal_timer;
+	lc->last_update_timer = S390_lowcore.last_update_timer;
+	lc->last_update_clock = S390_lowcore.last_update_clock;
+
+	/* restart_stack points to the end of the allocated area. */
+	restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
+	restart_stack += ASYNC_SIZE;
+
+	/*
+	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
+	 * restart data to the absolute zero lowcore. This is necessary if
+	 * PSW restart is done on an offline CPU that has lowcore zero.
+	 */
+	lc->restart_stack = (unsigned long) restart_stack;
+	lc->restart_fn = (unsigned long) do_restart;
+	lc->restart_data = 0;
+	lc->restart_source = -1UL;
+
+	/* Setup absolute zero lowcore */
+	mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
+	mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
+	mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
+	mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
+	mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
+
+#ifdef CONFIG_SMP
+	lc->spinlock_lockval = arch_spin_lockval(0);
+#endif
+
+	/* Activate the new lowcore and record it for the boot CPU. */
+	set_prefix((u32)(unsigned long) lc);
+	lowcore_ptr[0] = lc;
+}
+
+/*
+ * Resources describing the kernel image. Their start/end addresses are
+ * filled in by setup_resources(), which also requests them.
+ */
+static struct resource code_resource = {
+	.name  = "Kernel code",
+	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+static struct resource data_resource = {
+	.name = "Kernel data",
+	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+static struct resource bss_resource = {
+	.name = "Kernel bss",
+	.flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+/* The table itself is init data; the resources it points to are not. */
+static struct resource __initdata *standard_resources[] = {
+	&code_resource,
+	&data_resource,
+	&bss_resource,
+};
+
+/*
+ * Register one "System RAM" resource per memblock memory region below
+ * iomem_resource and nest the kernel code/data/bss resources inside the
+ * region they start in.
+ */
+static void __init setup_resources(void)
+{
+	struct resource *res, *std_res, *sub_res;
+	struct memblock_region *reg;
+	int j;
+
+	code_resource.start = (unsigned long) &_text;
+	code_resource.end = (unsigned long) &_etext - 1;
+	data_resource.start = (unsigned long) &_etext;
+	data_resource.end = (unsigned long) &_edata - 1;
+	bss_resource.start = (unsigned long) &__bss_start;
+	bss_resource.end = (unsigned long) &__bss_stop - 1;
+
+	for_each_memblock(memory, reg) {
+		res = alloc_bootmem_low(sizeof(*res));
+		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+
+		res->name = "System RAM";
+		res->start = reg->base;
+		res->end = reg->base + reg->size - 1;
+		request_resource(&iomem_resource, res);
+
+		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
+			std_res = standard_resources[j];
+			/* Skip resources that do not start in this region. */
+			if (std_res->start < res->start ||
+			    std_res->start > res->end)
+				continue;
+			if (std_res->end > res->end) {
+				/*
+				 * The resource extends beyond this region:
+				 * request a copy clipped to the region end
+				 * and move the start of the original behind
+				 * the region, so the rest is matched against
+				 * the following regions.
+				 */
+				sub_res = alloc_bootmem_low(sizeof(*sub_res));
+				*sub_res = *std_res;
+				sub_res->end = res->end;
+				std_res->start = res->end + 1;
+				request_resource(res, sub_res);
+			} else {
+				request_resource(res, std_res);
+			}
+		}
+	}
+}
+
+/*
+ * Lay out the kernel address space: choose the size of the address space
+ * (vmax), place the module and vmalloc areas at its end, position the
+ * vmemmap array below the vmalloc area, and clamp memory_end so that it does
+ * not exceed the start of vmemmap. Memory above memory_end is removed from
+ * memblock.
+ */
+static void __init setup_memory_end(void)
+{
+	unsigned long vmax, vmalloc_size, tmp;
+
+	/* Choose kernel address space layout: 2, 3, or 4 levels. */
+	/* At this point VMALLOC_END holds the "vmalloc=" size, if one was given. */
+	vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
+	/* Space needed for the 1:1 mapping plus one struct page per page. */
+	tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
+	tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
+	if (tmp + vmalloc_size + MODULES_LEN <= (1UL << 42))
+		vmax = 1UL << 42;	/* 3-level kernel page table */
+	else
+		vmax = 1UL << 53;	/* 4-level kernel page table */
+	/* module area is at the end of the kernel address space. */
+	MODULES_END = vmax;
+	MODULES_VADDR = MODULES_END - MODULES_LEN;
+	VMALLOC_END = MODULES_VADDR;
+	VMALLOC_START = vmax - vmalloc_size;
+
+	/* Split remaining virtual space between 1:1 mapping & vmemmap array */
+	tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
+	/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
+	tmp = SECTION_ALIGN_UP(tmp);
+	tmp = VMALLOC_START - tmp * sizeof(struct page);
+	tmp &= ~((vmax >> 11) - 1);	/* align to page table level */
+	tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
+	vmemmap = (struct page *) tmp;
+
+	/* Take care that memory_end is set and <= vmemmap */
+	memory_end = min(memory_end ?: max_physmem_end, tmp);
+	max_pfn = max_low_pfn = PFN_DOWN(memory_end);
+	memblock_remove(memory_end, ULONG_MAX);
+
+	pr_notice("Max memory size: %luMB\n", memory_end >> 20);
+}
+
+/*
+ * Store the value returned by paddr_vmcoreinfo_note() in the vmcore_info
+ * field of the lowcore, using an absolute-address store.
+ */
+static void __init setup_vmcoreinfo(void)
+{
+	mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
+}
+
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * When kdump is enabled, we have to ensure that no memory from
+ * the area [0 - crashkernel memory size] and
+ * [crashk_res.start - crashk_res.end] is set offline.
+ */
+/*
+ * Memory notifier: veto (NOTIFY_BAD) offlining of a range that starts below
+ * the crashkernel size or that overlaps the crashkernel resource. All other
+ * actions and ranges are accepted with NOTIFY_OK.
+ */
+static int kdump_mem_notifier(struct notifier_block *nb,
+			      unsigned long action, void *data)
+{
+	struct memory_notify *arg = data;
+	unsigned long first_pfn, last_pfn;
+
+	if (action != MEM_GOING_OFFLINE)
+		return NOTIFY_OK;
+
+	first_pfn = arg->start_pfn;
+	last_pfn = arg->start_pfn + arg->nr_pages - 1;
+
+	/* Range starts within [0 - crashkernel memory size]. */
+	if (first_pfn < PFN_DOWN(resource_size(&crashk_res)))
+		return NOTIFY_BAD;
+	/* Range lies completely above or below the crashkernel area. */
+	if (first_pfn > PFN_DOWN(crashk_res.end) ||
+	    last_pfn < PFN_DOWN(crashk_res.start))
+		return NOTIFY_OK;
+	return NOTIFY_BAD;
+}
+
+static struct notifier_block kdump_mem_nb = {
+	.notifier_call = kdump_mem_notifier,
+};
+
+#endif
+
+/*
+ * Make sure that the area behind memory_end is protected
+ */
+/*
+ * Reserve everything from memory_end upwards in memblock when a memory limit
+ * is in effect. For an FCP dump IPL without OLDMEM_BASE the limit is derived
+ * from the HSA size (page aligned) if sclp_get_hsa_size() is non-zero.
+ *
+ * Marked __init: it is only called from setup_arch() and accesses the
+ * __initdata variables memory_end and memory_end_set.
+ */
+static void __init reserve_memory_end(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+	if (ipl_info.type == IPL_TYPE_FCP_DUMP &&
+	    !OLDMEM_BASE && sclp_get_hsa_size()) {
+		memory_end = sclp_get_hsa_size();
+		memory_end &= PAGE_MASK;
+		memory_end_set = 1;
+	}
+#endif
+	if (!memory_end_set)
+		return;
+	memblock_reserve(memory_end, ULONG_MAX);
+}
+
+/*
+ * Make sure that oldmem, where the dump is stored, is protected.
+ *
+ * Boot-time only (called from setup_arch()), hence __init.
+ */
+static void __init reserve_oldmem(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+	if (OLDMEM_BASE)
+		/* Reserve all memory above the running kdump system */
+		memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
+#endif
+}
+
+/*
+ * Make sure that oldmem, where the dump is stored, is not part of the
+ * memory known to memblock.
+ *
+ * Boot-time only (called from setup_arch()), hence __init.
+ */
+static void __init remove_oldmem(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+	if (OLDMEM_BASE)
+		/* Forget all memory above the running kdump system */
+		memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
+#endif
+}
+
+/*
+ * Reserve memory for kdump kernel to be loaded with kexec
+ */
+/*
+ * Parse the crashkernel size/base from the boot command line, pick a memory
+ * area for it (reusing OLDMEM when the request fits into it), register the
+ * memory notifier and the crashk_res resource, and remove the area from
+ * memblock. Failures are reported with pr_info() and leave nothing reserved.
+ */
+static void __init reserve_crashkernel(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+	unsigned long long crash_base, crash_size;
+	phys_addr_t low, high;
+	int rc;
+
+	rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
+			       &crash_base);
+
+	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
+	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
+	if (rc || crash_size == 0)
+		return;
+
+	if (memblock.memory.regions[0].size < crash_size) {
+		pr_info("crashkernel reservation failed: %s\n",
+			"first memory chunk must be at least crashkernel size");
+		return;
+	}
+
+	/* Without an explicit base, try the OLDMEM area first. */
+	low = crash_base ?: OLDMEM_BASE;
+	high = low + crash_size;
+	if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) {
+		/* The crashkernel fits into OLDMEM, reuse OLDMEM */
+		crash_base = low;
+	} else {
+		/* Find suitable area in free memory */
+		/* Lowest allowed base: the larger of crash_size and the HSA size. */
+		low = max_t(unsigned long, crash_size, sclp_get_hsa_size());
+		high = crash_base ? crash_base + crash_size : ULONG_MAX;
+
+		if (crash_base && crash_base < low) {
+			pr_info("crashkernel reservation failed: %s\n",
+				"crash_base too low");
+			return;
+		}
+		low = crash_base ?: low;
+		crash_base = memblock_find_in_range(low, high, crash_size,
+						    KEXEC_CRASH_MEM_ALIGN);
+	}
+
+	if (!crash_base) {
+		pr_info("crashkernel reservation failed: %s\n",
+			"no suitable area found");
+		return;
+	}
+
+	/* NOTE: a failing notifier registration aborts without a message. */
+	if (register_memory_notifier(&kdump_mem_nb))
+		return;
+
+	if (!OLDMEM_BASE && MACHINE_IS_VM)
+		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
+	crashk_res.start = crash_base;
+	crashk_res.end = crash_base + crash_size - 1;
+	insert_resource(&iomem_resource, &crashk_res);
+	memblock_remove(crash_base, crash_size);
+	pr_info("Reserving %lluMB of memory at %lluMB "
+		"for crashkernel (System RAM: %luMB)\n",
+		crash_size >> 20, crash_base >> 20,
+		(unsigned long)memblock.memory.total_size >> 20);
+	os_info_crashkernel_add(crash_base, crash_size);
+#endif
+}
+
+/*
+ * Reserve the initrd from being used by memblock
+ */
+/*
+ * Record the initrd location in initrd_start/initrd_end and reserve the
+ * range [INITRD_START, INITRD_START + INITRD_SIZE) in memblock.
+ */
+static void __init reserve_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	memblock_reserve(INITRD_START, INITRD_SIZE);
+	initrd_start = INITRD_START;
+	initrd_end = INITRD_START + INITRD_SIZE;
+#endif
+}
+
+/*
+ * Check for initrd being in usable memory
+ */
+/*
+ * Drop the initrd if it does not lie completely in memory known to
+ * memblock: report the problem, free its reservation and clear
+ * initrd_start/initrd_end.
+ */
+static void __init check_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (!INITRD_START || !INITRD_SIZE)
+		return;
+	if (memblock_is_region_memory(INITRD_START, INITRD_SIZE))
+		return;
+
+	pr_err("initrd does not fit memory.\n");
+	memblock_free(INITRD_START, INITRD_SIZE);
+	initrd_start = initrd_end = 0;
+#endif
+}
+
+/*
+ * Reserve all kernel text
+ */
+/*
+ * Reserve the memory used for lowcore/command line/kernel image: the range
+ * from 0 to _ehead and the range from _stext to the first page boundary at
+ * or after _end.
+ */
+static void __init reserve_kernel(void)
+{
+	unsigned long image_end_pfn = PFN_UP(__pa(&_end));
+
+	memblock_reserve(0, (unsigned long)_ehead);
+	memblock_reserve((unsigned long)_stext,
+			 PFN_PHYS(image_end_pfn) - (unsigned long)_stext);
+}
+
+/*
+ * In a kdump kernel, reserve the page-aligned ELF core header area, located
+ * at elfcorehdr_addr relative to OLDMEM_BASE.
+ */
+static void __init reserve_elfcorehdr(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+	if (!is_kdump_kernel())
+		return;
+
+	memblock_reserve(elfcorehdr_addr - OLDMEM_BASE,
+			 PAGE_ALIGN(elfcorehdr_size));
+#endif
+}
+
+/*
+ * Initialize the storage keys of all memblock memory regions and set the
+ * PSW key to PAGE_DEFAULT_KEY.
+ */
+static void __init setup_memory(void)
+{
+	struct memblock_region *reg;
+
+	/* Init storage key for present memory */
+	for_each_memblock(memory, reg) {
+		phys_addr_t start = reg->base;
+		phys_addr_t end = reg->base + reg->size;
+
+		storage_key_init_range(start, end);
+	}
+	psw_set_key(PAGE_DEFAULT_KEY);
+
+	/* Only cosmetics */
+	memblock_enforce_memory_limit(memblock_end_of_DRAM());
+}
+
+/*
+ * Setup hardware capabilities.
+ */
+/*
+ * Fill elf_hwcap from the installed facilities and machine flags, mix the
+ * CPU id into the random pool and derive elf_platform from the machine type.
+ */
+static void __init setup_hwcaps(void)
+{
+	/* Facility bit tested for hwcap bit i (see the mapping below). */
+	static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
+	struct cpuid cpu_id;
+	int i;
+
+	/*
+	 * The store facility list bits numbers as found in the principles
+	 * of operation are numbered with bit 1UL<<31 as number 0 to
+	 * bit 1UL<<0 as number 31.
+	 *   Bit 0: instructions named N3, "backported" to esa-mode
+	 *   Bit 2: z/Architecture mode is active
+	 *   Bit 7: the store-facility-list-extended facility is installed
+	 *   Bit 17: the message-security assist is installed
+	 *   Bit 19: the long-displacement facility is installed
+	 *   Bit 21: the extended-immediate facility is installed
+	 *   Bit 22: extended-translation facility 3 is installed
+	 *   Bit 30: extended-translation facility 3 enhancement facility
+	 * These get translated to:
+	 *   HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
+	 *   HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
+	 *   HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
+	 *   HWCAP_S390_ETF3EH bit 8 (22 && 30).
+	 */
+	for (i = 0; i < 6; i++)
+		if (test_facility(stfl_bits[i]))
+			elf_hwcap |= 1UL << i;
+
+	if (test_facility(22) && test_facility(30))
+		elf_hwcap |= HWCAP_S390_ETF3EH;
+
+	/*
+	 * Check for additional facilities with store-facility-list-extended.
+	 * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
+	 * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
+	 * as stored by stfl, bits 32-xxx contain additional facilities.
+	 * How many facility words are stored depends on the number of
+	 * doublewords passed to the instruction. The additional facilities
+	 * are:
+	 *   Bit 42: decimal floating point facility is installed
+	 *   Bit 44: perform floating point operation facility is installed
+	 * translated to:
+	 *   HWCAP_S390_DFP bit 6 (42 && 44).
+	 */
+	/* hwcap bit 2 was set by the loop above for facility bit 7 (stfle). */
+	if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
+		elf_hwcap |= HWCAP_S390_DFP;
+
+	/*
+	 * Huge page support HWCAP_S390_HPAGE is bit 7.
+	 */
+	if (MACHINE_HAS_HPAGE)
+		elf_hwcap |= HWCAP_S390_HPAGE;
+
+	/*
+	 * 64-bit register support for 31-bit processes
+	 * HWCAP_S390_HIGH_GPRS is bit 9.
+	 */
+	elf_hwcap |= HWCAP_S390_HIGH_GPRS;
+
+	/*
+	 * Transactional execution support HWCAP_S390_TE is bit 10.
+	 */
+	if (test_facility(50) && test_facility(73))
+		elf_hwcap |= HWCAP_S390_TE;
+
+	/*
+	 * Vector extension HWCAP_S390_VXRS is bit 11.
+	 */
+	if (test_facility(129))
+		elf_hwcap |= HWCAP_S390_VXRS;
+	get_cpu_id(&cpu_id);
+	add_device_randomness(&cpu_id, sizeof(cpu_id));
+	/* Map the machine type to the ELF platform string. */
+	switch (cpu_id.machine) {
+	case 0x9672:
+		strcpy(elf_platform, "g5");
+		break;
+	case 0x2064:
+	case 0x2066:
+	default:	/* Use "z900" as default for 64 bit kernels. */
+		strcpy(elf_platform, "z900");
+		break;
+	case 0x2084:
+	case 0x2086:
+		strcpy(elf_platform, "z990");
+		break;
+	case 0x2094:
+	case 0x2096:
+		strcpy(elf_platform, "z9-109");
+		break;
+	case 0x2097:
+	case 0x2098:
+		strcpy(elf_platform, "z10");
+		break;
+	case 0x2817:
+	case 0x2818:
+		strcpy(elf_platform, "z196");
+		break;
+	case 0x2827:
+	case 0x2828:
+		strcpy(elf_platform, "zEC12");
+		break;
+	case 0x2964:
+		strcpy(elf_platform, "z13");
+		break;
+	}
+}
+
+/*
+ * Add system information as device randomness
+ */
+static void __init setup_randomness(void)
+{
+	struct sysinfo_3_2_2 *vmms;
+
+	/*
+	 * Take the scratch page from the boot-time allocator, as the rest of
+	 * this file does. alloc_page() returns a struct page pointer, not the
+	 * address of a usable buffer, and must not be cast to the buffer type.
+	 */
+	vmms = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0);
+	if (!vmms)
+		return;
+	/*
+	 * Feed the STSI 3.2.2 entries that were actually stored into the
+	 * pool: vmms->count entries of the vm[] array, not the bytes of the
+	 * local pointer variable.
+	 */
+	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
+		add_device_randomness(&vmms->vm,
+				      sizeof(vmms->vm[0]) * vmms->count);
+	/* Hand the page back to memblock. */
+	memblock_free(__pa(vmms), PAGE_SIZE);
+}
+
+/*
+ * Setup function called from init/main.c just after the banner
+ * was printed.
+ */
+
+/*
+ * setup_arch - architecture specific boot-time setup.
+ * @cmdline_p: set to point to boot_command_line
+ *
+ * Reports the machine type, parses early parameters, sets up the memblock
+ * reservations and memory layout, the boot CPU lowcore, hardware
+ * capabilities, page tables and the default console. The calls below depend
+ * on their order.
+ */
+void __init setup_arch(char **cmdline_p)
+{
+        /*
+         * print what head.S has found out about the machine
+         */
+	if (MACHINE_IS_VM)
+		pr_info("Linux is running as a z/VM "
+			"guest operating system in 64-bit mode\n");
+	else if (MACHINE_IS_KVM)
+		pr_info("Linux is running under KVM in 64-bit mode\n");
+	else if (MACHINE_IS_LPAR)
+		pr_info("Linux is running natively in 64-bit mode\n");
+
+	/* Have one command line that is parsed and saved in /proc/cmdline */
+	/* boot_command_line has been already set up in early.c */
+	*cmdline_p = boot_command_line;
+
+        ROOT_DEV = Root_RAM0;
+
+	/* Is init_mm really needed? */
+	init_mm.start_code = PAGE_OFFSET;
+	init_mm.end_code = (unsigned long) &_etext;
+	init_mm.end_data = (unsigned long) &_edata;
+	init_mm.brk = (unsigned long) &_end;
+
+	parse_early_param();
+	os_info_init();
+	setup_ipl();
+
+	/* Do some memory reservations *before* memory is added to memblock */
+	reserve_memory_end();
+	reserve_oldmem();
+	reserve_kernel();
+	reserve_initrd();
+	reserve_elfcorehdr();
+	memblock_allow_resize();
+
+	/* Get information about *all* installed memory */
+	detect_memory_memblock();
+
+	remove_oldmem();
+
+	/*
+	 * Make sure all chunks are MAX_ORDER aligned so we don't need the
+	 * extra checks that HOLES_IN_ZONE would require.
+	 *
+	 * Is this still required?
+	 */
+	memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
+
+	setup_memory_end();
+	setup_memory();
+
+	check_initrd();
+	reserve_crashkernel();
+
+	/* Resources, vmcoreinfo pointer and the boot CPU lowcore. */
+	setup_resources();
+	setup_vmcoreinfo();
+	setup_lowcore();
+	smp_fill_possible_mask();
+        cpu_init();
+
+	/*
+	 * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
+	 */
+	setup_hwcaps();
+
+	/*
+	 * Create kernel page tables and switch to virtual addressing.
+	 */
+        paging_init();
+
+        /* Setup default console */
+	conmode_default();
+	set_preferred_console();
+
+	/* Setup zfcpdump support */
+	setup_zfcpdump();
+
+	/* Add system specific data to the random pool */
+	setup_randomness();
+}
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
new file mode 100644
index 000000000..c551f22ce
--- /dev/null
+++ b/arch/s390/kernel/signal.c
@@ -0,0 +1,559 @@
+/*
+ *    Copyright IBM Corp. 1999, 2006
+ *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ *
+ *    Based on Intel version
+ * 
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/tracehook.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/lowcore.h>
+#include <asm/switch_to.h>
+#include "entry.h"
+
+/*
+ * Layout of an old-style signal-frame:
+ *	-----------------------------------------
+ *	| save area (__SIGNAL_FRAMESIZE)	|
+ *	-----------------------------------------
+ *	| struct sigcontext			|
+ *	|	oldmask				|
+ *	|	_sigregs *			|
+ *	-----------------------------------------
+ *	| _sigregs with				|
+ *	|	_s390_regs_common		|
+ *	|	_s390_fp_regs			|
+ *	-----------------------------------------
+ *	| int signo				|
+ *	-----------------------------------------
+ *	| _sigregs_ext with			|
+ *	|	gprs_high 64 byte (opt)		|
+ *	|	vxrs_low 128 byte (opt)		|
+ *	|	vxrs_high 256 byte (opt)	|
+ *	|	reserved 128 byte (opt)		|
+ *	-----------------------------------------
+ *	| __u16 svc_insn			|
+ *	-----------------------------------------
+ * The svc_insn entry with the sigreturn system call opcode does not
+ * have a fixed position and moves if gprs_high or vxrs exist.
+ * Future extensions will be added to _sigregs_ext.
+ */
+struct sigframe
+{
+	__u8 callee_used_stack[__SIGNAL_FRAMESIZE];	/* save area */
+	struct sigcontext sc;	/* oldmask and pointer to sregs below */
+	_sigregs sregs;		/* psw, gprs, acrs and fp regs of the task */
+	int signo;		/* written by setup_frame() for backtraces */
+	_sigregs_ext sregs_ext;	/* space only reserved if MACHINE_HAS_VX */
+	__u16 svc_insn;		/* Offset of svc_insn is NOT fixed! */
+};
+
+/*
+ * Layout of an rt signal-frame:
+ *	-----------------------------------------
+ *	| save area (__SIGNAL_FRAMESIZE)	|
+ *	-----------------------------------------
+ *	| svc __NR_rt_sigreturn 2 byte		|
+ *	-----------------------------------------
+ *	| struct siginfo			|
+ *	-----------------------------------------
+ *	| struct ucontext_extended with		|
+ *	|	unsigned long uc_flags		|
+ *	|	struct ucontext *uc_link	|
+ *	|	stack_t uc_stack		|
+ *	|	_sigregs uc_mcontext with	|
+ *	|		_s390_regs_common	|
+ *	|		_s390_fp_regs		|
+ *	|	sigset_t uc_sigmask		|
+ *	|	_sigregs_ext uc_mcontext_ext	|
+ *	|		gprs_high 64 byte (opt)	|
+ *	|		vxrs_low 128 byte (opt)	|
+ *	|		vxrs_high 256 byte (opt)|
+ *	|		reserved 128 byte (opt)	|
+ *	-----------------------------------------
+ * Future extensions will be added to _sigregs_ext.
+ */
+struct rt_sigframe
+{
+	__u8 callee_used_stack[__SIGNAL_FRAMESIZE];	/* save area */
+	__u16 svc_insn;		/* svc __NR_rt_sigreturn, set if no SA_RESTORER */
+	struct siginfo info;	/* handed to the handler in gpr 3 */
+	struct ucontext_extended uc;	/* handed to the handler in gpr 4 */
+};
+
+/* Store registers needed to create the signal frame into current->thread */
+static void store_sigregs(void)
+{
+	save_access_regs(current->thread.acrs);
+	save_fp_ctl(&current->thread.fp_regs.fpc);
+	if (current->thread.vxrs) {	/* fprs are copied out of vxrs */
+		int i;
+
+		save_vx_regs(current->thread.vxrs);
+		for (i = 0; i < __NUM_FPRS; i++)
+			current->thread.fp_regs.fprs[i] =
+				*(freg_t *)(current->thread.vxrs + i);
+	} else	/* no vxrs buffer: save the FP registers directly */
+		save_fp_regs(current->thread.fp_regs.fprs);
+}
+
+/* Load registers from current->thread after signal return */
+static void load_sigregs(void)
+{
+	restore_access_regs(current->thread.acrs);
+	/* restore_fp_ctl is done in restore_sigregs */
+	if (current->thread.vxrs) {	/* fprs are merged into vxrs first */
+		int i;
+
+		for (i = 0; i < __NUM_FPRS; i++)
+			*(freg_t *)(current->thread.vxrs + i) =
+				current->thread.fp_regs.fprs[i];
+		restore_vx_regs(current->thread.vxrs);
+	} else	/* no vxrs buffer: load the FP registers directly */
+		restore_fp_regs(current->thread.fp_regs.fprs);
+}
+
+/*
+ * Assemble a _sigregs image from @regs and current->thread, then copy it
+ * out to @sregs.  Returns non-zero on fault.
+ */
+static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
+{
+	_sigregs ctx;
+
+	/* Copy a 'clean' PSW mask to the user to avoid leaking
+	   information about whether PER is currently on.  */
+	ctx.regs.psw.mask = PSW_USER_BITS |
+		(regs->psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
+	ctx.regs.psw.addr = regs->psw.addr;
+	memcpy(&ctx.regs.gprs, &regs->gprs, sizeof(ctx.regs.gprs));
+	memcpy(&ctx.regs.acrs, current->thread.acrs, sizeof(ctx.regs.acrs));
+	memcpy(&ctx.fpregs, &current->thread.fp_regs, sizeof(ctx.fpregs));
+
+	return __copy_to_user(sregs, &ctx, sizeof(ctx)) ? -EFAULT : 0;
+}
+
+static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
+{
+	_sigregs user_sregs;
+
+	/* Always make any pending restarted system call return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs)))
+		return -EFAULT;
+
+	if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
+		return -EINVAL;
+
+	/* Loading the floating-point-control word can fail. Do that first. */
+	if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+		return -EINVAL;
+
+	/* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
+	regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
+		(user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
+	/* Check for invalid user address space control. */
+	if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME)
+		regs->psw.mask = PSW_ASC_PRIMARY |
+			(regs->psw.mask & ~PSW_MASK_ASC);
+	/* Check for invalid amode */
+	if (regs->psw.mask & PSW_MASK_EA)
+		regs->psw.mask |= PSW_MASK_BA;
+	regs->psw.addr = user_sregs.regs.psw.addr;
+	memcpy(&regs->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs));
+	memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
+	       sizeof(current->thread.acrs));
+	/* acrs/fp_regs are only staged in current->thread, see load_sigregs() */
+	memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
+	       sizeof(current->thread.fp_regs));
+
+	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
+	return 0;
+}
+
+/* Copy the vector registers, if any, to @sregs_ext. Non-zero on fault. */
+static int save_sigregs_ext(struct pt_regs *regs,
+			    _sigregs_ext __user *sregs_ext)
+{
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i;
+
+	/* Save vector registers to signal stack */
+	if (current->thread.vxrs) {
+		for (i = 0; i < __NUM_VXRS_LOW; i++)	/* second __u64 of each */
+			vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
+				   sizeof(sregs_ext->vxrs_low)) ||
+		    __copy_to_user(&sregs_ext->vxrs_high,
+				   current->thread.vxrs + __NUM_VXRS_LOW,
+				   sizeof(sregs_ext->vxrs_high)))
+			return -EFAULT;
+	}
+	return 0;	/* also when there is no vxrs buffer: nothing written */
+}
+
+static int restore_sigregs_ext(struct pt_regs *regs,
+			       _sigregs_ext __user *sregs_ext)
+{
+	__u64 vxrs[__NUM_VXRS_LOW];
+	int i;
+
+	/* Restore vector registers from signal stack */
+	if (current->thread.vxrs) {
+		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
+				     sizeof(sregs_ext->vxrs_low)) ||
+		    __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+				     &sregs_ext->vxrs_high,
+				     sizeof(sregs_ext->vxrs_high)))
+			return -EFAULT;
+		for (i = 0; i < __NUM_VXRS_LOW; i++)	/* second __u64 of each */
+			*((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+	}
+	return 0;	/* also when there is no vxrs buffer: nothing read */
+}
+
+SYSCALL_DEFINE0(sigreturn)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	struct sigframe __user *frame =	/* the frame is found at user gpr 15 */
+		(struct sigframe __user *) regs->gprs[15];
+	sigset_t set;
+
+	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
+		goto badframe;
+	set_current_blocked(&set);	/* mask that setup_frame() stored */
+	if (restore_sigregs(regs, &frame->sregs))
+		goto badframe;
+	if (restore_sigregs_ext(regs, &frame->sregs_ext))
+		goto badframe;
+	load_sigregs();		/* reload registers from current->thread */
+	return regs->gprs[2];	/* hand back the restored gpr 2 unchanged */
+badframe:
+	force_sig(SIGSEGV, current);	/* frame unreadable or rejected */
+	return 0;
+}
+
+SYSCALL_DEFINE0(rt_sigreturn)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	struct rt_sigframe __user *frame =	/* found at user gpr 15 */
+		(struct rt_sigframe __user *)regs->gprs[15];
+	sigset_t set;
+
+	if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set)))
+		goto badframe;
+	set_current_blocked(&set);	/* mask that setup_rt_frame() stored */
+	if (restore_altstack(&frame->uc.uc_stack))
+		goto badframe;
+	if (restore_sigregs(regs, &frame->uc.uc_mcontext))
+		goto badframe;
+	if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
+		goto badframe;
+	load_sigregs();		/* reload registers from current->thread */
+	return regs->gprs[2];	/* hand back the restored gpr 2 unchanged */
+badframe:
+	force_sig(SIGSEGV, current);	/* frame unreadable or rejected */
+	return 0;
+}
+
+/*
+ * Pick the user stack address for a signal frame of frame_size bytes.
+ * Returns (void __user *) -1UL if the frame would overflow the altstack.
+ */
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+	/* Start from the stack pointer of the interrupted context */
+	unsigned long stack = regs->gprs[15];
+
+	/* Already on the alternate stack but the frame would not fit */
+	if (on_sig_stack(stack) &&
+	    !on_sig_stack((stack - frame_size) & -8UL))
+		return (void __user *) -1UL;
+
+	/* SA_ONSTACK: use the altstack top when sas_ss_flags() yields 0 */
+	if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(stack))
+		stack = current->sas_ss_sp + current->sas_ss_size;
+
+	/* The frame goes below the chosen stack top, 8-byte aligned */
+	stack = (stack - frame_size) & -8UL;
+	return (void __user *) stack;
+}
+
+/*
+ * Build an old-style (non-SA_SIGINFO) signal frame on the user stack and
+ * redirect @regs to the handler.  Returns 0 on success, -EFAULT otherwise.
+ */
+static int setup_frame(int sig, struct k_sigaction *ka,
+		       sigset_t *set, struct pt_regs * regs)
+{
+	struct sigframe __user *frame;
+	struct sigcontext sc;
+	unsigned long restorer;
+	size_t frame_size;
+
+	/*
+	 * gprs_high are only present for a 31-bit task running on
+	 * a 64-bit kernel (see compat_signal.c) but the space for
+	 * gprs_high need to be allocated if vector registers are
+	 * included in the signal frame on a 31-bit system.
+	 */
+	frame_size = sizeof(*frame) - sizeof(frame->sregs_ext);
+	if (MACHINE_HAS_VX)
+		frame_size += sizeof(frame->sregs_ext);
+	frame = get_sigframe(ka, regs, frame_size);
+	if (frame == (void __user *) -1UL)
+		return -EFAULT;
+
+	/* Set up backchain. */
+	if (__put_user(regs->gprs[15], (addr_t __user *) frame))
+		return -EFAULT;
+
+	/* Create struct sigcontext on the signal stack */
+	memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE);
+	sc.sregs = (_sigregs __user __force *) &frame->sregs;
+	if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc)))
+		return -EFAULT;
+
+	store_sigregs();	/* registers needed to create the signal frame */
+
+	/* Create _sigregs on the signal stack */
+	if (save_sigregs(regs, &frame->sregs))
+		return -EFAULT;
+
+	/* Place signal number (gpr 2 is not loaded with it yet) on stack. */
+	if (__put_user(sig, (int __user *) &frame->signo))
+		return -EFAULT;
+
+	/* Create _sigregs_ext on the signal stack */
+	if (save_sigregs_ext(regs, &frame->sregs_ext))
+		return -EFAULT;
+
+	/* Return via the user's SA_RESTORER stub, else via svc in the frame */
+	if (ka->sa.sa_flags & SA_RESTORER) {
+		restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
+	} else {
+		/* Signal frame without vector registers are short ! */
+		__u16 __user *svc = (void __user *) frame + frame_size - 2;
+		if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
+			return -EFAULT;
+		restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+	}
+
+	/* Set up registers for signal handler */
+	regs->gprs[14] = restorer;
+	regs->gprs[15] = (unsigned long) frame;
+	/* Force default amode and default user address space control. */
+	regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
+		(PSW_USER_BITS & PSW_MASK_ASC) |
+		(regs->psw.mask & ~PSW_MASK_ASC);
+	regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
+	regs->gprs[2] = sig;
+	regs->gprs[3] = (unsigned long) &frame->sc;
+
+	/* We forgot to include these in the sigcontext. To avoid breaking
+	   binary compatibility, they are passed as args (sync signals only). */
+	if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL ||
+	    sig == SIGTRAP || sig == SIGFPE) {
+		regs->gprs[4] = regs->int_code & 127;
+		regs->gprs[5] = regs->int_parm_long;
+		regs->gprs[6] = task_thread_info(current)->last_break;
+	}
+	return 0;
+}
+
+/*
+ * Build an rt (SA_SIGINFO) signal frame and point @regs at the handler.
+ */
+static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+			  struct pt_regs *regs)
+{
+	struct rt_sigframe __user *frame;
+	unsigned long uc_flags, restorer;
+	size_t frame_size;
+
+	frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext);
+	/*
+	 * gprs_high are only present for a 31-bit task running on
+	 * a 64-bit kernel (see compat_signal.c) but the space for
+	 * gprs_high need to be allocated if vector registers are
+	 * included in the signal frame on a 31-bit system.
+	 */
+	uc_flags = 0;
+	if (MACHINE_HAS_VX) {
+		frame_size += sizeof(_sigregs_ext);
+		if (current->thread.vxrs)
+			uc_flags |= UC_VXRS;
+	}
+	frame = get_sigframe(&ksig->ka, regs, frame_size);
+	if (frame == (void __user *) -1UL)
+		return -EFAULT;
+
+	/* Set up backchain. */
+	if (__put_user(regs->gprs[15], (addr_t __user *) frame))
+		return -EFAULT;
+
+	/* Return via the user's SA_RESTORER stub, else via svc in the frame */
+	if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+		restorer = (unsigned long)
+			ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE;
+	} else {
+		__u16 __user *svc = &frame->svc_insn;
+		if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
+			return -EFAULT;
+		restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+	}
+
+	/* Create siginfo on the signal stack */
+	if (copy_siginfo_to_user(&frame->info, &ksig->info))
+		return -EFAULT;
+
+	store_sigregs();	/* registers needed to create the signal frame */
+
+	/* Create ucontext on the signal stack. */
+	if (__put_user(uc_flags, &frame->uc.uc_flags) ||
+	    __put_user(NULL, &frame->uc.uc_link) ||
+	    __save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
+	    save_sigregs(regs, &frame->uc.uc_mcontext) ||
+	    __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) ||
+	    save_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
+		return -EFAULT;
+
+	/* Set up registers for signal handler */
+	regs->gprs[14] = restorer;
+	regs->gprs[15] = (unsigned long) frame;
+	/* Force default amode and default user address space control. */
+	regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
+		(PSW_USER_BITS & PSW_MASK_ASC) |
+		(regs->psw.mask & ~PSW_MASK_ASC);
+	regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler | PSW_ADDR_AMODE;
+	regs->gprs[2] = ksig->sig;
+	regs->gprs[3] = (unsigned long) &frame->info;
+	regs->gprs[4] = (unsigned long) &frame->uc;
+	regs->gprs[5] = task_thread_info(current)->last_break;
+	return 0;
+}
+
+static void handle_signal(struct ksignal *ksig, sigset_t *oldset,
+			  struct pt_regs *regs)
+{
+	int stepping, err;
+
+	/* SA_SIGINFO handlers get an rt frame, all others the old-style one */
+	err = (ksig->ka.sa.sa_flags & SA_SIGINFO) ?
+		setup_rt_frame(ksig, oldset, regs) :
+		setup_frame(ksig->sig, &ksig->ka, oldset, regs);
+
+	stepping = test_thread_flag(TIF_SINGLE_STEP);
+	signal_setup_done(err, ksig, stepping);
+}
+
+/*
+ * Deliver one pending signal, if there is one, and sort out system call
+ * restarting for the interrupted context.
+ *
+ * With a handler, -ERESTART* values in gpr 2 either become -EINTR or the
+ * call is re-issued by restoring orig_gpr2 and rewinding the PSW. Without
+ * a handler the call is restarted through PIF_SYSCALL, and the saved
+ * signal mask is put back.
+ */
+void do_signal(struct pt_regs *regs)
+{
+	struct ksignal ksig;
+	sigset_t *oldset = sigmask_to_save();
+
+	/*
+	 * Get signal to deliver. When running under ptrace, at this point
+	 * the debugger may change all our registers, including the system
+	 * call information.
+	 */
+	current_thread_info()->system_call =
+		test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0;
+
+	if (get_signal(&ksig)) {
+		/* Whee!  Actually deliver the signal.  */
+		if (current_thread_info()->system_call) {
+			regs->int_code = current_thread_info()->system_call;
+			/* Check for system call restarting. */
+			switch (regs->gprs[2]) {
+			case -ERESTART_RESTARTBLOCK:
+			case -ERESTARTNOHAND:
+				regs->gprs[2] = -EINTR;
+				break;
+			case -ERESTARTSYS:
+				if (!(ksig.ka.sa.sa_flags & SA_RESTART)) {
+					regs->gprs[2] = -EINTR;
+					break;
+				}
+			/* fallthrough */
+			case -ERESTARTNOINTR:
+				regs->gprs[2] = regs->orig_gpr2;
+				regs->psw.addr =
+					__rewind_psw(regs->psw,
+						     regs->int_code >> 16);
+				break;
+			}
+		}
+		/* No longer in a system call */
+		clear_pt_regs_flag(regs, PIF_SYSCALL);
+
+		if (is_compat_task())
+			handle_signal32(&ksig, oldset, regs);
+		else
+			handle_signal(&ksig, oldset, regs);
+		return;
+	}
+
+	/* No handlers present - check for system call restart */
+	clear_pt_regs_flag(regs, PIF_SYSCALL);
+	if (current_thread_info()->system_call) {
+		regs->int_code = current_thread_info()->system_call;
+		switch (regs->gprs[2]) {
+		case -ERESTART_RESTARTBLOCK:
+			/* Restart with sys_restart_syscall */
+			regs->int_code = __NR_restart_syscall;
+		/* fallthrough */
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			/* Restart system call with magic TIF bit. */
+			regs->gprs[2] = regs->orig_gpr2;
+			set_pt_regs_flag(regs, PIF_SYSCALL);
+			if (test_thread_flag(TIF_SINGLE_STEP))
+				clear_pt_regs_flag(regs, PIF_PER_TRAP);
+			break;
+		}
+	}
+
+	/*
+	 * If there's no signal to deliver, we just put the saved sigmask back.
+	 */
+	restore_saved_sigmask();
+}
+
+void do_notify_resume(struct pt_regs *regs)
+{
+	clear_thread_flag(TIF_NOTIFY_RESUME);	/* clear before notifying */
+	tracehook_notify_resume(regs);
+}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
new file mode 100644
index 000000000..efd2c1968
--- /dev/null
+++ b/arch/s390/kernel/smp.c
@@ -0,0 +1,1161 @@
+/*
+ *  SMP related functions
+ *
+ *    Copyright IBM Corp. 1999, 2012
+ *    Author(s): Denis Joseph Barrow,
+ *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ *  based on other smp stuff by
+ *    (c) 1995 Alan Cox, CymruNET Ltd  <alan@cymru.net>
+ *    (c) 1998 Ingo Molnar
+ *
+ * The code outside of smp.c uses logical cpu numbers, only smp.c does
+ * the translation of logical to physical cpu ids. All new code that
+ * operates on physical cpu numbers needs to go into smp.c.
+ */
+
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/workqueue.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irqflags.h>
+#include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/crash_dump.h>
+#include <asm/asm-offsets.h>
+#include <asm/switch_to.h>
+#include <asm/facility.h>
+#include <asm/ipl.h>
+#include <asm/setup.h>
+#include <asm/irq.h>
+#include <asm/tlbflush.h>
+#include <asm/vtimer.h>
+#include <asm/lowcore.h>
+#include <asm/sclp.h>
+#include <asm/vdso.h>
+#include <asm/debug.h>
+#include <asm/os_info.h>
+#include <asm/sigp.h>
+#include <asm/idle.h>
+#include "entry.h"
+
+enum {	/* bit numbers within pcpu->ec_mask */
+	ec_schedule = 0,		/* -> scheduler_ipi() */
+	ec_call_function_single,	/* -> generic smp call function */
+	ec_stop_cpu,			/* -> smp_stop_cpu() */
+};
+
+enum {	/* values for pcpu->state */
+	CPU_STATE_STANDBY,
+	CPU_STATE_CONFIGURED,
+};
+
+static DEFINE_PER_CPU(struct cpu *, cpu_device);
+
+struct pcpu {
+	struct _lowcore *lowcore;	/* lowcore page(s) for the cpu */
+	unsigned long ec_mask;		/* pending ec_xxx request bits */
+	signed char state;		/* physical cpu state (CPU_STATE_*) */
+	signed char polarization;	/* physical polarization */
+	u16 address;			/* physical cpu address, used for sigp */
+};
+
+static u8 boot_cpu_type;
+static struct pcpu pcpu_devices[NR_CPUS];	/* indexed by logical cpu nr */
+
+unsigned int smp_cpu_mt_shift;	/* derived from smp_cpu_mtid in pcpu_set_smt() */
+EXPORT_SYMBOL(smp_cpu_mt_shift);
+
+unsigned int smp_cpu_mtid;	/* mtid last accepted in pcpu_set_smt() */
+EXPORT_SYMBOL(smp_cpu_mtid);
+
+static unsigned int smp_max_threads __initdata = -1U;	/* "nosmt" / "smt" */
+
+static int __init early_nosmt(char *s)	/* "nosmt": @s is not looked at */
+{
+	smp_max_threads = 1;
+	return 0;
+}
+early_param("nosmt", early_nosmt);
+
+static int __init early_smt(char *s)	/* "smt": parse the thread limit */
+{
+	get_option(&s, &smp_max_threads);	/* result code is not checked */
+	return 0;
+}
+early_param("smt", early_smt);
+
+/*
+ * The smp_cpu_state_mutex must be held when changing the state or polarization
+ * member of a pcpu data structure within the pcpu_devices array.
+ */
+DEFINE_MUTEX(smp_cpu_state_mutex);
+
+/*
+ * Signal processor helper functions.
+ */
+/*
+ * Issue sigp until the cpu is not busy; @status is forwarded, may be NULL.
+ */
+static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm,
+				    u32 *status)
+{
+	int cc;
+
+	while ((cc = __pcpu_sigp(addr, order, parm, status)) == SIGP_CC_BUSY)
+		cpu_relax();
+	return cc;
+}
+
+static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
+{
+	int attempt = 0, cc;
+
+	/* Repeat while busy; from the fourth busy answer on wait 10us each */
+	while ((cc = __pcpu_sigp(pcpu->address, order, parm, NULL)) ==
+	       SIGP_CC_BUSY) {
+		if (attempt >= 3)
+			udelay(10);
+		attempt++;
+	}
+	return cc;
+}
+
+static inline int pcpu_stopped(struct pcpu *pcpu)
+{
+	u32 uninitialized_var(sense);
+	int cc = __pcpu_sigp(pcpu->address, SIGP_SENSE, 0, &sense);
+
+	if (cc != SIGP_CC_STATUS_STORED)	/* no status: report "not stopped" */
+		return 0;
+	return (sense & (SIGP_STATUS_CHECK_STOP | SIGP_STATUS_STOPPED)) != 0;
+}
+
+/* Returns 1 unless sigp sense-running answers "status stored". */
+static inline int pcpu_running(struct pcpu *pcpu)
+{
+	int cc = __pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING, 0, NULL);
+
+	/* Status stored condition code is equivalent to cpu not running. */
+	return cc != SIGP_CC_STATUS_STORED;
+}
+
+/*
+ * Return the pcpu among the cpus in @mask with cpu address @address, or NULL.
+ */
+static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
+{
+	int nr;
+
+	for_each_cpu(nr, mask)
+		if (address == pcpu_devices[nr].address)
+			return &pcpu_devices[nr];
+	return NULL;
+}
+
+static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
+{
+	if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
+		return;		/* bit was already set: no further sigp */
+	if (pcpu_running(pcpu))
+		pcpu_sigp_retry(pcpu, SIGP_EXTERNAL_CALL, 0);
+	else
+		pcpu_sigp_retry(pcpu, SIGP_EMERGENCY_SIGNAL, 0);
+}
+
+#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+
+static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
+{
+	unsigned long async_stack, panic_stack;
+	struct _lowcore *lc;
+
+	if (pcpu != &pcpu_devices[0]) {	/* allocate lowcore and both stacks */
+		pcpu->lowcore =	(struct _lowcore *)
+			__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
+		async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
+		panic_stack = __get_free_page(GFP_KERNEL);
+		if (!pcpu->lowcore || !panic_stack || !async_stack)
+			goto out;
+	} else {	/* pcpu_devices[0] reuses its existing lowcore and stacks */
+		async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
+		panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
+	}
+	lc = pcpu->lowcore;
+	memcpy(lc, &S390_lowcore, 512);	/* first 512 bytes are inherited */
+	memset((char *) lc + 512, 0, sizeof(*lc) - 512);	/* rest is zeroed */
+	lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
+	lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
+	lc->cpu_nr = cpu;
+	lc->spinlock_lockval = arch_spin_lockval(cpu);
+	if (MACHINE_HAS_VX)
+		lc->vector_save_area_addr =
+			(unsigned long) &lc->vector_save_area;
+	if (vdso_alloc_per_cpu(lc))
+		goto out;
+	lowcore_ptr[cpu] = lc;
+	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc);
+	return 0;
+out:	/* only pages allocated above are released; always -ENOMEM */
+	if (pcpu != &pcpu_devices[0]) {
+		free_page(panic_stack);
+		free_pages(async_stack, ASYNC_ORDER);
+		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
+	}
+	return -ENOMEM;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static void pcpu_free_lowcore(struct pcpu *pcpu)
+{
+	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);	/* prefix back to 0 */
+	lowcore_ptr[pcpu - pcpu_devices] = NULL;
+	vdso_free_per_cpu(pcpu->lowcore);
+	if (pcpu == &pcpu_devices[0])	/* its pages were not allocated here */
+		return;
+	free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
+	free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
+	free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
+{
+	struct _lowcore *lc = pcpu->lowcore;
+
+	if (MACHINE_HAS_TLB_LC)
+		cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
+	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));	/* attach cpu to init_mm */
+	atomic_inc(&init_mm.context.attach_count);
+	lc->cpu_nr = cpu;
+	lc->spinlock_lockval = arch_spin_lockval(cpu);
+	lc->percpu_offset = __per_cpu_offset[cpu];
+	lc->kernel_asce = S390_lowcore.kernel_asce;	/* copied from this cpu */
+	lc->machine_flags = S390_lowcore.machine_flags;
+	lc->user_timer = lc->system_timer = lc->steal_timer = 0;
+	__ctl_store(lc->cregs_save_area, 0, 15);	/* this cpu's cr0-cr15 */
+	save_access_regs((unsigned int *) lc->access_regs_save_area);
+	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
+	       MAX_FACILITY_BIT/8);
+}
+
+static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
+{
+	struct thread_info *tinfo = task_thread_info(tsk);
+	struct _lowcore *lowcore = pcpu->lowcore;
+
+	lowcore->kernel_stack = (unsigned long) task_stack_page(tsk) +
+		THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+	lowcore->current_task = (unsigned long) tsk;
+	lowcore->thread_info = (unsigned long) tinfo;
+	lowcore->user_timer = tinfo->user_timer;
+	lowcore->system_timer = tinfo->system_timer;
+	lowcore->steal_timer = 0;	/* only the steal timer starts at zero */
+}
+
+static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
+{
+	struct _lowcore *lc = pcpu->lowcore;
+
+	lc->restart_stack = lc->kernel_stack;	/* run func on the kernel stack */
+	lc->restart_fn = (unsigned long) func;
+	lc->restart_data = (unsigned long) data;
+	lc->restart_source = -1UL;	/* no source cpu, unlike pcpu_delegate() */
+	pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
+}
+
+/*
+ * Call function via PSW restart on pcpu and stop the current cpu.
+ */
+static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
+			  void *data, unsigned long stack)
+{
+	struct _lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
+	unsigned long source_cpu = stap();	/* address of the calling cpu */
+
+	__load_psw_mask(PSW_KERNEL_BITS);
+	if (pcpu->address == source_cpu)	/* target is this cpu: call directly */
+		func(data);	/* should not return */
+	/* Stop target cpu (if func returns this stops the current cpu). */
+	pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+	/* Restart func on the target cpu and stop the current cpu. */
+	mem_assign_absolute(lc->restart_stack, stack);
+	mem_assign_absolute(lc->restart_fn, (unsigned long) func);
+	mem_assign_absolute(lc->restart_data, (unsigned long) data);
+	mem_assign_absolute(lc->restart_source, source_cpu);
+	asm volatile(
+		"0:	sigp	0,%0,%2	# sigp restart to target cpu\n"
+		"	brc	2,0b	# busy, try again\n"
+		"1:	sigp	0,%1,%3	# sigp stop to current cpu\n"
+		"	brc	2,1b	# busy, try again\n"
+		: : "d" (pcpu->address), "d" (source_cpu),
+		    "K" (SIGP_RESTART), "K" (SIGP_STOP)
+		: "0", "1", "cc");
+	for (;;) ;	/* loop forever should execution get here */
+}
+
+/*
+ * Enable additional logical cpus for multi-threading.
+ */
+static int pcpu_set_smt(unsigned int mtid)
+{
+	register unsigned long reg1 asm ("1") = (unsigned long) mtid;
+	int cc;
+
+	if (smp_cpu_mtid == mtid)	/* nothing to change */
+		return 0;
+	asm volatile(
+		"	sigp	%1,0,%2	# sigp set multi-threading\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (cc) : "d" (reg1), "K" (SIGP_SET_MULTI_THREADING)
+		: "cc");
+	if (cc == 0) {	/* accepted: record mtid and recompute the shift */
+		smp_cpu_mtid = mtid;
+		smp_cpu_mt_shift = 0;
+		while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
+			smp_cpu_mt_shift++;	/* smallest shift with 2^shift > mtid */
+		pcpu_devices[0].address = stap();	/* re-read own cpu address */
+	}
+	return cc;	/* sigp condition code, 0 on success */
+}
+
+/*
+ * Call function on an online CPU.
+ * The function is delegated to the current cpu when it is online and to
+ * the first cpu of cpu_online_mask otherwise; it runs on restart_stack.
+ */
+void smp_call_online_cpu(void (*func)(void *), void *data)
+{
+	struct pcpu *target = pcpu_find_address(cpu_online_mask, stap());
+
+	/* Current cpu is not online: use the first online cpu. */
+	if (target == NULL)
+		target = &pcpu_devices[cpumask_first(cpu_online_mask)];
+	pcpu_delegate(target, func, data, (unsigned long) restart_stack);
+}
+
+/*
+ * Call function on the ipl CPU (pcpu_devices[0]), using its panic stack.
+ */
+void smp_call_ipl_cpu(void (*func)(void *), void *data)
+{
+	pcpu_delegate(&pcpu_devices[0], func, data,
+		      pcpu_devices->lowcore->panic_stack -
+		      PANIC_FRAME_OFFSET + PAGE_SIZE);	/* end of the stack page */
+}
+
+int smp_find_processor_id(u16 address)
+{
+	int nr;
+
+	for_each_present_cpu(nr)
+		if (address == pcpu_devices[nr].address)
+			return nr;
+	return -1;	/* no present cpu has this address */
+}
+
+int smp_vcpu_scheduled(int cpu)
+{
+	return pcpu_running(&pcpu_devices[cpu]);	/* sigp sense-running */
+}
+
+void smp_yield_cpu(int cpu)
+{
+	if (MACHINE_HAS_DIAG9C)	/* diag 0x9c is given the cpu's address */
+		asm volatile("diag %0,0,0x9c"
+			     : : "d" (pcpu_devices[cpu].address));
+	else if (MACHINE_HAS_DIAG44)	/* diag 0x44 takes no cpu argument */
+		asm volatile("diag 0,0,0x44");
+}
+
+/*
+ * Send cpus emergency shutdown signal. This gives the cpus the
+ * opportunity to complete outstanding interrupts.
+ */
+static void smp_emergency_stop(cpumask_t *cpumask)
+{
+	u64 end;
+	int cpu;
+
+	/* NOTE(review): presumably a 1s deadline in TOD clock units - confirm */
+	end = get_tod_clock() + (1000000UL << 12);
+	for_each_cpu(cpu, cpumask) {
+		struct pcpu *pcpu = pcpu_devices + cpu;
+		set_bit(ec_stop_cpu, &pcpu->ec_mask);
+		while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
+				   0, NULL) == SIGP_CC_BUSY &&
+		       get_tod_clock() < end)
+			cpu_relax();
+	}
+	while (get_tod_clock() < end) {	/* drop stopped cpus from @cpumask */
+		for_each_cpu(cpu, cpumask)
+			if (pcpu_stopped(pcpu_devices + cpu))
+				cpumask_clear_cpu(cpu, cpumask);
+		if (cpumask_empty(cpumask))
+			break;
+		cpu_relax();
+	}
+}
+
+/*
+ * Stop all cpus but the current one.
+ */
+void smp_send_stop(void)
+{
+	cpumask_t cpumask;
+	int cpu;
+
+	/* Disable all interrupts/machine checks */
+	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+	trace_hardirqs_off();
+
+	debug_set_critical();
+	cpumask_copy(&cpumask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &cpumask);
+
+	if (oops_in_progress)	/* try the emergency signal first */
+		smp_emergency_stop(&cpumask);
+
+	/* stop all processors */
+	for_each_cpu(cpu, &cpumask) {	/* cpus still left in the mask */
+		struct pcpu *pcpu = pcpu_devices + cpu;
+		pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+		while (!pcpu_stopped(pcpu))	/* no timeout on this wait */
+			cpu_relax();
+	}
+}
+
+/*
+ * This is the main routine where commands issued by other
+ * cpus are handled.
+ */
+static void smp_handle_ext_call(void)
+{
+	unsigned long bits;
+
+	/* handle bit signal external calls */
+	bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
+	if (test_bit(ec_stop_cpu, &bits))	/* checked before the others */
+		smp_stop_cpu();
+	if (test_bit(ec_schedule, &bits))
+		scheduler_ipi();
+	if (test_bit(ec_call_function_single, &bits))
+		generic_smp_call_function_single_interrupt();
+}
+
+static void do_ext_call_interrupt(struct ext_code ext_code,
+				  unsigned int param32, unsigned long param64)
+{
+	/* code 0x1202 is counted as IRQEXT_EXC, any other as IRQEXT_EMS */
+	inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS);
+	smp_handle_ext_call();
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	int nr;
+
+	for_each_cpu(nr, mask)	/* one ec_call_function_single per cpu */
+		pcpu_ec_call(&pcpu_devices[nr], ec_call_function_single);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+	pcpu_ec_call(&pcpu_devices[cpu], ec_call_function_single);
+}
+
+/*
+ * Send a 'reschedule' IPI to another CPU by raising ec_schedule for it.
+ * Nothing is serialized here and the call returns right away; in the
+ * worst case a reschedule request gets lost.
+ */
+void smp_send_reschedule(int cpu)
+{
+	pcpu_ec_call(&pcpu_devices[cpu], ec_schedule);
+}
+
+/*
+ * parameter area for the set/clear control bit callbacks
+ */
+struct ec_creg_mask_parms {
+	unsigned long orval;	/* bits OR-ed into the control register */
+	unsigned long andval;	/* mask AND-ed with the register before that */
+	int cr;			/* index of the control register, 0..15 */
+};
+
+/*
+ * callback for setting/clearing control bits: read-modify-write of cr0-cr15
+ */
+static void smp_ctl_bit_callback(void *info)
+{
+	struct ec_creg_mask_parms *pp = info;
+	unsigned long cregs[16];
+
+	__ctl_store(cregs, 0, 15);	/* read all sixteen control registers */
+	cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
+	__ctl_load(cregs, 0, 15);	/* write them back, one of them changed */
+}
+
+/* Set a bit in a control register of all cpus */
+void smp_ctl_set_bit(int cr, int bit)
+{
+	struct ec_creg_mask_parms parms = {
+		.orval = 1UL << bit, .andval = -1UL, .cr = cr
+	};
+
+	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+}
+EXPORT_SYMBOL(smp_ctl_set_bit);
+
+/* Clear a bit in a control register of all cpus */
+void smp_ctl_clear_bit(int cr, int bit)
+{
+	struct ec_creg_mask_parms parms = {
+		.orval = 0, .andval = ~(1UL << bit), .cr = cr
+	};
+
+	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+}
+EXPORT_SYMBOL(smp_ctl_clear_bit);
+
+#ifdef CONFIG_CRASH_DUMP
+
+static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu)
+{
+	void *lc = pcpu_devices[0].lowcore;
+	struct save_area_ext *sa_ext;
+	unsigned long vx_sa;
+
+	sa_ext = dump_save_area_create(cpu);	/* save area for logical @cpu */
+	if (!sa_ext)
+		panic("could not allocate memory for save area\n");
+	if (is_boot_cpu) {
+		/* Copy the registers of the boot CPU. */
+		copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa),
+				 SAVE_AREA_BASE - PAGE_SIZE, 0);
+		if (MACHINE_HAS_VX)
+			save_vx_regs_safe(sa_ext->vx_regs);
+		return;
+	}
+	/* Get the registers of a non-boot cpu. */
+	__pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL);
+	memcpy_real(&sa_ext->sa, lc + SAVE_AREA_BASE, sizeof(sa_ext->sa));
+	if (!MACHINE_HAS_VX)
+		return;
+	/* Get the VX registers */
+	vx_sa = __get_free_page(GFP_KERNEL);	/* temporary page, freed below */
+	if (!vx_sa)
+		panic("could not allocate memory for VX save area\n");
+	__pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL);
+	memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs));
+	free_page(vx_sa);
+}
+
+/*
+ * Collect CPU state of the previous, crashed system.
+ * There are four cases:
+ * 1) standard zfcp dump
+ *    condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ *    The state for all CPUs except the boot CPU needs to be collected
+ *    with sigp stop-and-store-status. The boot CPU state is located in
+ *    the absolute lowcore of the memory stored in the HSA. The zcore code
+ *    will allocate the save area and copy the boot CPU state from the HSA.
+ * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory)
+ *    condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ *    The state for all CPUs except the boot CPU needs to be collected
+ *    with sigp stop-and-store-status. The firmware or the boot-loader
+ *    stored the registers of the boot CPU in the absolute lowcore in the
+ *    memory of the old system.
+ * 3) kdump and the old kernel did not store the CPU state,
+ *    or stand-alone kdump for DASD
+ *    condition: OLDMEM_BASE != NULL && !is_kdump_kernel()
+ *    The state for all CPUs except the boot CPU needs to be collected
+ *    with sigp stop-and-store-status. The kexec code or the boot-loader
+ *    stored the registers of the boot CPU in the memory of the old system.
+ * 4) kdump and the old kernel stored the CPU state
+ *    condition: OLDMEM_BASE != NULL && is_kdump_kernel()
+ *    The state of all CPUs is stored in ELF sections in the memory of the
+ *    old system. The ELF sections are picked up by the crash_dump code
+ *    via elfcorehdr_addr.
+ */
+static void __init smp_store_cpu_states(struct sclp_cpu_info *info)
+{
+	unsigned int cpu, address, i, j;
+	int is_boot_cpu;
+
+	if (is_kdump_kernel())
+		/* Previous system stored the CPU states. Nothing to do. */
+		return;
+	if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP))
+		/* No previous system present, normal boot. */
+		return;
+	/* Set multi-threading state to that of the previous system. */
+	pcpu_set_smt(sclp_get_mtid_prev());
+	/* Collect CPU states; cpu counts every thread of every core. */
+	cpu = 0;
+	for (i = 0; i < info->configured; i++) {
+		/* Skip CPUs with different CPU type. */
+		if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+			continue;
+		for (j = 0; j <= smp_cpu_mtid; j++, cpu++) {
+			address = (info->cpu[i].core_id << smp_cpu_mt_shift) + j;
+			is_boot_cpu = (address == pcpu_devices[0].address);
+			if (is_boot_cpu && !OLDMEM_BASE)
+				/* Skip boot CPU for standard zfcp dump. */
+				continue;
+			/* Get state for this CPU. */
+			__smp_store_cpu_state(cpu, address, is_boot_cpu);
+		}
+	}
+}
+
+int smp_store_status(int cpu)
+{
+	struct pcpu *pcpu = &pcpu_devices[cpu];
+	unsigned long vx_sa;
+
+	if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
+			      0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
+		return -EIO;
+	if (MACHINE_HAS_VX) {
+		/* Store the additional status at the vector save area. */
+		vx_sa = __pa(pcpu->lowcore->vector_save_area_addr);
+		__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
+				  vx_sa, NULL);
+	}
+	return 0;
+}
+
+#endif /* CONFIG_CRASH_DUMP */
+
+void smp_cpu_set_polarization(int cpu, int val)
+{
+	(pcpu_devices + cpu)->polarization = val;
+}
+
+int smp_cpu_get_polarization(int cpu)
+{
+	return (pcpu_devices + cpu)->polarization;
+}
+
+static struct sclp_cpu_info *smp_get_cpu_info(void)
+{
+	static int use_sigp_detection;	/* sticky once sclp returned non-zero */
+	struct sclp_cpu_info *info;
+	int address;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (info && (use_sigp_detection || sclp_get_cpu_info(info))) {
+		use_sigp_detection = 1;
+		for (address = 0; address <= MAX_CPU_ADDRESS;
+		     address += (1U << smp_cpu_mt_shift)) {
+			if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) ==
+			    SIGP_CC_NOT_OPERATIONAL)
+				continue;	/* nothing at this address */
+			info->cpu[info->configured].core_id =
+				address >> smp_cpu_mt_shift;
+			info->configured++;
+		}
+		info->combined = info->configured;
+	}
+	return info;	/* NULL if kzalloc failed; callers kfree() it */
+}
+
+static int smp_add_present_cpu(int cpu);
+
+static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
+{
+	struct pcpu *pcpu;
+	cpumask_t avail;
+	int cpu, nr, i, j;
+	u16 address;
+
+	nr = 0;		/* number of cpus newly marked present */
+	cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
+	cpu = cpumask_first(&avail);	/* first free logical cpu number */
+	for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
+		if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+			continue;
+		address = info->cpu[i].core_id << smp_cpu_mt_shift;
+		for (j = 0; j <= smp_cpu_mtid; j++) {
+			if (pcpu_find_address(cpu_present_mask, address + j))
+				continue;	/* address already present */
+			pcpu = pcpu_devices + cpu;
+			pcpu->address = address + j;
+			pcpu->state =
+				(cpu >= info->configured*(smp_cpu_mtid + 1)) ?
+				CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
+			smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+			set_cpu_present(cpu, true);
+			if (sysfs_add && smp_add_present_cpu(cpu) != 0)
+				set_cpu_present(cpu, false);	/* undo */
+			else
+				nr++;
+			cpu = cpumask_next(cpu, &avail);
+			if (cpu >= nr_cpu_ids)
+				break;	/* no free logical cpu number left */
+		}
+	}
+	return nr;
+}
+
+static void __init smp_detect_cpus(void)
+{
+	unsigned int cpu, mtid, c_cpus, s_cpus;
+	struct sclp_cpu_info *info;
+	u16 address;
+
+	/* Get CPU information */
+	info = smp_get_cpu_info();
+	if (!info)
+		panic("smp_detect_cpus failed to allocate memory\n");
+
+	/* Find boot CPU type */
+	if (info->has_cpu_type) {
+		address = stap();
+		for (cpu = 0; cpu < info->combined; cpu++)
+			if (info->cpu[cpu].core_id == address) {
+				/* The boot cpu dictates the cpu type. */
+				boot_cpu_type = info->cpu[cpu].type;
+				break;
+			}
+		if (cpu >= info->combined)
+			panic("Could not find boot CPU type");
+	}
+
+#ifdef CONFIG_CRASH_DUMP
+	/* Collect CPU state of previous system */
+	smp_store_cpu_states(info);
+#endif
+
+	/* Set multi-threading for this system, capped by smp_max_threads */
+	mtid = sclp_get_mtid(boot_cpu_type);
+	mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
+	pcpu_set_smt(mtid);
+
+	/* Count and print the CPUs (threads) of the boot CPU type */
+	c_cpus = s_cpus = 0;
+	for (cpu = 0; cpu < info->combined; cpu++) {
+		if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type)
+			continue;
+		if (cpu < info->configured)
+			c_cpus += smp_cpu_mtid + 1;
+		else
+			s_cpus += smp_cpu_mtid + 1;
+	}
+	pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
+
+	/* Add CPUs present at boot (no sysfs entries are created yet) */
+	get_online_cpus();
+	__smp_rescan_cpus(info, 0);
+	put_online_cpus();
+	kfree(info);
+}
+
+/*
+ *	Entry point of a secondary processor, started from __cpu_up().
+ */
+static void smp_start_secondary(void *cpuvoid)
+{
+	S390_lowcore.last_update_clock = get_tod_clock();
+	S390_lowcore.restart_stack = (unsigned long) restart_stack;
+	S390_lowcore.restart_fn = (unsigned long) do_restart;
+	S390_lowcore.restart_data = 0;
+	S390_lowcore.restart_source = -1UL;
+	restore_access_regs(S390_lowcore.access_regs_save_area);
+	__ctl_load(S390_lowcore.cregs_save_area, 0, 15);
+	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+	cpu_init();
+	preempt_disable();
+	init_cpu_timer();
+	vtime_init();
+	pfault_init();
+	notify_cpu_starting(smp_processor_id());
+	set_cpu_online(smp_processor_id(), true); /* __cpu_up() polls this */
+	inc_irq_stat(CPU_RST);
+	local_irq_enable();
+	cpu_startup_entry(CPUHP_ONLINE);
+}
+
+/* Upping and downing of CPUs */
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+	struct pcpu *pcpu;
+	int base, i, rc;
+
+	pcpu = pcpu_devices + cpu;
+	if (pcpu->state != CPU_STATE_CONFIGURED)
+		return -EIO;
+	base = cpu - (cpu % (smp_cpu_mtid + 1));  /* first cpu of the core */
+	for (i = 0; i <= smp_cpu_mtid; i++) {
+		if (base + i < nr_cpu_ids)
+			if (cpu_online(base + i))
+				break;	/* a sibling is already online */
+	}
+	/*
+	 * If this is the first CPU of the core to get online
+	 * do an initial CPU reset.
+	 */
+	if (i > smp_cpu_mtid &&
+	    pcpu_sigp_retry(pcpu_devices + base, SIGP_INITIAL_CPU_RESET, 0) !=
+	    SIGP_CC_ORDER_CODE_ACCEPTED)
+		return -EIO;
+
+	rc = pcpu_alloc_lowcore(pcpu, cpu);
+	if (rc)
+		return rc;
+	pcpu_prepare_secondary(pcpu, cpu);
+	pcpu_attach_task(pcpu, tidle);
+	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
+	/* Wait until cpu puts itself in the online & active maps */
+	while (!cpu_online(cpu) || !cpu_active(cpu))
+		cpu_relax();
+	return 0;
+}
+
+static unsigned int setup_possible_cpus __initdata;	/* 0: not given */
+
+static int __init _setup_possible_cpus(char *s)
+{
+	get_option(&s, &setup_possible_cpus);	/* parse "possible_cpus=" */
+	return 0;
+}
+early_param("possible_cpus", _setup_possible_cpus);
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+int __cpu_disable(void)
+{
+	unsigned long cregs[16];
+
+	/* Handle possible pending IPIs before this cpu is marked offline */
+	smp_handle_ext_call();
+	set_cpu_online(smp_processor_id(), false);
+	/* Disable pseudo page faults on this cpu. */
+	pfault_fini();
+	/* Disable interrupt sources via control register. */
+	__ctl_store(cregs, 0, 15);
+	cregs[0]  &= ~0x0000ee70UL;	/* disable all external interrupts */
+	cregs[6]  &= ~0xff000000UL;	/* disable all I/O interrupts */
+	cregs[14] &= ~0x1f000000UL;	/* disable most machine checks */
+	__ctl_load(cregs, 0, 15);
+	clear_cpu_flag(CIF_NOHZ_DELAY);
+	return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+	struct pcpu *pcpu;
+
+	/* Wait until target cpu is down */
+	pcpu = pcpu_devices + cpu;
+	while (!pcpu_stopped(pcpu))
+		cpu_relax();
+	pcpu_free_lowcore(pcpu);	/* only after the cpu has stopped */
+	atomic_dec(&init_mm.context.attach_count);
+	cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+	if (MACHINE_HAS_TLB_LC)
+		cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
+}
+
+void __noreturn cpu_die(void)
+{
+	idle_task_exit();
+	pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
+	for (;;) ;	/* spin; this cpu just sent SIGP_STOP to itself */
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+void __init smp_fill_possible_mask(void)
+{
+	unsigned int possible, sclp, cpu;
+
+	sclp = min(smp_max_threads, sclp_get_mtid_max() + 1);
+	sclp = sclp_get_max_cpu()*sclp ?: nr_cpu_ids;	/* 0: use nr_cpu_ids */
+	possible = setup_possible_cpus ?: nr_cpu_ids;	/* "possible_cpus=" */
+	possible = min(possible, sclp);
+	for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
+		set_cpu_possible(cpu, true);
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	/* request the 0x1201 emergency signal external interrupt */
+	if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
+		panic("Couldn't request external interrupt 0x1201");
+	/* request the 0x1202 external call external interrupt (same handler) */
+	if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
+		panic("Couldn't request external interrupt 0x1202");
+	smp_detect_cpus();	/* marks the detected cpus as present */
+}
+
+void __init smp_prepare_boot_cpu(void)
+{
+	struct pcpu *pcpu = pcpu_devices;	/* entry 0 is the boot cpu */
+
+	pcpu->state = CPU_STATE_CONFIGURED;
+	pcpu->address = stap();
+	pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
+	S390_lowcore.percpu_offset = __per_cpu_offset[0];
+	smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
+	set_cpu_present(0, true);
+	set_cpu_online(0, true);
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+}	/* intentionally empty: @max_cpus is not used */
+
+void __init smp_setup_processor_id(void)
+{
+	S390_lowcore.cpu_nr = 0;	/* the running cpu gets number 0 */
+	S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
+}
+
+/*
+ * the frequency of the profiling timer can be changed
+ * by writing a multiplier value into /proc/profile.
+ *
+ * this implementation ignores @multiplier and always returns 0.
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static ssize_t cpu_configure_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	ssize_t count;
+
+	mutex_lock(&smp_cpu_state_mutex);	/* serialize with the store */
+	count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
+	mutex_unlock(&smp_cpu_state_mutex);
+	return count;
+}
+
+static ssize_t cpu_configure_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct pcpu *pcpu;
+	int cpu, val, rc, i;
+	char delim;
+
+	if (sscanf(buf, "%d %c", &val, &delim) != 1)
+		return -EINVAL;	/* exactly one number, no trailing text */
+	if (val != 0 && val != 1)
+		return -EINVAL;
+	get_online_cpus();
+	mutex_lock(&smp_cpu_state_mutex);
+	rc = -EBUSY;
+	/* disallow configuration changes of online cpus and cpu 0 */
+	cpu = dev->id;
+	cpu -= cpu % (smp_cpu_mtid + 1);	/* first cpu of the core */
+	if (cpu == 0)
+		goto out;
+	for (i = 0; i <= smp_cpu_mtid; i++)
+		if (cpu_online(cpu + i))
+			goto out;
+	pcpu = pcpu_devices + cpu;
+	rc = 0;
+	switch (val) {
+	case 0:		/* configured -> standby */
+		if (pcpu->state != CPU_STATE_CONFIGURED)
+			break;
+		rc = sclp_cpu_deconfigure(pcpu->address >> smp_cpu_mt_shift);
+		if (rc)
+			break;
+		for (i = 0; i <= smp_cpu_mtid; i++) {
+			if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
+				continue;
+			pcpu[i].state = CPU_STATE_STANDBY;
+			smp_cpu_set_polarization(cpu + i,
+						 POLARIZATION_UNKNOWN);
+		}
+		topology_expect_change();
+		break;
+	case 1:		/* standby -> configured */
+		if (pcpu->state != CPU_STATE_STANDBY)
+			break;
+		rc = sclp_cpu_configure(pcpu->address >> smp_cpu_mt_shift);
+		if (rc)
+			break;
+		for (i = 0; i <= smp_cpu_mtid; i++) {
+			if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
+				continue;
+			pcpu[i].state = CPU_STATE_CONFIGURED;
+			smp_cpu_set_polarization(cpu + i,
+						 POLARIZATION_UNKNOWN);
+		}
+		topology_expect_change();
+		break;
+	default:
+		break;
+	}
+out:
+	mutex_unlock(&smp_cpu_state_mutex);
+	put_online_cpus();
+	return rc ? rc : count;	/* whole input consumed on success */
+}
+static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static ssize_t show_cpu_address(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
+}
+static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);	/* read-only */
+
+static struct attribute *cpu_common_attrs[] = {
+#ifdef CONFIG_HOTPLUG_CPU
+	&dev_attr_configure.attr,
+#endif
+	&dev_attr_address.attr,
+	NULL,	/* terminator */
+};
+
+static struct attribute_group cpu_common_attr_group = {
+	.attrs = cpu_common_attrs,	/* created for every present cpu */
+};
+
+static struct attribute *cpu_online_attrs[] = {
+	&dev_attr_idle_count.attr,
+	&dev_attr_idle_time_us.attr,
+	NULL,	/* terminator */
+};
+
+static struct attribute_group cpu_online_attr_group = {
+	.attrs = cpu_online_attrs,	/* created only while cpu is online */
+};
+
+static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
+			  void *hcpu)
+{
+	unsigned int cpu = (unsigned int)(long)hcpu;
+	struct device *s = &per_cpu(cpu_device, cpu)->dev;
+	int err = 0;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_ONLINE:	/* add the online-only attributes */
+		err = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
+		break;
+	case CPU_DEAD:		/* and remove them again */
+		sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
+		break;
+	}
+	return notifier_from_errno(err);
+}
+
+static int smp_add_present_cpu(int cpu)
+{
+	struct device *s;
+	struct cpu *c;
+	int rc;
+
+	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	if (!c)
+		return -ENOMEM;
+	per_cpu(cpu_device, cpu) = c;
+	s = &c->dev;
+	c->hotpluggable = 1;
+	rc = register_cpu(c, cpu);
+	if (rc)
+		goto out;
+	rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
+	if (rc)
+		goto out_cpu;
+	if (cpu_online(cpu)) {
+		rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
+		if (rc)
+			goto out_online;
+	}
+	rc = topology_cpu_init(c);
+	if (rc)
+		goto out_topology;
+	return 0;
+
+out_topology:	/* unwind in reverse order of setup */
+	if (cpu_online(cpu))
+		sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
+out_online:
+	sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
+out_cpu:
+#ifdef CONFIG_HOTPLUG_CPU
+	unregister_cpu(c);
+#endif
+out:
+	return rc;	/* NOTE(review): c is not kfree()d on error - confirm */
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+int __ref smp_rescan_cpus(void)
+{
+	struct sclp_cpu_info *info;
+	int nr;
+
+	info = smp_get_cpu_info();
+	if (!info)
+		return -ENOMEM;
+	get_online_cpus();
+	mutex_lock(&smp_cpu_state_mutex);
+	nr = __smp_rescan_cpus(info, 1);	/* 1: also add sysfs entries */
+	mutex_unlock(&smp_cpu_state_mutex);
+	put_online_cpus();
+	kfree(info);
+	if (nr)		/* at least one cpu was added */
+		topology_schedule_update();
+	return 0;
+}
+
+static ssize_t __ref rescan_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	int rc = smp_rescan_cpus();
+
+	if (rc)
+		return rc;
+	return count;
+}
+static DEVICE_ATTR(rescan, 0200, NULL, rescan_store);
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int __init s390_smp_init(void)
+{
+	int cpu, rc = 0;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
+	if (rc)
+		return rc;
+#endif
+	cpu_notifier_register_begin();
+	for_each_present_cpu(cpu) {
+		rc = smp_add_present_cpu(cpu);
+		if (rc)
+			goto out;	/* notifier is not registered then */
+	}
+
+	__hotcpu_notifier(smp_cpu_notify, 0);
+
+out:
+	cpu_notifier_register_done();
+	return rc;
+}
+subsys_initcall(s390_smp_init);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
new file mode 100644
index 000000000..1785cd822
--- /dev/null
+++ b/arch/s390/kernel/stacktrace.c
@@ -0,0 +1,96 @@
+/*
+ * Stack trace management functions
+ *
+ *  Copyright IBM Corp. 2006
+ *  Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+
+static unsigned long save_context_stack(struct stack_trace *trace,
+					unsigned long sp,
+					unsigned long low,
+					unsigned long high,
+					int savesched)
+{
+	struct stack_frame *sf;
+	struct pt_regs *regs;
+	unsigned long addr;
+
+	while(1) {
+		sp &= PSW_ADDR_INSN;
+		if (sp < low || sp > high)
+			return sp;	/* sp is outside of [low, high] */
+		sf = (struct stack_frame *)sp;
+		while(1) {
+			addr = sf->gprs[8] & PSW_ADDR_INSN;
+			if (!trace->skip)
+				trace->entries[trace->nr_entries++] = addr;
+			else
+				trace->skip--;
+			if (trace->nr_entries >= trace->max_entries)
+				return sp;	/* trace buffer is full */
+			low = sp;
+			sp = sf->back_chain & PSW_ADDR_INSN;
+			if (!sp)
+				break;
+			if (sp <= low || sp > high - sizeof(*sf))
+				return sp;	/* chain must move upwards */
+			sf = (struct stack_frame *)sp;
+		}
+		/* Zero backchain detected, check for interrupt frame. */
+		sp = (unsigned long)(sf + 1);
+		if (sp <= low || sp > high - sizeof(*regs))
+			return sp;
+		regs = (struct pt_regs *)sp;
+		addr = regs->psw.addr & PSW_ADDR_INSN;
+		if (savesched || !in_sched_functions(addr)) {
+			if (!trace->skip)
+				trace->entries[trace->nr_entries++] = addr;
+			else
+				trace->skip--;
+		}
+		if (trace->nr_entries >= trace->max_entries)
+			return sp;
+		low = sp;
+		sp = regs->gprs[15];	/* continue with the saved %r15 */
+	}
+}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+	register unsigned long sp asm ("15");
+	unsigned long orig_sp, new_sp;
+
+	orig_sp = sp & PSW_ADDR_INSN;
+	new_sp = save_context_stack(trace, orig_sp,
+				    S390_lowcore.panic_stack - PAGE_SIZE,
+				    S390_lowcore.panic_stack, 1);
+	if (new_sp != orig_sp)
+		return;		/* sp changed, so this stack was walked */
+	new_sp = save_context_stack(trace, new_sp,
+				    S390_lowcore.async_stack - ASYNC_SIZE,
+				    S390_lowcore.async_stack, 1);
+	if (new_sp != orig_sp)
+		return;		/* sp changed, so this stack was walked */
+	save_context_stack(trace, new_sp,
+			   S390_lowcore.thread_info,
+			   S390_lowcore.thread_info + THREAD_SIZE, 1);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	unsigned long sp, low, high;
+
+	sp = tsk->thread.ksp & PSW_ADDR_INSN;
+	low = (unsigned long) task_stack_page(tsk);
+	high = (unsigned long) task_pt_regs(tsk);
+	save_context_stack(trace, sp, low, high, 0);	/* 0: savesched off */
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX; /* end mark */
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
new file mode 100644
index 000000000..d3236c9e2
--- /dev/null
+++ b/arch/s390/kernel/suspend.c
@@ -0,0 +1,225 @@
+/*
+ * Suspend support specific for s390.
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
+ */
+
+#include <linux/pfn.h>
+#include <linux/suspend.h>
+#include <linux/mm.h>
+#include <asm/ctl_reg.h>
+#include <asm/ipl.h>
+#include <asm/cio.h>
+#include <asm/pci.h>
+#include <asm/sections.h>
+#include "entry.h"
+
+/*
+ * The restore of the saved pages in an hibernation image will set
+ * the change and referenced bits in the storage key for each page.
+ * Overindication of the referenced bits after an hibernation cycle
+ * does not cause any harm but the overindication of the change bits
+ * would cause trouble.
+ * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each
+ * page to the most significant byte of the associated page frame
+ * number in the hibernation image.
+ */
+
+/*
+ * Key storage is allocated as a linked list of pages.
+ * The size of the keys array is (PAGE_SIZE - sizeof(long))
+ */
+struct page_key_data {
+	struct page_key_data *next;	/* next page of the list or NULL */
+	unsigned char data[];		/* one storage key per byte */
+};
+
+#define PAGE_KEY_DATA_SIZE	(PAGE_SIZE - sizeof(struct page_key_data *))
+
+static struct page_key_data *page_key_data;
+static struct page_key_data *page_key_rp, *page_key_wp;
+static unsigned long page_key_rx, page_key_wx;
+unsigned long suspend_zero_pages;
+
+/*
+ * For each page in the hibernation image one additional byte is
+ * stored in the most significant byte of the page frame number.
+ * On suspend no additional memory is required but on resume the
+ * keys need to be memorized until the page data has been restored.
+ * Only then can the storage keys be set to their old state.
+ */
+unsigned long page_key_additional_pages(unsigned long pages)
+{
+	return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);	/* one byte per page */
+}
+
+/*
+ * Release every page of the page_key_data list; the list is empty afterwards.
+ */
+void page_key_free(void)
+{
+	struct page_key_data *cur, *next;
+
+	for (cur = page_key_data; cur; cur = next) {
+		next = cur->next;
+		page_key_data = next;
+		free_page((unsigned long) cur);
+	}
+}
+
+/*
+ * Build the page_key_data list of arrays with room for one key byte per
+ * page of the hibernation image and reset the read and write cursors.
+ */
+int page_key_alloc(unsigned long pages)
+{
+	unsigned long nr_arrays = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
+	struct page_key_data *pk;
+
+	/* Push each new zeroed page onto the head of the list. */
+	for (; nr_arrays; nr_arrays--) {
+		pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL);
+		if (!pk) {
+			page_key_free();
+			return -ENOMEM;
+		}
+		pk->next = page_key_data;
+		page_key_data = pk;
+	}
+	page_key_rp = page_key_wp = page_key_data;
+	page_key_rx = page_key_wx = 0;
+	return 0;
+}
+
+/*
+ * Save the storage key into the upper 8 bits of the page frame number.
+ */
+void page_key_read(unsigned long *pfn)
+{
+	unsigned long addr;
+
+	addr = (unsigned long) page_address(pfn_to_page(*pfn));
+	*(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr);
+}
+
+/*
+ * Extract the storage key from the upper 8 bits of the page frame number
+ * and store it in the page_key_data list of arrays.
+ */
+void page_key_memorize(unsigned long *pfn)
+{
+	page_key_wp->data[page_key_wx] = *(unsigned char *) pfn;
+	*(unsigned char *) pfn = 0;	/* hand back a clean pfn */
+	if (++page_key_wx < PAGE_KEY_DATA_SIZE)
+		return;	/* still room in the current array */
+	page_key_wp = page_key_wp->next;
+	page_key_wx = 0;
+}
+
+/*
+ * Get the next key from the page_key_data list of arrays (in the order
+ * page_key_memorize() stored them) and set the storage key of the page
+ * referred by @address. For a "safe" page the swsusp_arch_resume code
+ * transfers the storage key from the buffer page to the original page.
+ */
+void page_key_write(void *address)
+{
+	page_set_storage_key((unsigned long) address,
+			     page_key_rp->data[page_key_rx], 0);
+	if (++page_key_rx < PAGE_KEY_DATA_SIZE)
+		return;	/* more keys left in the current array */
+	page_key_rp = page_key_rp->next;
+	page_key_rx = 0;
+}
+
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
+	unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
+	unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1;
+	unsigned long stext_pfn = PFN_DOWN(__pa(&_stext));
+
+	/* Always save lowcore pages (LC protection might be enabled). */
+	if (pfn <= LC_PAGES)
+		return 0;
+	if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
+		return 1;	/* inside the nosave section */
+	/* Pages from _stext up to _eshared: skipped only for an NSS ipl. */
+	if (pfn >= stext_pfn && pfn <= eshared_pfn)
+		return ipl_info.type == IPL_TYPE_NSS ? 1 : 0;
+	if (tprot(PFN_PHYS(pfn)))
+		return 1;	/* tprot() reported non-zero for this page */
+	return 0;
+}
+
+/*
+ * PM notifier callback: allocate/free the buffer for the absolute zero pages
+ */
+static int suspend_pm_cb(struct notifier_block *nb, unsigned long action,
+			 void *ptr)
+{
+	switch (action) {
+	case PM_SUSPEND_PREPARE:
+	case PM_HIBERNATION_PREPARE:
+		suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER);
+		if (!suspend_zero_pages)
+			return NOTIFY_BAD;
+		break;
+	case PM_POST_SUSPEND:
+	case PM_POST_HIBERNATION:
+		free_pages(suspend_zero_pages, LC_ORDER);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+	return NOTIFY_OK;
+}
+
+static int __init suspend_pm_init(void)
+{
+	pm_notifier(suspend_pm_cb, 0);	/* register suspend_pm_cb */
+	return 0;
+}
+arch_initcall(suspend_pm_init);
+
+void save_processor_state(void)
+{
+	/* swsusp_arch_suspend() actually saves all cpu register contents.
+	 * Machine checks must be disabled since swsusp_arch_suspend() stores
+	 * register contents to their lowcore save areas. That's the same
+	 * place where register contents on machine checks would be saved.
+	 * To avoid register corruption disable machine checks.
+	 * We must also disable machine checks in the new psw mask for
+	 * program checks, since swsusp_arch_suspend() may generate program
+	 * checks. Disabling machine checks for all other new psw masks is
+	 * just paranoia.
+	 */
+	local_mcck_disable();
+	/* Disable lowcore protection */
+	__ctl_clear_bit(0,28);
+	S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK;
+	S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK;
+	S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK;
+	S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK;
+}
+
+void restore_processor_state(void)
+{
+	S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK;
+	S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK;
+	S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK;
+	S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK;
+	/* Enable lowcore protection (undoes save_processor_state()) */
+	__ctl_set_bit(0,28);
+	local_mcck_enable();
+}
+
+/* Called at the end of swsusp_arch_resume, with DAT switched on again */
+void s390_early_resume(void)
+{
+	lgr_info_log();
+	channel_subsystem_reinit();
+	zpci_rescan();
+}
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
new file mode 100644
index 000000000..ca6294645
--- /dev/null
+++ b/arch/s390/kernel/swsusp.S
@@ -0,0 +1,317 @@
+/*
+ * S390 64-bit swsusp implementation
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
+ *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/sigp.h>
+
+/*
+ * Save register context in absolute 0 lowcore and call swsusp_save() to
+ * create in-memory kernel image. The context is saved in the designated
+ * "store status" memory locations (see POP).
+ * We return from this function twice. The first time during the suspend to
+ * disk process. The second time via the swsusp_arch_resume() function
+ * (see below) in the resume process.
+ * This function runs with disabled interrupts.
+ */
+	.section .text
+ENTRY(swsusp_arch_suspend)
+	stmg	%r6,%r15,__SF_GPRS(%r15)
+	lgr	%r1,%r15
+	aghi	%r15,-STACK_FRAME_OVERHEAD
+	stg	%r1,__SF_BACKCHAIN(%r15)
+
+	/* Deactivate DAT */
+	stnsm	__SF_EMPTY(%r15),0xfb
+
+	/* Store prefix register on stack */
+	stpx	__SF_EMPTY(%r15)
+
+	/* Save prefix register contents for lowcore copy */
+	llgf	%r10,__SF_EMPTY(%r15)
+
+	/* Get pointer to save area */
+	lghi	%r1,0x1000
+
+	/* Save CPU address */
+	stap	__LC_EXT_CPU_ADDR(%r0)
+
+	/* Store registers */
+	mvc	0x318(4,%r1),__SF_EMPTY(%r15)	/* move prefix to lowcore */
+	stfpc	0x31c(%r1)			/* store fpu control */
+	std	0,0x200(%r1)			/* store f0 */
+	std	1,0x208(%r1)			/* store f1 */
+	std	2,0x210(%r1)			/* store f2 */
+	std	3,0x218(%r1)			/* store f3 */
+	std	4,0x220(%r1)			/* store f4 */
+	std	5,0x228(%r1)			/* store f5 */
+	std	6,0x230(%r1)			/* store f6 */
+	std	7,0x238(%r1)			/* store f7 */
+	std	8,0x240(%r1)			/* store f8 */
+	std	9,0x248(%r1)			/* store f9 */
+	std	10,0x250(%r1)			/* store f10 */
+	std	11,0x258(%r1)			/* store f11 */
+	std	12,0x260(%r1)			/* store f12 */
+	std	13,0x268(%r1)			/* store f13 */
+	std	14,0x270(%r1)			/* store f14 */
+	std	15,0x278(%r1)			/* store f15 */
+	stam	%a0,%a15,0x340(%r1)		/* store access registers */
+	stctg	%c0,%c15,0x380(%r1)		/* store control registers */
+	stmg	%r0,%r15,0x280(%r1)		/* store general registers */
+
+	stpt	0x328(%r1)			/* store timer */
+	stck	__SF_EMPTY(%r15)		/* store clock */
+	stckc	0x330(%r1)			/* store clock comparator */
+
+	/* Update cputime accounting before going to sleep */
+	lg	%r0,__LC_LAST_UPDATE_TIMER
+	slg	%r0,0x328(%r1)
+	alg	%r0,__LC_SYSTEM_TIMER
+	stg	%r0,__LC_SYSTEM_TIMER
+	mvc	__LC_LAST_UPDATE_TIMER(8),0x328(%r1)
+	lg	%r0,__LC_LAST_UPDATE_CLOCK
+	slg	%r0,__SF_EMPTY(%r15)
+	alg	%r0,__LC_STEAL_TIMER
+	stg	%r0,__LC_STEAL_TIMER
+	mvc	__LC_LAST_UPDATE_CLOCK(8),__SF_EMPTY(%r15)
+
+	/* Activate DAT */
+	stosm	__SF_EMPTY(%r15),0x04
+
+	/* Set prefix page to zero */
+	xc	__SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
+	spx	__SF_EMPTY(%r15)
+
+	/* Save absolute zero pages to the suspend_zero_pages buffer */
+	larl	%r2,suspend_zero_pages
+	lg	%r2,0(%r2)
+	lghi	%r4,0
+	lghi	%r3,2*PAGE_SIZE
+	lghi	%r5,2*PAGE_SIZE
+1:	mvcle	%r2,%r4,0
+	jo	1b				/* copy not finished yet */
+
+	/* Copy lowcore (prefix saved in %r10) to absolute zero lowcore */
+	lghi	%r2,0
+	lgr	%r4,%r10
+	lghi	%r3,2*PAGE_SIZE
+	lghi	%r5,2*PAGE_SIZE
+1:	mvcle	%r2,%r4,0
+	jo	1b				/* copy not finished yet */
+
+	/* Save image */
+	brasl	%r14,swsusp_save
+
+	/* Restore prefix register and return */
+	lghi	%r1,0x1000
+	spx	0x318(%r1)
+	lmg	%r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
+	lghi	%r2,0				/* return 0 */
+	br	%r14
+
+/*
+ * Restore saved memory image to correct place and restore register context.
+ * Then we return to the function that called swsusp_arch_suspend().
+ * swsusp_arch_resume() runs with disabled interrupts.
+ */
+ENTRY(swsusp_arch_resume)
+	stmg	%r6,%r15,__SF_GPRS(%r15)
+	lgr	%r1,%r15
+	aghi	%r15,-STACK_FRAME_OVERHEAD
+	stg	%r1,__SF_BACKCHAIN(%r15)
+
+	/* Make all free pages stable */
+	lghi	%r2,1
+	brasl	%r14,arch_set_page_states
+
+	/* Deactivate DAT */
+	stnsm	__SF_EMPTY(%r15),0xfb
+
+	/* Set prefix page to zero */
+	xc	__SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
+	spx	__SF_EMPTY(%r15)
+
+	/* Restore saved image: walk the restore_pblist list */
+	larl	%r1,restore_pblist
+	lg	%r1,0(%r1)
+	ltgr	%r1,%r1
+	jz	2f
+0:
+	lg	%r2,8(%r1)		/* copy destination */
+	lg	%r4,0(%r1)		/* copy source */
+	iske	%r0,%r4			/* storage key of the source page */
+	lghi	%r3,PAGE_SIZE
+	lghi	%r5,PAGE_SIZE
+1:
+	mvcle	%r2,%r4,0
+	jo	1b			/* copy not finished yet */
+	lg	%r2,8(%r1)
+	sske	%r0,%r2			/* set key on the destination page */
+	lg	%r1,16(%r1)		/* next list element */
+	ltgr	%r1,%r1
+	jnz	0b
+2:
+	ptlb				/* flush tlb */
+
+	/* Reset System */
+	larl	%r1,restart_entry
+	larl	%r2,.Lrestart_diag308_psw
+	og	%r1,0(%r2)
+	stg	%r1,0(%r0)
+	larl	%r1,.Lnew_pgm_check_psw
+	epsw	%r2,%r3
+	stm	%r2,%r3,0(%r1)
+	mvc	__LC_PGM_NEW_PSW(16,%r0),0(%r1)
+	lghi	%r0,0
+	diag	%r0,%r0,0x308
+restart_entry:
+	lhi	%r1,1
+	sigp	%r1,%r0,SIGP_SET_ARCHITECTURE
+	sam64
+#ifdef CONFIG_SMP
+	larl	%r1,smp_cpu_mt_shift
+	icm	%r1,15,0(%r1)
+	jz	smt_done
+	llgfr	%r1,%r1
+smt_loop:
+	sigp	%r1,%r0,SIGP_SET_MULTI_THREADING
+	brc	8,smt_done			/* accepted */
+	brc	2,smt_loop			/* busy, try again */
+smt_done:
+#endif
+	larl	%r1,.Lnew_pgm_check_psw
+	lpswe	0(%r1)
+pgm_check_entry:
+
+	/* Switch to original suspend CPU */
+	larl	%r1,.Lresume_cpu		/* Resume CPU address: r2 */
+	stap	0(%r1)
+	llgh	%r2,0(%r1)
+	llgh	%r1,__LC_EXT_CPU_ADDR(%r0)	/* Suspend CPU address: r1 */
+	cgr	%r1,%r2
+	je	restore_registers		/* r1 = r2 -> nothing to do */
+	larl	%r4,.Lrestart_suspend_psw	/* Set new restart PSW */
+	mvc	__LC_RST_NEW_PSW(16,%r0),0(%r4)
+3:
+	sigp	%r9,%r1,SIGP_INITIAL_CPU_RESET	/* sigp initial cpu reset */
+	brc	8,4f				/* accepted */
+	brc	2,3b				/* busy, try again */
+
+	/* Suspend CPU not available -> panic */
+	larl	%r15,init_thread_union
+	ahi	%r15,1<<(PAGE_SHIFT+THREAD_ORDER)
+	larl	%r2,.Lpanic_string
+	larl	%r3,_sclp_print_early
+	lghi	%r1,0
+	sam31
+	sigp	%r1,%r0,SIGP_SET_ARCHITECTURE
+	basr	%r14,%r3
+	larl	%r3,.Ldisabled_wait_31
+	lpsw	0(%r3)
+4:
+	/* Switch to suspend CPU */
+	sigp	%r9,%r1,SIGP_RESTART	/* sigp restart to suspend CPU */
+	brc	2,4b			/* busy, try again */
+5:
+	sigp	%r9,%r2,SIGP_STOP	/* sigp stop to current resume CPU */
+	brc	2,5b			/* busy, try again */
+6:	j	6b
+
+restart_suspend:
+	larl	%r1,.Lresume_cpu
+	llgh	%r2,0(%r1)
+7:
+	sigp	%r9,%r2,SIGP_SENSE	/* sigp sense, wait for resume CPU */
+	brc	8,7b			/* accepted, status 0, still running */
+	brc	2,7b			/* busy, try again */
+	tmll	%r9,0x40		/* Test if resume CPU is stopped */
+	jz	7b
+
+restore_registers:
+	/* Restore registers */
+	lghi	%r13,0x1000		/* %r13 = pointer to save area */
+
+	/* Ignore time spent in suspended state. */
+	llgf	%r1,0x318(%r13)
+	stck	__LC_LAST_UPDATE_CLOCK(%r1)
+	spt	0x328(%r13)		/* reprogram timer */
+	//sckc	0x330(%r13)		/* set clock comparator */
+
+	lctlg	%c0,%c15,0x380(%r13)	/* load control registers */
+	lam	%a0,%a15,0x340(%r13)	/* load access registers */
+
+	lfpc	0x31c(%r13)		/* load fpu control */
+	ld	0,0x200(%r13)		/* load f0 */
+	ld	1,0x208(%r13)		/* load f1 */
+	ld	2,0x210(%r13)		/* load f2 */
+	ld	3,0x218(%r13)		/* load f3 */
+	ld	4,0x220(%r13)		/* load f4 */
+	ld	5,0x228(%r13)		/* load f5 */
+	ld	6,0x230(%r13)		/* load f6 */
+	ld	7,0x238(%r13)		/* load f7 */
+	ld	8,0x240(%r13)		/* load f8 */
+	ld	9,0x248(%r13)		/* load f9 */
+	ld	10,0x250(%r13)		/* load f10 */
+	ld	11,0x258(%r13)		/* load f11 */
+	ld	12,0x260(%r13)		/* load f12 */
+	ld	13,0x268(%r13)		/* load f13 */
+	ld	14,0x270(%r13)		/* load f14 */
+	ld	15,0x278(%r13)		/* load f15 */
+
+	/* Load old stack (%r15 was stored at 0x280 + 15*8 on suspend) */
+	lg	%r15,0x2f8(%r13)
+
+	/* Save prefix register */
+	mvc __SF_EMPTY(4,%r15),0x318(%r13)
+
+	/* Restore absolute zero pages */
+	lghi	%r2,0
+	larl	%r4,suspend_zero_pages
+	lg	%r4,0(%r4)
+	lghi	%r3,2*PAGE_SIZE
+	lghi	%r5,2*PAGE_SIZE
+1:	mvcle	%r2,%r4,0
+	jo	1b			/* copy not finished yet */
+
+	/* Restore prefix register */
+	spx	__SF_EMPTY(%r15)
+
+	/* Activate DAT */
+	stosm	__SF_EMPTY(%r15),0x04
+
+	/* Make all free pages unstable */
+	lghi	%r2,0
+	brasl	%r14,arch_set_page_states
+
+	/* Call arch specific early resume code */
+	brasl	%r14,s390_early_resume
+
+	/* Return 0 */
+	lmg	%r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
+	lghi	%r2,0
+	br	%r14
+
+	.section .data..nosave,"aw",@progbits
+	.align	8
+.Ldisabled_wait_31:				/* loaded with lpsw on panic */
+	.long  0x000a0000,0x00000000
+.Lpanic_string:
+	.asciz	"Resume not possible because suspend CPU is no longer available"
+	.align	8
+.Lrestart_diag308_psw:				/* or-ed with restart_entry */
+	.long	0x00080000,0x80000000
+.Lrestart_suspend_psw:				/* restart PSW: restart_suspend */
+	.quad	0x0000000180000000,restart_suspend
+.Lnew_pgm_check_psw:				/* mask is filled in via epsw */
+	.quad	0,pgm_check_entry
+.Lresume_cpu:					/* address of the resume CPU */
+	.byte	0,0
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
new file mode 100644
index 000000000..f145490cc
--- /dev/null
+++ b/arch/s390/kernel/sys_s390.c
@@ -0,0 +1,91 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Thomas Spatzier (tspat@de.ibm.com)
+ *
+ *  Derived from "arch/i386/kernel/sys_i386.c"
+ *
+ *  This file contains various random system calls that
+ *  have a non-standard calling sequence on the Linux/s390
+ *  platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <linux/personality.h>
+#include <linux/unistd.h>
+#include <linux/ipc.h>
+#include <asm/uaccess.h>
+#include "entry.h"
+
+/*
+ * Perform the mmap() system call. Linux for S/390 isn't able to handle more
+ * than 5 system call parameters, so this system call uses a memory block
+ * for parameter passing.
+ */
+
+struct s390_mmap_arg_struct {
+	unsigned long addr;	/* forwarded as 1st argument of sys_mmap_pgoff() */
+	unsigned long len;	/* forwarded as 2nd argument */
+	unsigned long prot;	/* forwarded as 3rd argument */
+	unsigned long flags;	/* forwarded as 4th argument */
+	unsigned long fd;	/* forwarded as 5th argument */
+	unsigned long offset;	/* forwarded as 6th argument (page offset? - confirm) */
+};
+
+SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
+{
+	struct s390_mmap_arg_struct a;
+	long error = -EFAULT;	/* long: an int would truncate the mapped address */
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		goto out;	/* argument block not readable: report -EFAULT */
+	error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
+out:
+	return error;
+}
+
+/*
+ * s390 flavour of sys_ipc(), the de-multiplexer for the SysV IPC calls.
+ *
+ * It takes five parameters where the generic variant takes six. The only
+ * difference is the SEMTIMEDOP subcall: on s390 the pointer to the struct
+ * timespec travels in the third parameter, the generic variant expects it
+ * in the fifth. Handing the third parameter to sys_ipc() a second time, as
+ * its fifth parameter, is therefore all the translation that is needed.
+ */
+SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
+		unsigned long, third, void __user *, ptr)
+{
+	/* Only the low 16 bits of call may be set. */
+	if (call & ~0xffffU)
+		return -EINVAL;
+
+	return sys_ipc(call, first, second, third, ptr, third);
+}
+
+SYSCALL_DEFINE1(s390_personality, unsigned int, personality)
+{
+	unsigned int previous;
+
+	/* A PER_LINUX request from a PER_LINUX32 task gets PER_LINUX32 added. */
+	if (personality(personality) == PER_LINUX &&
+	    personality(current->personality) == PER_LINUX32)
+		personality |= PER_LINUX32;
+	previous = sys_personality(personality);
+	if (personality(previous) == PER_LINUX32)
+		previous &= ~PER_LINUX32;
+	return previous;
+}
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
new file mode 100644
index 000000000..1acad0268
--- /dev/null
+++ b/arch/s390/kernel/syscalls.S
@@ -0,0 +1,365 @@
+/*
+ * definitions for sys_call_table, each line represents an
+ * entry in the table in the form
+ * SYSCALL(64 bit syscall, 31 bit emulated syscall)
+ *
+ * this file is meant to be included from entry.S
+ */
+
+#define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall) /* ni for 64 and 31 bit */
+
+NI_SYSCALL						/* 0 */
+SYSCALL(sys_exit,compat_sys_exit)
+SYSCALL(sys_fork,sys_fork)
+SYSCALL(sys_read,compat_sys_s390_read)
+SYSCALL(sys_write,compat_sys_s390_write)
+SYSCALL(sys_open,compat_sys_open)			/* 5 */
+SYSCALL(sys_close,compat_sys_close)
+SYSCALL(sys_restart_syscall,sys_restart_syscall)
+SYSCALL(sys_creat,compat_sys_creat)
+SYSCALL(sys_link,compat_sys_link)
+SYSCALL(sys_unlink,compat_sys_unlink)			/* 10 */
+SYSCALL(sys_execve,compat_sys_execve)
+SYSCALL(sys_chdir,compat_sys_chdir)
+SYSCALL(sys_ni_syscall,compat_sys_time)			/* old time syscall */
+SYSCALL(sys_mknod,compat_sys_mknod)
+SYSCALL(sys_chmod,compat_sys_chmod)			/* 15 */
+SYSCALL(sys_ni_syscall,compat_sys_s390_lchown16)	/* old lchown16 syscall*/
+NI_SYSCALL						/* old break syscall holder */
+NI_SYSCALL						/* old stat syscall holder */
+SYSCALL(sys_lseek,compat_sys_lseek)
+SYSCALL(sys_getpid,sys_getpid)				/* 20 */
+SYSCALL(sys_mount,compat_sys_mount)
+SYSCALL(sys_oldumount,compat_sys_oldumount)
+SYSCALL(sys_ni_syscall,compat_sys_s390_setuid16)	/* old setuid16 syscall*/
+SYSCALL(sys_ni_syscall,compat_sys_s390_getuid16)	/* old getuid16 syscall*/
+SYSCALL(sys_ni_syscall,compat_sys_stime)		/* 25 old stime syscall */
+SYSCALL(sys_ptrace,compat_sys_ptrace)
+SYSCALL(sys_alarm,compat_sys_alarm)
+NI_SYSCALL						/* old fstat syscall */
+SYSCALL(sys_pause,sys_pause)
+SYSCALL(sys_utime,compat_sys_utime)			/* 30 */
+NI_SYSCALL						/* old stty syscall */
+NI_SYSCALL						/* old gtty syscall */
+SYSCALL(sys_access,compat_sys_access)
+SYSCALL(sys_nice,compat_sys_nice)
+NI_SYSCALL						/* 35 old ftime syscall */
+SYSCALL(sys_sync,sys_sync)
+SYSCALL(sys_kill,compat_sys_kill)
+SYSCALL(sys_rename,compat_sys_rename)
+SYSCALL(sys_mkdir,compat_sys_mkdir)
+SYSCALL(sys_rmdir,compat_sys_rmdir)			/* 40 */
+SYSCALL(sys_dup,compat_sys_dup)
+SYSCALL(sys_pipe,compat_sys_pipe)
+SYSCALL(sys_times,compat_sys_times)
+NI_SYSCALL						/* old prof syscall */
+SYSCALL(sys_brk,compat_sys_brk)				/* 45 */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setgid16)	/* old setgid16 syscall*/
+SYSCALL(sys_ni_syscall,compat_sys_s390_getgid16)	/* old getgid16 syscall*/
+SYSCALL(sys_signal,compat_sys_signal)
+SYSCALL(sys_ni_syscall,compat_sys_s390_geteuid16)	/* old geteuid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_getegid16)	/* 50 old getegid16 syscall */
+SYSCALL(sys_acct,compat_sys_acct)
+SYSCALL(sys_umount,compat_sys_umount)
+NI_SYSCALL						/* old lock syscall */
+SYSCALL(sys_ioctl,compat_sys_ioctl)
+SYSCALL(sys_fcntl,compat_sys_fcntl)			/* 55 */
+NI_SYSCALL						/* intel mpx syscall */
+SYSCALL(sys_setpgid,compat_sys_setpgid)
+NI_SYSCALL						/* old ulimit syscall */
+NI_SYSCALL						/* old uname syscall */
+SYSCALL(sys_umask,compat_sys_umask)			/* 60 */
+SYSCALL(sys_chroot,compat_sys_chroot)
+SYSCALL(sys_ustat,compat_sys_ustat)
+SYSCALL(sys_dup2,compat_sys_dup2)
+SYSCALL(sys_getppid,sys_getppid)
+SYSCALL(sys_getpgrp,sys_getpgrp)			/* 65 */
+SYSCALL(sys_setsid,sys_setsid)
+SYSCALL(sys_sigaction,compat_sys_sigaction)
+NI_SYSCALL						/* old sgetmask syscall*/
+NI_SYSCALL						/* old ssetmask syscall*/
+SYSCALL(sys_ni_syscall,compat_sys_s390_setreuid16)	/* old setreuid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setregid16)	/* old setregid16 syscall */
+SYSCALL(sys_sigsuspend,compat_sys_sigsuspend)
+SYSCALL(sys_sigpending,compat_sys_sigpending)
+SYSCALL(sys_sethostname,compat_sys_sethostname)
+SYSCALL(sys_setrlimit,compat_sys_setrlimit)		/* 75 */
+SYSCALL(sys_getrlimit,compat_sys_old_getrlimit)
+SYSCALL(sys_getrusage,compat_sys_getrusage)
+SYSCALL(sys_gettimeofday,compat_sys_gettimeofday)
+SYSCALL(sys_settimeofday,compat_sys_settimeofday)
+SYSCALL(sys_ni_syscall,compat_sys_s390_getgroups16)	/* 80 old getgroups16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setgroups16)	/* old setgroups16 syscall */
+NI_SYSCALL						/* old select syscall */
+SYSCALL(sys_symlink,compat_sys_symlink)
+NI_SYSCALL						/* old lstat syscall */
+SYSCALL(sys_readlink,compat_sys_readlink)		/* 85 */
+SYSCALL(sys_uselib,compat_sys_uselib)
+SYSCALL(sys_swapon,compat_sys_swapon)
+SYSCALL(sys_reboot,compat_sys_reboot)
+SYSCALL(sys_ni_syscall,compat_sys_old_readdir)		/* old readdir syscall */
+SYSCALL(sys_old_mmap,compat_sys_s390_old_mmap)		/* 90 */
+SYSCALL(sys_munmap,compat_sys_munmap)
+SYSCALL(sys_truncate,compat_sys_truncate)
+SYSCALL(sys_ftruncate,compat_sys_ftruncate)
+SYSCALL(sys_fchmod,compat_sys_fchmod)
+SYSCALL(sys_ni_syscall,compat_sys_s390_fchown16)	/* 95 old fchown16 syscall*/
+SYSCALL(sys_getpriority,compat_sys_getpriority)
+SYSCALL(sys_setpriority,compat_sys_setpriority)
+NI_SYSCALL						/* old profil syscall */
+SYSCALL(sys_statfs,compat_sys_statfs)
+SYSCALL(sys_fstatfs,compat_sys_fstatfs)			/* 100 */
+NI_SYSCALL						/* ioperm for i386 */
+SYSCALL(sys_socketcall,compat_sys_socketcall)
+SYSCALL(sys_syslog,compat_sys_syslog)
+SYSCALL(sys_setitimer,compat_sys_setitimer)
+SYSCALL(sys_getitimer,compat_sys_getitimer)		/* 105 */
+SYSCALL(sys_newstat,compat_sys_newstat)
+SYSCALL(sys_newlstat,compat_sys_newlstat)
+SYSCALL(sys_newfstat,compat_sys_newfstat)
+NI_SYSCALL						/* old uname syscall */
+SYSCALL(sys_lookup_dcookie,compat_sys_lookup_dcookie)	/* 110 */
+SYSCALL(sys_vhangup,sys_vhangup)
+NI_SYSCALL						/* old "idle" system call */
+NI_SYSCALL						/* vm86old for i386 */
+SYSCALL(sys_wait4,compat_sys_wait4)
+SYSCALL(sys_swapoff,compat_sys_swapoff)			/* 115 */
+SYSCALL(sys_sysinfo,compat_sys_sysinfo)
+SYSCALL(sys_s390_ipc,compat_sys_s390_ipc)
+SYSCALL(sys_fsync,compat_sys_fsync)
+SYSCALL(sys_sigreturn,compat_sys_sigreturn)
+SYSCALL(sys_clone,compat_sys_clone)			/* 120 */
+SYSCALL(sys_setdomainname,compat_sys_setdomainname)
+SYSCALL(sys_newuname,compat_sys_newuname)
+NI_SYSCALL						/* modify_ldt for i386 */
+SYSCALL(sys_adjtimex,compat_sys_adjtimex)
+SYSCALL(sys_mprotect,compat_sys_mprotect)		/* 125 */
+SYSCALL(sys_sigprocmask,compat_sys_sigprocmask)
+NI_SYSCALL						/* old "create module" */
+SYSCALL(sys_init_module,compat_sys_init_module)
+SYSCALL(sys_delete_module,compat_sys_delete_module)
+NI_SYSCALL						/* 130: old get_kernel_syms */
+SYSCALL(sys_quotactl,compat_sys_quotactl)
+SYSCALL(sys_getpgid,compat_sys_getpgid)
+SYSCALL(sys_fchdir,compat_sys_fchdir)
+SYSCALL(sys_bdflush,compat_sys_bdflush)
+SYSCALL(sys_sysfs,compat_sys_sysfs)			/* 135 */
+SYSCALL(sys_s390_personality,compat_sys_s390_personality)
+NI_SYSCALL						/* for afs_syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setfsuid16)	/* old setfsuid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setfsgid16)	/* old setfsgid16 syscall */
+SYSCALL(sys_llseek,compat_sys_llseek)			/* 140 */
+SYSCALL(sys_getdents,compat_sys_getdents)
+SYSCALL(sys_select,compat_sys_select)
+SYSCALL(sys_flock,compat_sys_flock)
+SYSCALL(sys_msync,compat_sys_msync)
+SYSCALL(sys_readv,compat_sys_readv)			/* 145 */
+SYSCALL(sys_writev,compat_sys_writev)
+SYSCALL(sys_getsid,compat_sys_getsid)
+SYSCALL(sys_fdatasync,compat_sys_fdatasync)
+SYSCALL(sys_sysctl,compat_sys_sysctl)
+SYSCALL(sys_mlock,compat_sys_mlock)			/* 150 */
+SYSCALL(sys_munlock,compat_sys_munlock)
+SYSCALL(sys_mlockall,compat_sys_mlockall)
+SYSCALL(sys_munlockall,sys_munlockall)
+SYSCALL(sys_sched_setparam,compat_sys_sched_setparam)
+SYSCALL(sys_sched_getparam,compat_sys_sched_getparam)	/* 155 */
+SYSCALL(sys_sched_setscheduler,compat_sys_sched_setscheduler)
+SYSCALL(sys_sched_getscheduler,compat_sys_sched_getscheduler)
+SYSCALL(sys_sched_yield,sys_sched_yield)
+SYSCALL(sys_sched_get_priority_max,compat_sys_sched_get_priority_max)
+SYSCALL(sys_sched_get_priority_min,compat_sys_sched_get_priority_min)	/* 160 */
+SYSCALL(sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval)
+SYSCALL(sys_nanosleep,compat_sys_nanosleep)
+SYSCALL(sys_mremap,compat_sys_mremap)
+SYSCALL(sys_ni_syscall,compat_sys_s390_setresuid16)	/* old setresuid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_getresuid16)	/* 165 old getresuid16 syscall */
+NI_SYSCALL						/* for vm86 */
+NI_SYSCALL						/* old sys_query_module */
+SYSCALL(sys_poll,compat_sys_poll)
+NI_SYSCALL						/* old nfsservctl */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setresgid16)	/* 170 old setresgid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_getresgid16)	/* old getresgid16 syscall */
+SYSCALL(sys_prctl,compat_sys_prctl)
+SYSCALL(sys_rt_sigreturn,compat_sys_rt_sigreturn)
+SYSCALL(sys_rt_sigaction,compat_sys_rt_sigaction)
+SYSCALL(sys_rt_sigprocmask,compat_sys_rt_sigprocmask)	/* 175 */
+SYSCALL(sys_rt_sigpending,compat_sys_rt_sigpending)
+SYSCALL(sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait)
+SYSCALL(sys_rt_sigqueueinfo,compat_sys_rt_sigqueueinfo)
+SYSCALL(sys_rt_sigsuspend,compat_sys_rt_sigsuspend)
+SYSCALL(sys_pread64,compat_sys_s390_pread64)		/* 180 */
+SYSCALL(sys_pwrite64,compat_sys_s390_pwrite64)
+SYSCALL(sys_ni_syscall,compat_sys_s390_chown16)		/* old chown16 syscall */
+SYSCALL(sys_getcwd,compat_sys_getcwd)
+SYSCALL(sys_capget,compat_sys_capget)
+SYSCALL(sys_capset,compat_sys_capset)			/* 185 */
+SYSCALL(sys_sigaltstack,compat_sys_sigaltstack)
+SYSCALL(sys_sendfile64,compat_sys_sendfile)
+NI_SYSCALL						/* streams1 */
+NI_SYSCALL						/* streams2 */
+SYSCALL(sys_vfork,sys_vfork)				/* 190 */
+SYSCALL(sys_getrlimit,compat_sys_getrlimit)
+SYSCALL(sys_mmap2,compat_sys_s390_mmap2)
+SYSCALL(sys_ni_syscall,compat_sys_s390_truncate64)
+SYSCALL(sys_ni_syscall,compat_sys_s390_ftruncate64)
+SYSCALL(sys_ni_syscall,compat_sys_s390_stat64)		/* 195 */
+SYSCALL(sys_ni_syscall,compat_sys_s390_lstat64)
+SYSCALL(sys_ni_syscall,compat_sys_s390_fstat64)
+SYSCALL(sys_lchown,compat_sys_lchown)
+SYSCALL(sys_getuid,sys_getuid)
+SYSCALL(sys_getgid,sys_getgid)				/* 200 */
+SYSCALL(sys_geteuid,sys_geteuid)
+SYSCALL(sys_getegid,sys_getegid)
+SYSCALL(sys_setreuid,compat_sys_setreuid)
+SYSCALL(sys_setregid,compat_sys_setregid)
+SYSCALL(sys_getgroups,compat_sys_getgroups)		/* 205 */
+SYSCALL(sys_setgroups,compat_sys_setgroups)
+SYSCALL(sys_fchown,compat_sys_fchown)
+SYSCALL(sys_setresuid,compat_sys_setresuid)
+SYSCALL(sys_getresuid,compat_sys_getresuid)
+SYSCALL(sys_setresgid,compat_sys_setresgid)		/* 210 */
+SYSCALL(sys_getresgid,compat_sys_getresgid)
+SYSCALL(sys_chown,compat_sys_chown)
+SYSCALL(sys_setuid,compat_sys_setuid)
+SYSCALL(sys_setgid,compat_sys_setgid)
+SYSCALL(sys_setfsuid,compat_sys_setfsuid)		/* 215 */
+SYSCALL(sys_setfsgid,compat_sys_setfsgid)
+SYSCALL(sys_pivot_root,compat_sys_pivot_root)
+SYSCALL(sys_mincore,compat_sys_mincore)
+SYSCALL(sys_madvise,compat_sys_madvise)
+SYSCALL(sys_getdents64,compat_sys_getdents64)		/* 220 */
+SYSCALL(sys_ni_syscall,compat_sys_fcntl64)
+SYSCALL(sys_readahead,compat_sys_s390_readahead)
+SYSCALL(sys_ni_syscall,compat_sys_sendfile64)
+SYSCALL(sys_setxattr,compat_sys_setxattr)
+SYSCALL(sys_lsetxattr,compat_sys_lsetxattr)		/* 225 */
+SYSCALL(sys_fsetxattr,compat_sys_fsetxattr)
+SYSCALL(sys_getxattr,compat_sys_getxattr)
+SYSCALL(sys_lgetxattr,compat_sys_lgetxattr)
+SYSCALL(sys_fgetxattr,compat_sys_fgetxattr)
+SYSCALL(sys_listxattr,compat_sys_listxattr)		/* 230 */
+SYSCALL(sys_llistxattr,compat_sys_llistxattr)
+SYSCALL(sys_flistxattr,compat_sys_flistxattr)
+SYSCALL(sys_removexattr,compat_sys_removexattr)
+SYSCALL(sys_lremovexattr,compat_sys_lremovexattr)
+SYSCALL(sys_fremovexattr,compat_sys_fremovexattr)	/* 235 */
+SYSCALL(sys_gettid,sys_gettid)
+SYSCALL(sys_tkill,compat_sys_tkill)
+SYSCALL(sys_futex,compat_sys_futex)
+SYSCALL(sys_sched_setaffinity,compat_sys_sched_setaffinity)
+SYSCALL(sys_sched_getaffinity,compat_sys_sched_getaffinity)	/* 240 */
+SYSCALL(sys_tgkill,compat_sys_tgkill)
+NI_SYSCALL						/* reserved for TUX */
+SYSCALL(sys_io_setup,compat_sys_io_setup)
+SYSCALL(sys_io_destroy,compat_sys_io_destroy)
+SYSCALL(sys_io_getevents,compat_sys_io_getevents)	/* 245 */
+SYSCALL(sys_io_submit,compat_sys_io_submit)
+SYSCALL(sys_io_cancel,compat_sys_io_cancel)
+SYSCALL(sys_exit_group,compat_sys_exit_group)
+SYSCALL(sys_epoll_create,compat_sys_epoll_create)
+SYSCALL(sys_epoll_ctl,compat_sys_epoll_ctl)		/* 250 */
+SYSCALL(sys_epoll_wait,compat_sys_epoll_wait)
+SYSCALL(sys_set_tid_address,compat_sys_set_tid_address)
+SYSCALL(sys_fadvise64_64,compat_sys_s390_fadvise64)
+SYSCALL(sys_timer_create,compat_sys_timer_create)
+SYSCALL(sys_timer_settime,compat_sys_timer_settime)	/* 255 */
+SYSCALL(sys_timer_gettime,compat_sys_timer_gettime)
+SYSCALL(sys_timer_getoverrun,compat_sys_timer_getoverrun)
+SYSCALL(sys_timer_delete,compat_sys_timer_delete)
+SYSCALL(sys_clock_settime,compat_sys_clock_settime)
+SYSCALL(sys_clock_gettime,compat_sys_clock_gettime)	/* 260 */
+SYSCALL(sys_clock_getres,compat_sys_clock_getres)
+SYSCALL(sys_clock_nanosleep,compat_sys_clock_nanosleep)
+NI_SYSCALL						/* reserved for vserver */
+SYSCALL(sys_ni_syscall,compat_sys_s390_fadvise64_64)
+SYSCALL(sys_statfs64,compat_sys_statfs64)
+SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
+SYSCALL(sys_remap_file_pages,compat_sys_remap_file_pages)
+NI_SYSCALL						/* 268 sys_mbind */
+NI_SYSCALL						/* 269 sys_get_mempolicy */
+NI_SYSCALL						/* 270 sys_set_mempolicy */
+SYSCALL(sys_mq_open,compat_sys_mq_open)
+SYSCALL(sys_mq_unlink,compat_sys_mq_unlink)
+SYSCALL(sys_mq_timedsend,compat_sys_mq_timedsend)
+SYSCALL(sys_mq_timedreceive,compat_sys_mq_timedreceive)
+SYSCALL(sys_mq_notify,compat_sys_mq_notify)		/* 275 */
+SYSCALL(sys_mq_getsetattr,compat_sys_mq_getsetattr)
+SYSCALL(sys_kexec_load,compat_sys_kexec_load)
+SYSCALL(sys_add_key,compat_sys_add_key)
+SYSCALL(sys_request_key,compat_sys_request_key)
+SYSCALL(sys_keyctl,compat_sys_keyctl)			/* 280 */
+SYSCALL(sys_waitid,compat_sys_waitid)
+SYSCALL(sys_ioprio_set,compat_sys_ioprio_set)
+SYSCALL(sys_ioprio_get,compat_sys_ioprio_get)
+SYSCALL(sys_inotify_init,sys_inotify_init)
+SYSCALL(sys_inotify_add_watch,compat_sys_inotify_add_watch)	/* 285 */
+SYSCALL(sys_inotify_rm_watch,compat_sys_inotify_rm_watch)
+NI_SYSCALL						/* 287 sys_migrate_pages */
+SYSCALL(sys_openat,compat_sys_openat)
+SYSCALL(sys_mkdirat,compat_sys_mkdirat)
+SYSCALL(sys_mknodat,compat_sys_mknodat)			/* 290 */
+SYSCALL(sys_fchownat,compat_sys_fchownat)
+SYSCALL(sys_futimesat,compat_sys_futimesat)
+SYSCALL(sys_newfstatat,compat_sys_s390_fstatat64)
+SYSCALL(sys_unlinkat,compat_sys_unlinkat)
+SYSCALL(sys_renameat,compat_sys_renameat)		/* 295 */
+SYSCALL(sys_linkat,compat_sys_linkat)
+SYSCALL(sys_symlinkat,compat_sys_symlinkat)
+SYSCALL(sys_readlinkat,compat_sys_readlinkat)
+SYSCALL(sys_fchmodat,compat_sys_fchmodat)
+SYSCALL(sys_faccessat,compat_sys_faccessat)		/* 300 */
+SYSCALL(sys_pselect6,compat_sys_pselect6)
+SYSCALL(sys_ppoll,compat_sys_ppoll)
+SYSCALL(sys_unshare,compat_sys_unshare)
+SYSCALL(sys_set_robust_list,compat_sys_set_robust_list)
+SYSCALL(sys_get_robust_list,compat_sys_get_robust_list)
+SYSCALL(sys_splice,compat_sys_splice)
+SYSCALL(sys_sync_file_range,compat_sys_s390_sync_file_range)
+SYSCALL(sys_tee,compat_sys_tee)
+SYSCALL(sys_vmsplice,compat_sys_vmsplice)
+NI_SYSCALL						/* 310 sys_move_pages */
+SYSCALL(sys_getcpu,compat_sys_getcpu)
+SYSCALL(sys_epoll_pwait,compat_sys_epoll_pwait)
+SYSCALL(sys_utimes,compat_sys_utimes)
+SYSCALL(sys_fallocate,compat_sys_s390_fallocate)
+SYSCALL(sys_utimensat,compat_sys_utimensat)		/* 315 */
+SYSCALL(sys_signalfd,compat_sys_signalfd)
+NI_SYSCALL						/* 317 old sys_timer_fd */
+SYSCALL(sys_eventfd,compat_sys_eventfd)
+SYSCALL(sys_timerfd_create,compat_sys_timerfd_create)
+SYSCALL(sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */
+SYSCALL(sys_timerfd_gettime,compat_sys_timerfd_gettime)
+SYSCALL(sys_signalfd4,compat_sys_signalfd4)
+SYSCALL(sys_eventfd2,compat_sys_eventfd2)
+SYSCALL(sys_inotify_init1,compat_sys_inotify_init1)
+SYSCALL(sys_pipe2,compat_sys_pipe2)			/* 325 */
+SYSCALL(sys_dup3,compat_sys_dup3)
+SYSCALL(sys_epoll_create1,compat_sys_epoll_create1)
+SYSCALL(sys_preadv,compat_sys_preadv)
+SYSCALL(sys_pwritev,compat_sys_pwritev)
+SYSCALL(sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */
+SYSCALL(sys_perf_event_open,compat_sys_perf_event_open)
+SYSCALL(sys_fanotify_init,compat_sys_fanotify_init)
+SYSCALL(sys_fanotify_mark,compat_sys_fanotify_mark)
+SYSCALL(sys_prlimit64,compat_sys_prlimit64)
+SYSCALL(sys_name_to_handle_at,compat_sys_name_to_handle_at) /* 335 */
+SYSCALL(sys_open_by_handle_at,compat_sys_open_by_handle_at)
+SYSCALL(sys_clock_adjtime,compat_sys_clock_adjtime)
+SYSCALL(sys_syncfs,compat_sys_syncfs)
+SYSCALL(sys_setns,compat_sys_setns)
+SYSCALL(sys_process_vm_readv,compat_sys_process_vm_readv) /* 340 */
+SYSCALL(sys_process_vm_writev,compat_sys_process_vm_writev)
+SYSCALL(sys_s390_runtime_instr,compat_sys_s390_runtime_instr)
+SYSCALL(sys_kcmp,compat_sys_kcmp)
+SYSCALL(sys_finit_module,compat_sys_finit_module)
+SYSCALL(sys_sched_setattr,compat_sys_sched_setattr)	/* 345 */
+SYSCALL(sys_sched_getattr,compat_sys_sched_getattr)
+SYSCALL(sys_renameat2,compat_sys_renameat2)
+SYSCALL(sys_seccomp,compat_sys_seccomp)
+SYSCALL(sys_getrandom,compat_sys_getrandom)
+SYSCALL(sys_memfd_create,compat_sys_memfd_create)	/* 350 */
+SYSCALL(sys_bpf,compat_sys_bpf)
+SYSCALL(sys_s390_pci_mmio_write,compat_sys_s390_pci_mmio_write)
+SYSCALL(sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read)
+SYSCALL(sys_execveat,compat_sys_execveat)
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
new file mode 100644
index 000000000..99babea02
--- /dev/null
+++ b/arch/s390/kernel/sysinfo.c
@@ -0,0 +1,465 @@
+/*
+ *  Copyright IBM Corp. 2001, 2009
+ *  Author(s): Ulrich Weigand <Ulrich.Weigand@de.ibm.com>,
+ *	       Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
+#include <asm/cpcmd.h>
+#include <asm/topology.h>
+
+/* Sigh, math-emu. Don't ask. */
+#include <asm/sfp-util.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+
+int topology_max_mnest;	/* selector 2 used for STSI 15.1.x in stsi_15_1_x() */
+
+/*
+ * stsi - store system information
+ *
+ * Returns the current configuration level if function code 0 was specified,
+ * otherwise 0. Returns -EOPNOTSUPP unless stsi sets condition code 0.
+ */
+int stsi(void *sysinfo, int fc, int sel1, int sel2)
+{
+	register int r0 asm("0") = (fc << 28) | sel1;	/* fc in the top 4 bits */
+	register int r1 asm("1") = sel2;
+	int rc = 0;
+
+	asm volatile(
+		"	stsi	0(%3)\n"
+		"0:	jz	2f\n"		/* cc 0: rc stays 0 */
+		"1:	lhi	%1,%4\n"	/* otherwise rc = -EOPNOTSUPP */
+		"2:\n"
+		EX_TABLE(0b, 1b)	/* NOTE(review): presumably a fault at 0b resumes at 1b - confirm */
+		: "+d" (r0), "+d" (rc)
+		: "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP)
+		: "cc", "memory");
+	if (rc)
+		return rc;
+	return fc ? 0 : ((unsigned int) r0) >> 28;	/* level: top 4 bits of r0 */
+}
+EXPORT_SYMBOL(stsi);
+
+static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
+{
+	int i;
+
+	if (stsi(info, 1, 1, 1))
+		return;	/* print nothing if STSI 1.1.1 failed */
+	EBCASC(info->manufacturer, sizeof(info->manufacturer));
+	EBCASC(info->type, sizeof(info->type));
+	EBCASC(info->model, sizeof(info->model));
+	EBCASC(info->sequence, sizeof(info->sequence));
+	EBCASC(info->plant, sizeof(info->plant));
+	EBCASC(info->model_capacity, sizeof(info->model_capacity));
+	EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap));
+	EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
+	seq_printf(m, "Manufacturer:         %-16.16s\n", info->manufacturer);
+	seq_printf(m, "Type:                 %-4.4s\n", info->type);
+	/*
+	 * Sigh: the model field has been renamed with System z9
+	 * to model_capacity and a new model field has been added
+	 * after the plant field. To avoid confusing older programs
+	 * the "Model:" line prints "model_capacity model" or just
+	 * "model_capacity" if the model string is empty.
+	 */
+	seq_printf(m, "Model:                %-16.16s", info->model_capacity);
+	if (info->model[0] != '\0')
+		seq_printf(m, " %-16.16s", info->model);
+	seq_putc(m, '\n');
+	seq_printf(m, "Sequence Code:        %-16.16s\n", info->sequence);
+	seq_printf(m, "Plant:                %-4.4s\n", info->plant);
+	seq_printf(m, "Model Capacity:       %-16.16s %08u\n",
+		   info->model_capacity, info->model_cap_rating);
+	if (info->model_perm_cap_rating)
+		seq_printf(m, "Model Perm. Capacity: %-16.16s %08u\n",
+			   info->model_perm_cap,
+			   info->model_perm_cap_rating);
+	if (info->model_temp_cap_rating)
+		seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n",
+			   info->model_temp_cap,
+			   info->model_temp_cap_rating);
+	if (info->ncr)
+		seq_printf(m, "Nominal Cap. Rating:  %08u\n", info->ncr);
+	if (info->npr)
+		seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr);
+	if (info->ntr)
+		seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr);
+	if (info->cai) {
+		seq_printf(m, "Capacity Adj. Ind.:   %d\n", info->cai);
+		seq_printf(m, "Capacity Ch. Reason:  %d\n", info->ccr);
+		seq_printf(m, "Capacity Transient:   %d\n", info->t);
+	}
+	if (info->p) {
+		/* types are numbered from 1 in the output, typepct[] from 0 */
+		for (i = 1; i <= ARRAY_SIZE(info->typepct); i++) {
+			seq_printf(m, "Type %d Percentage:    %d\n",
+				   i, info->typepct[i - 1]);
+		}
+	}
+}
+
+/* Print the CPU topology magnitudes stored by STSI 15.1.x (and SCHED_MC). */
+static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info)
+{
+	int i;
+
+	seq_putc(m, '\n');
+	if (!MACHINE_HAS_TOPOLOGY)
+		return;
+	if (stsi(info, 15, 1, topology_max_mnest))
+		return;
+	seq_puts(m, "CPU Topology HW:     ");
+	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+		seq_printf(m, " %d", info->mag[i]);
+	seq_putc(m, '\n');
+#ifdef CONFIG_SCHED_MC
+	store_topology(info);	/* presumably rewrites info - see store_topology() */
+	seq_puts(m, "CPU Topology SW:     ");
+	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+		seq_printf(m, " %d", info->mag[i]);
+	seq_putc(m, '\n');
+#endif
+}
+
+static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info)
+{
+	struct sysinfo_1_2_2_extension *ext;
+	int i;
+
+	if (stsi(info, 1, 2, 2))
+		return;	/* print nothing if STSI 1.2.2 failed */
+	ext = (struct sysinfo_1_2_2_extension *)
+		((unsigned long) info + info->acc_offset);	/* byte offset from info */
+	seq_printf(m, "CPUs Total:           %d\n", info->cpus_total);
+	seq_printf(m, "CPUs Configured:      %d\n", info->cpus_configured);
+	seq_printf(m, "CPUs Standby:         %d\n", info->cpus_standby);
+	seq_printf(m, "CPUs Reserved:        %d\n", info->cpus_reserved);
+	/*
+	 * Sigh 2. According to the specification the alternate
+	 * capability field is a 32 bit floating point number
+	 * if the higher order 8 bits are not zero. Printing
+	 * a floating point number in the kernel is a no-no,
+	 * always print the number as 32 bit unsigned integer.
+	 * The user-space needs to know about the strange
+	 * encoding of the alternate cpu capability.
+	 */
+	seq_printf(m, "Capability:           %u", info->capability);
+	if (info->format == 1)
+		seq_printf(m, " %u", ext->alt_capability);
+	seq_putc(m, '\n');
+	if (info->nominal_cap)
+		seq_printf(m, "Nominal Capability:   %d\n", info->nominal_cap);
+	if (info->secondary_cap)
+		seq_printf(m, "Secondary Capability: %d\n", info->secondary_cap);
+	/* one line per n-way value, n = 2 .. cpus_total; slot n - 2 holds it */
+	for (i = 2; i <= info->cpus_total; i++) {
+		seq_printf(m, "Adjustment %02d-way:    %u",
+			   i, info->adjustment[i-2]);
+		if (info->format == 1)
+			seq_printf(m, " %u", ext->alt_adjustment[i-2]);
+		seq_putc(m, '\n');
+	}
+}
+
+static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info)
+{
+	if (stsi(info, 2, 2, 2))
+		return;	/* print nothing if STSI 2.2.2 failed */
+	EBCASC(info->name, sizeof(info->name));
+	seq_putc(m, '\n');
+	seq_printf(m, "LPAR Number:          %d\n", info->lpar_number);
+	seq_puts(m, "LPAR Characteristics: ");	/* constant text: no format needed */
+	if (info->characteristics & LPAR_CHAR_DEDICATED)
+		seq_puts(m, "Dedicated ");
+	if (info->characteristics & LPAR_CHAR_SHARED)
+		seq_puts(m, "Shared ");
+	if (info->characteristics & LPAR_CHAR_LIMITED)
+		seq_puts(m, "Limited ");
+	seq_putc(m, '\n');
+	seq_printf(m, "LPAR Name:            %-8.8s\n", info->name);
+	seq_printf(m, "LPAR Adjustment:      %d\n", info->caf);
+	seq_printf(m, "LPAR CPUs Total:      %d\n", info->cpus_total);
+	seq_printf(m, "LPAR CPUs Configured: %d\n", info->cpus_configured);
+	seq_printf(m, "LPAR CPUs Standby:    %d\n", info->cpus_standby);
+	seq_printf(m, "LPAR CPUs Reserved:   %d\n", info->cpus_reserved);
+	seq_printf(m, "LPAR CPUs Dedicated:  %d\n", info->cpus_dedicated);
+	seq_printf(m, "LPAR CPUs Shared:     %d\n", info->cpus_shared);
+	if (info->mt_installed & 0x80) {	/* MT lines only if bit 0x80 is set */
+		seq_printf(m, "LPAR CPUs G-MTID:     %d\n",
+			   info->mt_general & 0x1f);
+		seq_printf(m, "LPAR CPUs S-MTID:     %d\n",
+			   info->mt_installed & 0x1f);
+		seq_printf(m, "LPAR CPUs PS-MTID:    %d\n",
+			   info->mt_psmtid & 0x1f);
+	}
+}
+
+/*
+ * Print the extended name of VM level lvl. Nothing is printed when the
+ * encoding field is 0, the name starts with a 0 byte, or the encoding is
+ * neither 1 (EBCDIC, converted to ASCII in place first) nor 2 (UTF-8).
+ */
+static void print_ext_name(struct seq_file *m, int lvl,
+			   struct sysinfo_3_2_2 *info)
+{
+	unsigned int encoding = info->vm[lvl].ext_name_encoding;
+
+	if (encoding == 0 || info->ext_names[lvl][0] == 0)
+		return;
+	if (encoding == 1)
+		EBCASC(info->ext_names[lvl], sizeof(info->ext_names[lvl]));
+	else if (encoding != 2)
+		return;
+	seq_printf(m, "VM%02d Extended Name:   %-.256s\n", lvl,
+		   info->ext_names[lvl]);
+}
+
+static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info)
+{
+	/* A UUID equal to NULL_UUID_BE is not printed. */
+	if (memcmp(&info->vm[i].uuid, &NULL_UUID_BE, sizeof(uuid_be)) != 0)
+		seq_printf(m, "VM%02d UUID:            %pUb\n", i, &info->vm[i].uuid);
+}
+
+static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info)
+{
+	int i;
+
+	if (stsi(info, 3, 2, 2))
+		return;	/* print nothing if STSI 3.2.2 failed */
+	for (i = 0; i < info->count; i++) {	/* one paragraph per vm[] entry */
+		EBCASC(info->vm[i].name, sizeof(info->vm[i].name));
+		EBCASC(info->vm[i].cpi, sizeof(info->vm[i].cpi));
+		seq_putc(m, '\n');
+		seq_printf(m, "VM%02d Name:            %-8.8s\n", i, info->vm[i].name);
+		seq_printf(m, "VM%02d Control Program: %-16.16s\n", i, info->vm[i].cpi);
+		seq_printf(m, "VM%02d Adjustment:      %d\n", i, info->vm[i].caf);
+		seq_printf(m, "VM%02d CPUs Total:      %d\n", i, info->vm[i].cpus_total);
+		seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured);
+		seq_printf(m, "VM%02d CPUs Standby:    %d\n", i, info->vm[i].cpus_standby);
+		seq_printf(m, "VM%02d CPUs Reserved:   %d\n", i, info->vm[i].cpus_reserved);
+		print_ext_name(m, i, info);
+		print_uuid(m, i, info);
+	}
+}
+
+static int sysinfo_show(struct seq_file *m, void *v)
+{
+	void *page = (void *)get_zeroed_page(GFP_KERNEL);
+	int level;
+
+	if (!page)
+		return 0;
+	/* Function code 0 only asks for the current configuration level. */
+	level = stsi(NULL, 0, 0, 0);
+	if (level >= 1) {
+		stsi_1_1_1(m, page);
+		stsi_15_1_x(m, page);
+		stsi_1_2_2(m, page);
+	}
+	if (level >= 2)
+		stsi_2_2_2(m, page);
+	if (level >= 3)
+		stsi_3_2_2(m, page);
+	free_page((unsigned long)page);
+	return 0;
+}
+
+static int sysinfo_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, sysinfo_show, NULL);	/* no private data passed */
+}
+
+static const struct file_operations sysinfo_fops = {
+	.open		= sysinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,	/* pairs with single_open() in sysinfo_open() */
+};
+
+static int __init sysinfo_create_proc(void)
+{
+	proc_create("sysinfo", 0444, NULL, &sysinfo_fops);	/* result not checked */
+	return 0;
+}
+device_initcall(sysinfo_create_proc);
+
+/*
+ * Service levels interface.
+ */
+
+static DECLARE_RWSEM(service_level_sem);
+static LIST_HEAD(service_level_list);
+
+int register_service_level(struct service_level *slr)
+{
+	struct service_level *cur;
+	int rc = 0;
+
+	down_write(&service_level_sem);
+	list_for_each_entry(cur, &service_level_list, list)
+		if (cur == slr)
+			rc = -EEXIST;	/* already on the list */
+	if (!rc)
+		list_add_tail(&slr->list, &service_level_list);
+	up_write(&service_level_sem);
+	return rc;
+}
+EXPORT_SYMBOL(register_service_level);
+
+int unregister_service_level(struct service_level *slr)
+{
+	struct service_level *cur;
+	int rc = -ENOENT;
+
+	down_write(&service_level_sem);
+	list_for_each_entry(cur, &service_level_list, list) {
+		if (cur == slr) {
+			list_del(&cur->list);
+			rc = 0;
+			break;	/* leave right away: cur is off the list now */
+		}
+	}
+	up_write(&service_level_sem);
+	return rc;
+}
+EXPORT_SYMBOL(unregister_service_level);
+
+static void *service_level_start(struct seq_file *m, loff_t *pos)
+{
+	down_read(&service_level_sem);	/* released in service_level_stop() */
+	return seq_list_start(&service_level_list, *pos);
+}
+
+static void *service_level_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	return seq_list_next(p, &service_level_list, pos);	/* no locking done here */
+}
+
+static void service_level_stop(struct seq_file *m, void *p)
+{
+	up_read(&service_level_sem);	/* taken in service_level_start() */
+}
+
+static int service_level_show(struct seq_file *m, void *p)
+{
+	struct service_level *entry = list_entry(p, struct service_level, list);
+
+	/* Each registered level prints itself through its own callback. */
+	entry->seq_print(m, entry);
+	return 0;
+}
+
+static const struct seq_operations service_level_seq_ops = {
+	.start		= service_level_start,	/* takes service_level_sem for reading */
+	.next		= service_level_next,
+	.stop		= service_level_stop,	/* drops service_level_sem again */
+	.show		= service_level_show
+};
+
+static int service_level_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &service_level_seq_ops);	/* closed by seq_release */
+}
+
+static const struct file_operations service_level_ops = {
+	.open		= service_level_open,
+	.read		= seq_read,
+	.llseek 	= seq_lseek,
+	.release	= seq_release	/* pairs with seq_open() in service_level_open() */
+};
+
+static void service_level_vm_print(struct seq_file *m,
+				   struct service_level *slr)
+{
+	char *reply, *eol;
+
+	reply = kmalloc(1024, GFP_KERNEL | GFP_DMA);
+	if (!reply)
+		return;
+	cpcmd("QUERY CPLEVEL", reply, 1024, NULL);
+	eol = strchr(reply, '\n');	/* only the first line is printed */
+	if (eol)
+		*eol = '\0';
+	seq_printf(m, "VM: %s\n", reply);
+	kfree(reply);
+}
+
+static struct service_level service_level_vm = {
+	.seq_print = service_level_vm_print	/* prints the "VM: ..." line */
+};
+
+static __init int create_proc_service_level(void)
+{
+	proc_create("service_levels", 0, NULL, &service_level_ops);
+	if (MACHINE_IS_VM)	/* the VM entry is registered only in that case */
+		register_service_level(&service_level_vm);
+	return 0;
+}
+subsys_initcall(create_proc_service_level);
+
+/*
+ * CPU capability might have changed. Therefore recalculate loops_per_jiffy.
+ */
+void s390_adjust_jiffies(void)
+{
+	struct sysinfo_1_2_2 *info;
+	const unsigned int fmil = 0x4b189680;	/* 1e7 as 32-bit float. */
+	FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
+	FP_DECL_EX;
+	unsigned int capability;
+
+	info = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!info)
+		return;	/* no page: loops_per_jiffy is left untouched */
+
+	if (stsi(info, 1, 2, 2) == 0) {
+		/*
+		 * Major sigh. The cpu capability encoding is "special".
+		 * If the first 9 bits of info->capability are 0 then it
+		 * is a 32 bit unsigned integer in the range 0 .. 2^23.
+		 * If the first 9 bits are != 0 then it is a 32 bit float.
+		 * In addition a lower value indicates a proportionally
+		 * higher cpu capacity. Bogomips are the other way round.
+		 * To get to a halfway suitable number we divide 1e7
+		 * by the cpu capability number. Yes, that means a floating
+		 * point division .. math-emu here we come :-)
+		 */
+		FP_UNPACK_SP(SA, &fmil);
+		if ((info->capability >> 23) == 0)	/* top 9 bits clear: integer */
+			FP_FROM_INT_S(SB, (long) info->capability, 64, long);
+		else
+			FP_UNPACK_SP(SB, &info->capability);
+		FP_DIV_S(SR, SA, SB);	/* SR = 1e7 / capability */
+		FP_TO_INT_S(capability, SR, 32, 0);
+	} else
+		/*
+		 * Really old machine without stsi block for basic
+		 * cpu information. Report 42.0 bogomips.
+		 */
+		capability = 42;
+	loops_per_jiffy = capability * (500000/HZ);	/* capability is in BogoMIPS */
+	free_page((unsigned long) info);
+}
+
+/*
+ * "Calibrate" the delay loop by presetting it via s390_adjust_jiffies().
+ */
+void calibrate_delay(void)
+{
+	s390_adjust_jiffies();
+	/* Print the good old Bogomips line: integer part, then two decimals. */
+	printk(KERN_DEBUG "Calibrating delay loop (skipped)... "
+	       "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ),
+	       (loops_per_jiffy/(5000/HZ)) % 100);
+}
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
new file mode 100644
index 000000000..170ddd201
--- /dev/null
+++ b/arch/s390/kernel/time.c
@@ -0,0 +1,1802 @@
+/*
+ *    Time of day based timer functions.
+ *
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2008
+ *    Author(s): Hartmut Penner (hp@de.ibm.com),
+ *               Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ *
+ *  Derived from "arch/i386/kernel/time.c"
+ *    Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ */
+
+#define KMSG_COMPONENT "time"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel_stat.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/stop_machine.h>
+#include <linux/time.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <linux/profile.h>
+#include <linux/timex.h>
+#include <linux/notifier.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/clockchips.h>
+#include <linux/gfp.h>
+#include <linux/kprobes.h>
+#include <asm/uaccess.h>
+#include <asm/delay.h>
+#include <asm/div64.h>
+#include <asm/vdso.h>
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/vtimer.h>
+#include <asm/etr.h>
+#include <asm/cio.h>
+#include "entry.h"
+
+/* change this if you have some constant time drift; 1 usec = 1 << 12 ticks */
+#define USECS_PER_JIFFY     ((unsigned long) 1000000/HZ)
+#define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12)
+
+u64 sched_clock_base_cc = -1;	/* Force to data section. */
+EXPORT_SYMBOL_GPL(sched_clock_base_cc);
+
+static DEFINE_PER_CPU(struct clock_event_device, comparators);
+
+/* Scheduler clock: the monotonic TOD clock value converted to nanoseconds. */
+unsigned long long notrace sched_clock(void)
+{
+	unsigned long long tod = get_tod_clock_monotonic();
+
+	return tod_to_ns(tod);
+}
+NOKPROBE_SYMBOL(sched_clock);
+
+/*
+ * Monotonic_clock - returns # of nanoseconds passed since time_init()
+ */
+unsigned long long monotonic_clock(void)
+{
+	return sched_clock();	/* same value as the scheduler clock */
+}
+EXPORT_SYMBOL(monotonic_clock);
+
+void tod_to_timeval(__u64 todval, struct timespec *xt)
+{
+	unsigned long long secs = todval >> 12;	/* microseconds so far ... */
+	__u64 rest;
+
+	do_div(secs, 1000000);			/* ... whole seconds now */
+	rest = todval - ((secs * 1000000) << 12);	/* sub-second TOD units */
+	xt->tv_sec = secs;
+	xt->tv_nsec = (rest * 1000) >> 12;
+}
+EXPORT_SYMBOL(tod_to_timeval);
+
+void clock_comparator_work(void)
+{
+	struct clock_event_device *evt = this_cpu_ptr(&comparators);
+
+	/* -1ULL is the "nobody is waiting" value of the comparator. */
+	S390_lowcore.clock_comparator = -1ULL;
+	evt->event_handler(evt);
+}
+
+/*
+ * Fixup the clock comparator.
+ */
+static void fixup_clock_comparator(unsigned long long delta)
+{
+	/* -1ULL means nobody is waiting; only shift an armed comparator. */
+	if (S390_lowcore.clock_comparator != -1ULL) {
+		S390_lowcore.clock_comparator += delta;
+		set_clock_comparator(S390_lowcore.clock_comparator);
+	}
+}
+
+static int s390_next_event(unsigned long delta,
+			   struct clock_event_device *evt)
+{
+	S390_lowcore.clock_comparator = get_tod_clock() + delta;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+	return 0;	/* always reports success; evt is not used */
+}
+
+static void s390_set_mode(enum clock_event_mode mode,
+			  struct clock_event_device *evt)
+{	/* Intentionally empty: nothing is done on a mode change. */
+}
+
+/*
+ * Set up lowcore and control register of the current cpu to
+ * enable TOD clock and clock comparator interrupts.
+ */
+void init_cpu_timer(void)
+{
+	int this_cpu = smp_processor_id();
+	struct clock_event_device *evt = &per_cpu(comparators, this_cpu);
+
+	/* Start with an unarmed comparator ("nobody is waiting"). */
+	S390_lowcore.clock_comparator = -1ULL;
+	set_clock_comparator(S390_lowcore.clock_comparator);
+
+	/* Describe the oneshot clock event device of this cpu. */
+	evt->name		= "comparator";
+	evt->rating		= 400;
+	evt->features		= CLOCK_EVT_FEAT_ONESHOT;
+	evt->cpumask		= cpumask_of(this_cpu);
+	evt->mult		= 16777;
+	evt->shift		= 12;
+	evt->min_delta_ns	= 1;
+	evt->max_delta_ns	= LONG_MAX;
+	evt->set_mode		= s390_set_mode;
+	evt->set_next_event	= s390_next_event;
+
+	clockevents_register_device(evt);
+
+	/* Enable clock comparator timer interrupt. */
+	__ctl_set_bit(0, 11);
+
+	/* Always allow the timing alert external interrupt. */
+	__ctl_set_bit(0, 4);
+}
+
+static void clock_comparator_interrupt(struct ext_code ext_code,
+				       unsigned int param32,
+				       unsigned long param64)
+{
+	inc_irq_stat(IRQEXT_CLK);	/* account the interrupt */
+	if (S390_lowcore.clock_comparator == -1ULL)	/* nobody waiting */
+		set_clock_comparator(S390_lowcore.clock_comparator);
+}
+
+static void etr_timing_alert(struct etr_irq_parm *);
+static void stp_timing_alert(struct stp_irq_parm *);
+
+static void timing_alert_interrupt(struct ext_code ext_code,
+				   unsigned int param32, unsigned long param64)
+{
+	inc_irq_stat(IRQEXT_TLA);
+	if (param32 & 0x00c40000)	/* bits handled by the ETR code */
+		etr_timing_alert((struct etr_irq_parm *) &param32);
+	if (param32 & 0x00038000)	/* bits handled by the STP code */
+		stp_timing_alert((struct stp_irq_parm *) &param32);
+}
+
+static void etr_reset(void);
+static void stp_reset(void);
+
+void read_persistent_clock(struct timespec *ts)
+{
+	tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts);	/* TOD now */
+}
+
+void read_boot_clock(struct timespec *ts)
+{
+	tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); /* base TOD */
+}
+
+static cycle_t read_tod_clock(struct clocksource *cs)
+{
+	return get_tod_clock();	/* cs is not used */
+}
+
+static struct clocksource clocksource_tod = {
+	.name		= "tod",
+	.rating		= 400,
+	.read		= read_tod_clock,
+	.mask		= -1ULL,	/* all 64 bits */
+	.mult		= 1000,		/* ns = (cycles * 1000) >> 12, */
+	.shift		= 12,		/* same scaling as tod_to_timeval() */
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+struct clocksource * __init clocksource_default_clock(void)
+{
+	return &clocksource_tod;	/* the TOD clock is the default */
+}
+
+void update_vsyscall(struct timekeeper *tk)
+{
+	const unsigned int shift = tk->tkr_mono.shift;
+	const u64 shifted_sec = (u64) NSEC_PER_SEC << shift;
+
+	if (tk->tkr_mono.clock != &clocksource_tod)
+		return;
+
+	/* First counter bump: userspace gettimeofday spins until the second. */
+	++vdso_data->tb_update_count;
+	smp_wmb();
+	vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
+	vdso_data->xtime_clock_sec = tk->xtime_sec;
+	vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
+
+	/* Wall time plus wall_to_monotonic, nanoseconds kept in shifted form. */
+	vdso_data->wtom_clock_sec =
+		tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+	vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
+		((u64) tk->wall_to_monotonic.tv_nsec << shift);
+	for (; vdso_data->wtom_clock_nsec >= shifted_sec;
+	     vdso_data->wtom_clock_sec++)
+		vdso_data->wtom_clock_nsec -= shifted_sec;
+
+	/* The coarse variants carry plain, unshifted nanoseconds. */
+	vdso_data->xtime_coarse_sec = tk->xtime_sec;
+	vdso_data->xtime_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >> shift);
+	vdso_data->wtom_coarse_sec =
+		vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
+	vdso_data->wtom_coarse_nsec =
+		vdso_data->xtime_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
+	for (; vdso_data->wtom_coarse_nsec >= NSEC_PER_SEC;
+	     vdso_data->wtom_coarse_sec++)
+		vdso_data->wtom_coarse_nsec -= NSEC_PER_SEC;
+
+	vdso_data->tk_mult = tk->tkr_mono.mult;
+	vdso_data->tk_shift = tk->tkr_mono.shift;
+	smp_wmb();
+	++vdso_data->tb_update_count;
+}
+
+extern struct timezone sys_tz;
+
+void update_vsyscall_tz(void)
+{
+	/* Make userspace gettimeofday spin until we're done. */
+	++vdso_data->tb_update_count;
+	smp_wmb();	/* order the counter bump before the data */
+	vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+	vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+	smp_wmb();	/* order the data before the final bump */
+	++vdso_data->tb_update_count;	/* second bump ends the update */
+}
+
+/*
+ * Initialize the TOD clock and the CPU timer of
+ * the boot cpu.
+ */
+void __init time_init(void)
+{
+	/* Reset time synchronization interfaces. */
+	etr_reset();
+	stp_reset();
+
+	/* Request the clock comparator external interrupt, panic on failure. */
+	if (register_external_irq(EXT_IRQ_CLK_COMP, clock_comparator_interrupt))
+		panic("Couldn't request external interrupt 0x1004");
+
+	/* Request the timing alert external interrupt, panic on failure. */
+	if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
+		panic("Couldn't request external interrupt 0x1406");
+
+	if (__clocksource_register(&clocksource_tod) != 0)
+		panic("Could not register TOD clock source");
+
+	/* Enable TOD clock interrupts on the boot cpu. */
+	init_cpu_timer();
+
+	/* Enable cpu timer interrupts on the boot cpu. */
+	vtime_init();
+}
+
+/*
+ * The time is "clock". old is what we think the time is.
+ * Adjust the value by a multiple of jiffies and add the delta to ntp.
+ * "delay" is an approximation how long the synchronization took. If
+ * the time correction is positive, then "delay" is subtracted from
+ * the time difference and only the remaining part is passed to ntp.
+ */
+static unsigned long long adjust_time(unsigned long long old,
+				      unsigned long long clock,
+				      unsigned long long delay)
+{
+	unsigned long long delta, ticks;
+	struct timex adjust;	/* only offset and modes are filled in below */
+
+	if (clock > old) {
+		/* It is later than we thought. */
+		delta = ticks = clock - old;
+		delta = ticks = (delta < delay) ? 0 : delta - delay;
+		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY); /* whole jiffies */
+		adjust.offset = ticks * (1000000 / HZ);	/* jiffies -> usecs */
+	} else {
+		/* It is earlier than we thought. */
+		delta = ticks = old - clock;
+		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY); /* whole jiffies */
+		delta = -delta;	/* negated modulo 2^64 */
+		adjust.offset = -ticks * (1000000 / HZ);
+	}
+	sched_clock_base_cc += delta;
+	if (adjust.offset != 0) {
+		pr_notice("The ETR interface has adjusted the clock "
+			  "by %li microseconds\n", adjust.offset);
+		adjust.modes = ADJ_OFFSET_SINGLESHOT;
+		do_adjtimex(&adjust);
+	}
+	return delta;	/* a multiple of CLK_TICKS_PER_JIFFY (mod 2^64) */
+}
+
+static DEFINE_PER_CPU(atomic_t, clock_sync_word);
+static DEFINE_MUTEX(clock_sync_mutex);
+static unsigned long clock_sync_flags;
+
+#define CLOCK_SYNC_HAS_ETR	0
+#define CLOCK_SYNC_HAS_STP	1
+#define CLOCK_SYNC_ETR		2
+#define CLOCK_SYNC_STP		3
+
+/*
+ * The synchronous get_clock function. It will write the current clock
+ * value to the clock pointer and return 0 if the clock is in sync with
+ * the external time source. Otherwise the result is -EOPNOTSUPP if
+ * neither CLOCK_SYNC_HAS_ETR nor CLOCK_SYNC_HAS_STP is set, -EACCES if
+ * neither CLOCK_SYNC_ETR nor CLOCK_SYNC_STP is set, and -EAGAIN else.
+ */
+int get_sync_clock(unsigned long long *clock)
+{
+	atomic_t *sw_ptr;
+	unsigned int sw0, sw1;
+
+	sw_ptr = &get_cpu_var(clock_sync_word);
+	sw0 = atomic_read(sw_ptr);	/* sync word before the clock read */
+	*clock = get_tod_clock();
+	sw1 = atomic_read(sw_ptr);	/* ... and after it */
+	put_cpu_var(clock_sync_word);
+	if (sw0 == sw1 && (sw0 & 0x80000000U))
+		/* Success: time is in sync. */
+		return 0;
+	if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) &&
+	    !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+		return -EOPNOTSUPP;
+	if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) &&
+	    !test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+		return -EACCES;
+	return -EAGAIN;
+}
+EXPORT_SYMBOL(get_sync_clock);
+
+/*
+ * Make get_sync_clock fail on this cpu; dummy is unused (on_each_cpu use).
+ */
+static void disable_sync_clock(void *dummy)
+{
+	atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
+	/*
+	 * Clear the in-sync bit 2^31. All get_sync_clock calls will
+	 * fail until the sync bit is turned back on. In addition
+	 * increase the "sequence" counter to avoid the race of an
+	 * etr event and the complete recovery against get_sync_clock.
+	 */
+	atomic_clear_mask(0x80000000, sw_ptr);
+	atomic_inc(sw_ptr);
+}
+
+/*
+ * Make get_sync_clock return 0 again.
+ * Needs to be called from a context disabled for preemption.
+ */
+static void enable_sync_clock(void)
+{
+	atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
+	atomic_set_mask(0x80000000, sw_ptr);	/* set the in-sync bit 2^31 */
+}
+
+/*
+ * Function to check if the clock is in sync.
+ */
+static inline int check_sync_clock(void)
+{
+	unsigned int word;
+
+	/* Read the sync word of this cpu between get/put_cpu_var. */
+	word = atomic_read(&get_cpu_var(clock_sync_word));
+	put_cpu_var(clock_sync_word);
+
+	return (word & 0x80000000U) != 0;
+}
+
+/* Single threaded workqueue used for etr and stp sync events */
+static struct workqueue_struct *time_sync_wq;
+
+static void __init time_init_wq(void)
+{
+	/* Create the workqueue only if it does not exist yet. */
+	if (!time_sync_wq)
+		time_sync_wq = create_singlethread_workqueue("timesync");
+}
+
+/*
+ * External Time Reference (ETR) code.
+ */
+static int etr_port0_online;
+static int etr_port1_online;
+static int etr_steai_available;
+
+static int __init early_parse_etr(char *p)
+{
+	if (!strncmp(p, "off", 3))		/* tested before "on" */
+		etr_port0_online = etr_port1_online = 0;
+	else if (!strncmp(p, "port0", 5))
+		etr_port0_online = 1;
+	else if (!strncmp(p, "port1", 5))
+		etr_port1_online = 1;
+	else if (!strncmp(p, "on", 2))
+		etr_port0_online = etr_port1_online = 1;
+	return 0;				/* other values are ignored */
+}
+early_param("etr", early_parse_etr);
+
+enum etr_event {
+	ETR_EVENT_PORT0_CHANGE,	/* set by etr_init() and etr_timing_alert() */
+	ETR_EVENT_PORT1_CHANGE,	/* set by etr_init() and etr_timing_alert() */
+	ETR_EVENT_PORT_ALERT,	/* set by etr_timing_alert() */
+	ETR_EVENT_SYNC_CHECK,	/* set by etr_sync_check() */
+	ETR_EVENT_SWITCH_LOCAL,	/* set by etr_switch_to_local() */
+	ETR_EVENT_UPDATE,	/* set by etr_timeout() */
+};
+
+/*
+ * Valid bit combinations of the eacr register are (x = don't care):
+ * e0 e1 dp p0 p1 ea es sl
+ *  0  0  x  0	0  0  0  0  initial, disabled state
+ *  0  0  x  0	1  1  0  0  port 1 online
+ *  0  0  x  1	0  1  0  0  port 0 online
+ *  0  0  x  1	1  1  0  0  both ports online
+ *  0  1  x  0	1  1  0  0  port 1 online and usable, ETR or PPS mode
+ *  0  1  x  0	1  1  0  1  port 1 online, usable and ETR mode
+ *  0  1  x  0	1  1  1  0  port 1 online, usable, PPS mode, in-sync
+ *  0  1  x  0	1  1  1  1  port 1 online, usable, ETR mode, in-sync
+ *  0  1  x  1	1  1  0  0  both ports online, port 1 usable
+ *  0  1  x  1	1  1  1  0  both ports online, port 1 usable, PPS mode, in-sync
+ *  0  1  x  1	1  1  1  1  both ports online, port 1 usable, ETR mode, in-sync
+ *  1  0  x  1	0  1  0  0  port 0 online and usable, ETR or PPS mode
+ *  1  0  x  1	0  1  0  1  port 0 online, usable and ETR mode
+ *  1  0  x  1	0  1  1  0  port 0 online, usable, PPS mode, in-sync
+ *  1  0  x  1	0  1  1  1  port 0 online, usable, ETR mode, in-sync
+ *  1  0  x  1	1  1  0  0  both ports online, port 0 usable
+ *  1  0  x  1	1  1  1  0  both ports online, port 0 usable, PPS mode, in-sync
+ *  1  0  x  1	1  1  1  1  both ports online, port 0 usable, ETR mode, in-sync
+ *  1  1  x  1	1  1  1  0  both ports online & usable, PPS mode, in-sync
+ *  1  1  x  1	1  1  1  1  both ports online & usable, ETR, in-sync
+ */
+static struct etr_eacr etr_eacr;
+static u64 etr_tolec;			/* time of last eacr update */
+static struct etr_aib etr_port0;
+static int etr_port0_uptodate;
+static struct etr_aib etr_port1;
+static int etr_port1_uptodate;
+static unsigned long etr_events;
+static struct timer_list etr_timer;
+
+static void etr_timeout(unsigned long dummy);
+static void etr_work_fn(struct work_struct *work);
+static DEFINE_MUTEX(etr_work_mutex);
+static DECLARE_WORK(etr_work, etr_work_fn);
+
+/*
+ * Reset ETR attachment.
+ */
+static void etr_reset(void)
+{
+	/* Disabled state: every control bit zero, only _pad0 is set to 4. */
+	etr_eacr = (struct etr_eacr) { ._pad0 = 4 };
+	if (etr_setr(&etr_eacr) != 0) {
+		if (etr_port0_online || etr_port1_online) {
+			pr_warning("The real or virtual hardware system does "
+				   "not provide an ETR interface\n");
+			etr_port0_online = etr_port1_online = 0;
+		}
+		return;
+	}
+	etr_tolec = get_tod_clock();
+	set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags);
+	if (etr_port0_online && etr_port1_online)
+		set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+}
+
+static int __init etr_init(void)
+{
+	struct etr_aib aib;	/* scratch buffer for the steai probe only */
+
+	if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
+		return 0;	/* etr_reset() found no ETR interface */
+	time_init_wq();
+	/* Check if this machine has the steai instruction. */
+	if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0)
+		etr_steai_available = 1;
+	setup_timer(&etr_timer, etr_timeout, 0UL);
+	if (etr_port0_online) {
+		set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
+		queue_work(time_sync_wq, &etr_work);
+	}
+	if (etr_port1_online) {
+		set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
+		queue_work(time_sync_wq, &etr_work);
+	}
+	return 0;
+}
+
+arch_initcall(etr_init);
+
+/*
+ * Two sorts of ETR machine checks. The architecture reads:
+ * "When a machine-check interruption occurs and if a switch-to-local or
+ *  ETR-sync-check interrupt request is pending but disabled, this pending
+ *  disabled interruption request is indicated and is cleared".
+ * Which means that we can get etr_switch_to_local events from the machine
+ * check handler although the interruption condition is disabled. Lovely..
+ */
+
+/*
+ * Switch to local machine check. This is called when the last usable
+ * ETR port goes inactive. After switch to local the clock is not in sync.
+ */
+void etr_switch_to_local(void)
+{
+	if (!etr_eacr.sl)
+		return;	/* sl bit not set in the control register */
+	disable_sync_clock(NULL);
+	if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) {
+		etr_eacr.es = etr_eacr.sl = 0;
+		etr_setr(&etr_eacr);
+		queue_work(time_sync_wq, &etr_work);	/* rest is done there */
+	}
+}
+
+/*
+ * ETR sync check machine check. This is called when the ETR OTE and the
+ * local clock OTE are farther apart than the ETR sync check tolerance.
+ * After a ETR sync check the clock is not in sync. The machine check
+ * is broadcasted to all cpus at the same time.
+ */
+void etr_sync_check(void)
+{
+	if (!etr_eacr.es)
+		return;	/* es bit not set in the control register */
+	disable_sync_clock(NULL);
+	if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) {
+		etr_eacr.es = 0;
+		etr_setr(&etr_eacr);
+		queue_work(time_sync_wq, &etr_work);	/* rest is done there */
+	}
+}
+
+/*
+ * ETR timing alert. There are two causes:
+ * 1) port state change, check the usability of the port
+ * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the
+ *    sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3)
+ *    or ETR-data word 4 (edf4) has changed.
+ */
+static void etr_timing_alert(struct etr_irq_parm *intparm)
+{
+	if (intparm->pc0)
+		/* ETR port 0 state change. */
+		set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
+	if (intparm->pc1)
+		/* ETR port 1 state change. */
+		set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
+	if (intparm->eai)
+		/*
+		 * ETR port alert on either port 0, 1 or both.
+		 * Both ports are not up-to-date now.
+		 */
+		set_bit(ETR_EVENT_PORT_ALERT, &etr_events);
+	queue_work(time_sync_wq, &etr_work);	/* queued unconditionally */
+}
+
+static void etr_timeout(unsigned long dummy)
+{
+	set_bit(ETR_EVENT_UPDATE, &etr_events);	/* etr_timer has expired */
+	queue_work(time_sync_wq, &etr_work);
+}
+
+/*
+ * Check if the etr mode is pps (es bit set, sl bit clear).
+ */
+static inline int etr_mode_is_pps(struct etr_eacr eacr)
+{
+	return eacr.es && !eacr.sl;
+}
+
+/*
+ * Check if the etr mode is etr (es bit and sl bit both set).
+ */
+static inline int etr_mode_is_etr(struct etr_eacr eacr)
+{
+	return eacr.es && eacr.sl;
+}
+
+/*
+ * Check if the port can be used for TOD synchronization.
+ * For PPS mode the port has to receive OTEs. For ETR mode
+ * the port has to receive OTEs, the ETR stepping bit has to
+ * be zero and the validity bits for data frame 1, 2, and 3
+ * have to be 1.
+ */
+static int etr_port_valid(struct etr_aib *aib, int port)
+{
+	unsigned int state = port ? aib->esw.psc1 : aib->esw.psc0;
+
+	/* A port that does not receive OTEs is never usable. */
+	if (aib->tsp == 0)
+		return 0;
+	/* PPS mode needs nothing beyond the OTEs. */
+	if (state == etr_lpsc_pps_mode)
+		return 1;
+	/* Any state other than PPS or operational-step is unusable. */
+	if (state != etr_lpsc_operational_step)
+		return 0;
+	/* ETR mode: stepping bit clear and the validity bits v1-v3 set. */
+	return !aib->esw.y && aib->slsw.v1 && aib->slsw.v2 && aib->slsw.v3;
+}
+
+/*
+ * Check if two ports are on the same network.
+ */
+static int etr_compare_network(struct etr_aib *aib1, struct etr_aib *aib2)
+{
+	// FIXME: any other fields we have to compare?
+	return aib1->edf1.net_id == aib2->edf1.net_id;	/* 1 if ids match */
+}
+
+/*
+ * Wrapper for etr_steai that converts physical port states
+ * to logical port states to be consistent with the output
+ * of stetr (see etr_psc vs. etr_lpsc).
+ */
+static void etr_steai_cv(struct etr_aib *aib, unsigned int func)
+{
+	BUG_ON(etr_steai(aib, func) != 0);
+	/* Convert port state to logical port state. */
+	if (aib->esw.psc0 == 1)
+		aib->esw.psc0 = 2;
+	else if (aib->esw.psc0 == 0 && aib->esw.p == 0)	/* p selects port 0 */
+		aib->esw.psc0 = 1;
+	if (aib->esw.psc1 == 1)
+		aib->esw.psc1 = 2;
+	else if (aib->esw.psc1 == 0 && aib->esw.p == 1)	/* p selects port 1 */
+		aib->esw.psc1 = 1;
+}
+
+/*
+ * Check if the aib a2 is still connected to the same attachment as
+ * aib a1, the etv values differ by one and a2 is valid.
+ */
+static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p)
+{
+	/* Logical state of port p as seen in a1 and in a2. */
+	int before = p ? a1->esw.psc1 : a1->esw.psc0;
+	int after = p ? a2->esw.psc1 : a2->esw.psc0;
+	int same_etr;
+
+	/* Paranoia check: e0/e1 should better be the same. */
+	if (a1->esw.eacr.e0 != a2->esw.eacr.e0 ||
+	    a1->esw.eacr.e1 != a2->esw.eacr.e1)
+		return 0;
+
+	/* Still connected to the same etr ? */
+	if (before == etr_lpsc_operational_step)
+		same_etr = after == etr_lpsc_operational_step &&
+			   a1->edf1.net_id == a2->edf1.net_id &&
+			   a1->edf1.etr_id == a2->edf1.etr_id &&
+			   a1->edf1.etr_pn == a2->edf1.etr_pn;
+	else
+		same_etr = after == etr_lpsc_pps_mode;
+	if (!same_etr)
+		return 0;
+
+	/* The ETV value of a2 needs to be ETV of a1 + 1. */
+	if (a1->edf2.etv + 1 != a2->edf2.etv)
+		return 0;
+
+	/* Finally a2 itself has to be usable for synchronization. */
+	return etr_port_valid(a2, p) != 0;
+}
+
+struct clock_sync_data {
+	atomic_t cpus;		/* cpus that still have to check in */
+	int in_sync;		/* 0: pending, 1: synced, -EAGAIN: failed */
+	unsigned long long fixup_cc;	/* delta for the clock comparators */
+	int etr_port;		/* port used for syncing (0 or 1) */
+	struct etr_aib *etr_aib;	/* aib handed to etr_sync_clock() */
+};
+
+static void clock_sync_cpu(struct clock_sync_data *sync)
+{
+	atomic_dec(&sync->cpus);	/* tell the master we have arrived */
+	enable_sync_clock();
+	/*
+	 * This looks like a busy wait loop but it isn't. clock_sync_cpu
+	 * is called on all other cpus while the TOD clock is stopped.
+	 * __udelay will stop the cpu on an enabled wait psw until the
+	 * TOD is running again.
+	 */
+	while (sync->in_sync == 0) {
+		__udelay(1);
+		/*
+		 * A different cpu changes *in_sync. Therefore use
+		 * barrier() to force memory access.
+		 */
+		barrier();
+	}
+	if (sync->in_sync != 1)
+		/* Didn't work. Clear per-cpu in sync bit again. */
+		disable_sync_clock(NULL);
+	/*
+	 * This round of TOD syncing is done. Set the clock comparator
+	 * to the next tick and let the processor continue.
+	 */
+	fixup_clock_comparator(sync->fixup_cc);
+}
+
+/*
+ * Sync the TOD clock using the port referred to by aibp. This port
+ * has to be enabled and the other port has to be disabled. The
+ * last eacr update has to be more than 1.6 seconds in the past.
+ */
+static int etr_sync_clock(void *data)
+{
+	static int first;	/* 1 while some cpu holds the master role */
+	unsigned long long clock, old_clock, delay, delta;
+	struct clock_sync_data *etr_sync;
+	struct etr_aib *sync_port, *aib;
+	int port;
+	int rc;
+
+	etr_sync = data;
+
+	if (xchg(&first, 1) == 1) {
+		/* Slave */
+		clock_sync_cpu(etr_sync);
+		return 0;
+	}
+
+	/* Wait until all other cpus entered the sync function. */
+	while (atomic_read(&etr_sync->cpus) != 0)
+		cpu_relax();
+
+	port = etr_sync->etr_port;
+	aib = etr_sync->etr_aib;
+	sync_port = (port == 0) ? &etr_port0 : &etr_port1;
+	enable_sync_clock();
+
+	/* Set clock to next OTE. */
+	__ctl_set_bit(14, 21);
+	__ctl_set_bit(0, 29);
+	clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32;
+	old_clock = get_tod_clock();
+	if (set_tod_clock(clock) == 0) {
+		__udelay(1);	/* Wait for the clock to start. */
+		__ctl_clear_bit(0, 29);
+		__ctl_clear_bit(14, 21);
+		etr_stetr(aib);	/* refresh aib for the verification below */
+		/* Adjust Linux timing variables. */
+		delay = (unsigned long long)
+			(aib->edf2.etv - sync_port->edf2.etv) << 32;
+		delta = adjust_time(old_clock, clock, delay);
+		etr_sync->fixup_cc = delta;	/* picked up by the slaves */
+		fixup_clock_comparator(delta);
+		/* Verify that the clock is properly set. */
+		if (!etr_aib_follows(sync_port, aib, port)) {
+			/* Didn't work. */
+			disable_sync_clock(NULL);
+			etr_sync->in_sync = -EAGAIN;
+			rc = -EAGAIN;
+		} else {
+			etr_sync->in_sync = 1;
+			rc = 0;
+		}
+	} else {
+		/* Could not set the clock ?!? */
+		__ctl_clear_bit(0, 29);
+		__ctl_clear_bit(14, 21);
+		disable_sync_clock(NULL);
+		etr_sync->in_sync = -EAGAIN;
+		rc = -EAGAIN;
+	}
+	xchg(&first, 0);	/* give up the master role again */
+	return rc;
+}
+
+static int etr_sync_clock_stop(struct etr_aib *aib, int port)
+{
+	struct etr_aib *last = (port == 0) ? &etr_port0 : &etr_port1;
+	struct clock_sync_data sync_data;
+	int adjacent, ret;
+
+	/* Compare with the stored aib first, then always remember the new one. */
+	adjacent = etr_aib_follows(last, aib, port);
+	memcpy(last, aib, sizeof(*aib));
+	if (!adjacent)
+		return -EAGAIN;
+
+	/* Run etr_sync_clock() on all online cpus via stop_machine(). */
+	memset(&sync_data, 0, sizeof(sync_data));
+	sync_data.etr_port = port;
+	sync_data.etr_aib = aib;
+	get_online_cpus();
+	atomic_set(&sync_data.cpus, num_online_cpus() - 1);
+	ret = stop_machine(etr_sync_clock, &sync_data, cpu_online_mask);
+	put_online_cpus();
+	return ret;
+}
+
+/*
+ * Handle the immediate effects of the different events.
+ * The port change event is used for online/offline changes.
+ */
+static struct etr_eacr etr_handle_events(struct etr_eacr eacr)
+{
+	if (test_and_clear_bit(ETR_EVENT_SYNC_CHECK, &etr_events))
+		eacr.es = 0;
+	if (test_and_clear_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events))
+		eacr.es = eacr.sl = 0;
+	if (test_and_clear_bit(ETR_EVENT_PORT_ALERT, &etr_events))
+		etr_port0_uptodate = etr_port1_uptodate = 0;
+
+	if (test_and_clear_bit(ETR_EVENT_PORT0_CHANGE, &etr_events)) {
+		if (eacr.e0)
+			/*
+			 * Port change of an enabled port. We have to
+			 * assume that this can have caused a stepping
+			 * port switch.
+			 */
+			etr_tolec = get_tod_clock();
+		eacr.p0 = etr_port0_online;
+		if (!eacr.p0)
+			eacr.e0 = 0;	/* an offline port cannot stay enabled */
+		etr_port0_uptodate = 0;
+	}
+	if (test_and_clear_bit(ETR_EVENT_PORT1_CHANGE, &etr_events)) {
+		if (eacr.e1)
+			/*
+			 * Port change of an enabled port. We have to
+			 * assume that this can have caused a stepping
+			 * port switch.
+			 */
+			etr_tolec = get_tod_clock();
+		eacr.p1 = etr_port1_online;
+		if (!eacr.p1)
+			eacr.e1 = 0;	/* an offline port cannot stay enabled */
+		etr_port1_uptodate = 0;
+	}
+	clear_bit(ETR_EVENT_UPDATE, &etr_events);
+	return eacr;
+}
+
+/*
+ * Set up a timer that expires after the etr_tolec + 1.6 seconds if
+ * one of the ports needs an update.
+ */
+static void etr_set_tolec_timeout(unsigned long long now)
+{
+	unsigned long usecs = (now > etr_tolec) ? ((now - etr_tolec) >> 12) : 0;
+
+	/* Nothing to wait for unless an online port still needs an update. */
+	if (!((etr_eacr.p0 && !etr_port0_uptodate) ||
+	      (etr_eacr.p1 && !etr_port1_uptodate)))
+		return;
+	usecs = (usecs > 1600000) ? 0 : 1600000 - usecs;
+	mod_timer(&etr_timer, jiffies + (usecs * HZ) / 1000000 + 1);
+}
+
+/*
+ * Set up a timer that expires after 1/2 second.
+ */
+static void etr_set_sync_timeout(void)
+{
+	mod_timer(&etr_timer, jiffies + HZ/2);
+}
+
+/*
+ * Update the aib information for one or both ports.
+ */
+static struct etr_eacr etr_handle_update(struct etr_aib *aib,
+					 struct etr_eacr eacr)
+{
+	/* With both ports disabled the aib information is useless. */
+	if (!eacr.e0 && !eacr.e1)
+		return eacr;
+
+	/* Update port0 or port1 with aib stored in etr_work_fn. */
+	if (aib->esw.q == 0) {
+		/* Information for port 0 stored. */
+		if (eacr.p0 && !etr_port0_uptodate) {
+			etr_port0 = *aib;
+			if (etr_port0_online)
+				etr_port0_uptodate = 1;
+		}
+	} else {
+		/* Information for port 1 stored; port 1 has to be online. */
+		if (eacr.p1 && !etr_port1_uptodate) {
+			etr_port1 = *aib;
+			if (etr_port1_online)
+				etr_port1_uptodate = 1;
+		}
+	}
+
+	/*
+	 * Do not try to get the alternate port aib if the clock
+	 * is not in sync yet.
+	 */
+	if (!eacr.es || !check_sync_clock())
+		return eacr;
+
+	/*
+	 * If steai is available we can get the information about
+	 * the other port immediately. If only stetr is available the
+	 * data-port bit toggle has to be used.
+	 */
+	if (etr_steai_available) {
+		if (eacr.p0 && !etr_port0_uptodate) {
+			etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0);
+			etr_port0_uptodate = 1;
+		}
+		if (eacr.p1 && !etr_port1_uptodate) {
+			etr_steai_cv(&etr_port1, ETR_STEAI_PORT_1);
+			etr_port1_uptodate = 1;
+		}
+	} else {
+		/*
+		 * One port was updated above, if the other
+		 * port is not uptodate toggle dp bit.
+		 */
+		if ((eacr.p0 && !etr_port0_uptodate) ||
+		    (eacr.p1 && !etr_port1_uptodate))
+			eacr.dp ^= 1;
+		else
+			eacr.dp = 0;
+	}
+	return eacr;	/* possibly with a changed dp bit */
+}
+
+/*
+ * Write new etr control register if it differs from the current one.
+ * etr_tolec is updated as well if the data port may have changed.
+ */
+static void etr_update_eacr(struct etr_eacr eacr)
+{
+	int dp_changed;
+
+	if (memcmp(&etr_eacr, &eacr, sizeof(eacr)) == 0)
+		/* No change, return. */
+		return;
+	/*
+	 * The disable of an active port or the change of the data port
+	 * bit can/will cause a change in the data port.
+	 */
+	dp_changed = etr_eacr.e0 > eacr.e0 || etr_eacr.e1 > eacr.e1 ||
+		(etr_eacr.dp ^ eacr.dp) != 0;
+	etr_eacr = eacr;
+	etr_setr(&etr_eacr);
+	if (dp_changed)
+		etr_tolec = get_tod_clock();
+}
+
+/*
+ * ETR work. In this function you'll find the main logic. In
+ * particular this is the only function that calls etr_update_eacr(),
+ * it "controls" the etr control register.
+ */
+static void etr_work_fn(struct work_struct *work)
+{
+	unsigned long long now;
+	struct etr_eacr eacr;
+	struct etr_aib aib;
+	int sync_port;
+
+	/* prevent multiple execution. */
+	mutex_lock(&etr_work_mutex);
+
+	/* Create working copy of etr_eacr. */
+	eacr = etr_eacr;
+
+	/* Check for the different events and their immediate effects. */
+	eacr = etr_handle_events(eacr);
+
+	/* Check if ETR is supposed to be active. */
+	eacr.ea = eacr.p0 || eacr.p1;
+	if (!eacr.ea) {
+		/* Both ports offline. Reset everything. */
+		eacr.dp = eacr.es = eacr.sl = 0;
+		on_each_cpu(disable_sync_clock, NULL, 1);
+		del_timer_sync(&etr_timer);
+		etr_update_eacr(eacr);
+		goto out_unlock;
+	}
+
+	/* Store aib to get the current ETR status word. */
+	BUG_ON(etr_stetr(&aib) != 0);
+	etr_port0.esw = etr_port1.esw = aib.esw;	/* Copy status word. */
+	now = get_tod_clock();
+
+	/*
+	 * Update the port information if the last stepping port change
+	 * or data port change is older than 1.6 seconds.
+	 */
+	if (now >= etr_tolec + (1600000ULL << 12))	/* ULL: > 32 bits */
+		eacr = etr_handle_update(&aib, eacr);
+
+	/*
+	 * Select ports to enable. The preferred synchronization mode is PPS.
+	 * If a port can be enabled depends on a number of things:
+	 * 1) The port needs to be online and uptodate. A port is not
+	 *    disabled just because it is not uptodate, but it is only
+	 *    enabled if it is uptodate.
+	 * 2) The port needs to have the same mode (pps / etr).
+	 * 3) The port needs to be usable -> etr_port_valid() == 1
+	 * 4) To enable the second port the clock needs to be in sync.
+	 * 5) If both ports are useable and are ETR ports, the network id
+	 *    has to be the same.
+	 * The eacr.sl bit is used to indicate etr mode vs. pps mode.
+	 */
+	if (eacr.p0 && aib.esw.psc0 == etr_lpsc_pps_mode) {
+		eacr.sl = 0;
+		eacr.e0 = 1;
+		if (!etr_mode_is_pps(etr_eacr))
+			eacr.es = 0;
+		if (!eacr.es || !eacr.p1 || aib.esw.psc1 != etr_lpsc_pps_mode)
+			eacr.e1 = 0;
+		// FIXME: uptodate checks ?
+		else if (etr_port0_uptodate && etr_port1_uptodate)
+			eacr.e1 = 1;
+		sync_port = (etr_port0_uptodate &&
+			     etr_port_valid(&etr_port0, 0)) ? 0 : -1;
+	} else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) {
+		eacr.sl = 0;
+		eacr.e0 = 0;
+		eacr.e1 = 1;
+		if (!etr_mode_is_pps(etr_eacr))
+			eacr.es = 0;
+		sync_port = (etr_port1_uptodate &&
+			     etr_port_valid(&etr_port1, 1)) ? 1 : -1;
+	} else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) {
+		eacr.sl = 1;
+		eacr.e0 = 1;
+		if (!etr_mode_is_etr(etr_eacr))
+			eacr.es = 0;
+		if (!eacr.es || !eacr.p1 ||
+		    aib.esw.psc1 != etr_lpsc_operational_alt)
+			eacr.e1 = 0;
+		else if (etr_port0_uptodate && etr_port1_uptodate &&
+			 etr_compare_network(&etr_port0, &etr_port1))
+			eacr.e1 = 1;
+		sync_port = (etr_port0_uptodate &&
+			     etr_port_valid(&etr_port0, 0)) ? 0 : -1;
+	} else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) {
+		eacr.sl = 1;
+		eacr.e0 = 0;
+		eacr.e1 = 1;
+		if (!etr_mode_is_etr(etr_eacr))
+			eacr.es = 0;
+		sync_port = (etr_port1_uptodate &&
+			     etr_port_valid(&etr_port1, 1)) ? 1 : -1;
+	} else {
+		/* Both ports not usable. */
+		eacr.es = eacr.sl = 0;
+		sync_port = -1;
+	}
+
+	/*
+	 * If the clock is in sync just update the eacr and return.
+	 * If there is no valid sync port wait for a port update.
+	 */
+	if ((eacr.es && check_sync_clock()) || sync_port < 0) {
+		etr_update_eacr(eacr);
+		etr_set_tolec_timeout(now);
+		goto out_unlock;
+	}
+
+	/*
+	 * Prepare control register for clock syncing
+	 * (reset data port bit, set sync check control).
+	 */
+	eacr.dp = 0;
+	eacr.es = 1;
+
+	/*
+	 * Update eacr and try to synchronize the clock. If the update
+	 * of eacr caused a stepping port switch (or if we have to
+	 * assume that a stepping port switch has occurred) or the
+	 * clock syncing failed, reset the sync check control bit
+	 * and set up a timer to try again after 0.5 seconds
+	 */
+	etr_update_eacr(eacr);
+	if (now < etr_tolec + (1600000ULL << 12) ||
+	    etr_sync_clock_stop(&aib, sync_port) != 0) {
+		/* Sync failed. Try again in 1/2 second. */
+		eacr.es = 0;
+		etr_update_eacr(eacr);
+		etr_set_sync_timeout();
+	} else
+		etr_set_tolec_timeout(now);
+out_unlock:
+	mutex_unlock(&etr_work_mutex);
+}
+
+/*
+ * Sysfs interface functions
+ */
+/* Bus type of the "etr" subsystem; both port devices below are attached to it. */
+static struct bus_type etr_subsys = {
+	.name		= "etr",
+	.dev_name	= "etr",
+};
+
+/* Device object for ETR port 0 (device id 0 on the etr bus). */
+static struct device etr_port0_dev = {
+	.id	= 0,
+	.bus	= &etr_subsys,
+};
+
+/* Device object for ETR port 1 (device id 1 on the etr bus). */
+static struct device etr_port1_dev = {
+	.id	= 1,
+	.bus	= &etr_subsys,
+};
+
+/*
+ * ETR subsys attributes
+ */
+/*
+ * sysfs show for the "stepping_port" attribute: prints the "p" field of
+ * the port 0 status word as a decimal number followed by a newline.
+ */
+static ssize_t etr_stepping_port_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	int stepping_port = etr_port0.esw.p;
+
+	return sprintf(buf, "%i\n", stepping_port);
+}
+
+static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL);
+
+/*
+ * sysfs show for the "stepping_mode" attribute: prints "pps" or "etr"
+ * depending on what the current etr_eacr indicates, and "local" if it
+ * indicates neither.
+ */
+static ssize_t etr_stepping_mode_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	char *name = "local";
+
+	if (etr_mode_is_pps(etr_eacr))
+		name = "pps";
+	else if (etr_mode_is_etr(etr_eacr))
+		name = "etr";
+	return sprintf(buf, "%s\n", name);
+}
+
+static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL);
+
+/*
+ * ETR port attributes
+ */
+/*
+ * Map a port device to its attachment information block.
+ *
+ * Returns &etr_port0 for etr_port0_dev and &etr_port1 for any other
+ * device, or NULL if the selected port is not online.
+ */
+static inline struct etr_aib *etr_aib_from_dev(struct device *dev)
+{
+	if (dev == &etr_port0_dev) {
+		if (etr_port0_online)
+			return &etr_port0;
+		return NULL;
+	}
+	if (etr_port1_online)
+		return &etr_port1;
+	return NULL;
+}
+
+/*
+ * sysfs show for the per-port "online" attribute: prints the online
+ * state of port 0 for etr_port0_dev and of port 1 for any other device.
+ */
+static ssize_t etr_online_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	unsigned int state;
+
+	if (dev == &etr_port0_dev)
+		state = etr_port0_online;
+	else
+		state = etr_port1_online;
+	return sprintf(buf, "%i\n", state);
+}
+
+/*
+ * sysfs store for the per-port "online" attribute.
+ *
+ * The input is parsed with simple_strtoul(); only 0 and 1 are accepted,
+ * anything else yields -EINVAL. Returns -EOPNOTSUPP if CLOCK_SYNC_HAS_ETR
+ * is not set. If the requested state differs from the current one, the
+ * port state is updated, CLOCK_SYNC_ETR is set when both ports are online
+ * (cleared otherwise), the port change event is flagged and etr_work is
+ * queued. Returns count, also when there was nothing to do.
+ */
+static ssize_t etr_online_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	unsigned int requested = simple_strtoul(buf, NULL, 0);
+
+	if (requested > 1)
+		return -EINVAL;
+	if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
+		return -EOPNOTSUPP;
+
+	mutex_lock(&clock_sync_mutex);
+	if (dev == &etr_port0_dev) {
+		if (etr_port0_online != requested) {
+			etr_port0_online = requested;
+			if (etr_port0_online && etr_port1_online)
+				set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+			else
+				clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+			set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
+			queue_work(time_sync_wq, &etr_work);
+		}
+	} else if (etr_port1_online != requested) {
+		etr_port1_online = requested;
+		if (etr_port0_online && etr_port1_online)
+			set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+		else
+			clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+		set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
+		queue_work(time_sync_wq, &etr_work);
+	}
+	mutex_unlock(&clock_sync_mutex);
+	return count;
+}
+
+static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store);
+
+/*
+ * sysfs show for the per-port "stepping_control" attribute: prints
+ * etr_eacr.e0 for etr_port0_dev and etr_eacr.e1 for any other device.
+ */
+static ssize_t etr_stepping_control_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	int enabled;
+
+	if (dev == &etr_port0_dev)
+		enabled = etr_eacr.e0;
+	else
+		enabled = etr_eacr.e1;
+	return sprintf(buf, "%i\n", enabled);
+}
+
+static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL);
+
+/*
+ * sysfs show for the per-port "state_code" attribute.
+ *
+ * Both state codes are read from the port 0 status word: psc0 for
+ * etr_port0_dev, psc1 for any other device. Returns -ENODATA when both
+ * ports are offline.
+ */
+static ssize_t etr_mode_code_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int code;
+
+	/* Status word is not uptodate if both ports are offline. */
+	if (!etr_port0_online && !etr_port1_online)
+		return -ENODATA;
+	if (dev == &etr_port0_dev)
+		code = etr_port0.esw.psc0;
+	else
+		code = etr_port0.esw.psc1;
+	return sprintf(buf, "%i\n", code);
+}
+
+static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL);
+
+/*
+ * sysfs show for "untuned": prints edf1.u of the port's information
+ * block. Returns -ENODATA if the port is offline or slsw.v1 is not set.
+ */
+static ssize_t etr_untuned_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct etr_aib *port_aib;
+
+	port_aib = etr_aib_from_dev(dev);
+	if (port_aib && port_aib->slsw.v1)
+		return sprintf(buf, "%i\n", port_aib->edf1.u);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL);
+
+/*
+ * sysfs show for "network": prints edf1.net_id of the port's information
+ * block. Returns -ENODATA if the port is offline or slsw.v1 is not set.
+ */
+static ssize_t etr_network_id_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct etr_aib *port_aib;
+
+	port_aib = etr_aib_from_dev(dev);
+	if (port_aib && port_aib->slsw.v1)
+		return sprintf(buf, "%i\n", port_aib->edf1.net_id);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL);
+
+/*
+ * sysfs show for "id": prints edf1.etr_id of the port's information
+ * block. Returns -ENODATA if the port is offline or slsw.v1 is not set.
+ */
+static ssize_t etr_id_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct etr_aib *port_aib;
+
+	port_aib = etr_aib_from_dev(dev);
+	if (port_aib && port_aib->slsw.v1)
+		return sprintf(buf, "%i\n", port_aib->edf1.etr_id);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(id, 0400, etr_id_show, NULL);
+
+/*
+ * sysfs show for "port": prints edf1.etr_pn of the port's information
+ * block. Returns -ENODATA if the port is offline or slsw.v1 is not set.
+ */
+static ssize_t etr_port_number_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct etr_aib *port_aib;
+
+	port_aib = etr_aib_from_dev(dev);
+	if (port_aib && port_aib->slsw.v1)
+		return sprintf(buf, "%i\n", port_aib->edf1.etr_pn);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL);
+
+/*
+ * sysfs show for "coupled": prints edf3.c of the port's information
+ * block. Returns -ENODATA if the port is offline or slsw.v3 is not set.
+ */
+static ssize_t etr_coupled_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct etr_aib *port_aib;
+
+	port_aib = etr_aib_from_dev(dev);
+	if (port_aib && port_aib->slsw.v3)
+		return sprintf(buf, "%i\n", port_aib->edf3.c);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL);
+
+/*
+ * sysfs show for "local_time": prints edf3.blto of the port's information
+ * block. Returns -ENODATA if the port is offline or slsw.v3 is not set.
+ */
+static ssize_t etr_local_time_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct etr_aib *port_aib;
+
+	port_aib = etr_aib_from_dev(dev);
+	if (port_aib && port_aib->slsw.v3)
+		return sprintf(buf, "%i\n", port_aib->edf3.blto);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL);
+
+/*
+ * sysfs show for "utc_offset": prints edf3.buo of the port's information
+ * block. Returns -ENODATA if the port is offline or slsw.v3 is not set.
+ */
+static ssize_t etr_utc_offset_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct etr_aib *port_aib;
+
+	port_aib = etr_aib_from_dev(dev);
+	if (port_aib && port_aib->slsw.v3)
+		return sprintf(buf, "%i\n", port_aib->edf3.buo);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL);
+
+/*
+ * NULL-terminated list of the attributes created on each ETR port device
+ * by etr_register_port() and removed again by etr_unregister_port().
+ */
+static struct device_attribute *etr_port_attributes[] = {
+	&dev_attr_online,
+	&dev_attr_stepping_control,
+	&dev_attr_state_code,
+	&dev_attr_untuned,
+	&dev_attr_network,
+	&dev_attr_id,
+	&dev_attr_port,
+	&dev_attr_coupled,
+	&dev_attr_local_time,
+	&dev_attr_utc_offset,
+	NULL
+};
+
+/*
+ * Register an ETR port device and create all files listed in
+ * etr_port_attributes on it.
+ *
+ * Returns 0 on success. On failure the error code of device_register()
+ * or device_create_file() is returned; in the latter case the files that
+ * were already created are removed and the device is unregistered again.
+ */
+static int __init etr_register_port(struct device *dev)
+{
+	struct device_attribute **attr;
+	int rc;
+
+	rc = device_register(dev);
+	if (rc)
+		goto out;
+	for (attr = etr_port_attributes; *attr; attr++) {
+		rc = device_create_file(dev, *attr);
+		if (rc)
+			goto out_unreg;
+	}
+	return 0;
+out_unreg:
+	/*
+	 * attr points at the attribute whose creation failed. Only the
+	 * entries before it exist, so step back before each removal and
+	 * stop at the start of the array without ever moving the cursor
+	 * in front of it.
+	 */
+	while (attr != etr_port_attributes) {
+		attr--;
+		device_remove_file(dev, *attr);
+	}
+	device_unregister(dev);
+out:
+	return rc;
+}
+
+/*
+ * Remove every file listed in etr_port_attributes from the port device
+ * and unregister the device.
+ */
+static void __init etr_unregister_port(struct device *dev)
+{
+	struct device_attribute **cursor = etr_port_attributes;
+
+	while (*cursor) {
+		device_remove_file(dev, *cursor);
+		cursor++;
+	}
+	device_unregister(dev);
+}
+
+/*
+ * Create the ETR sysfs hierarchy: register the etr subsystem, add the
+ * stepping_port and stepping_mode files to its root device and register
+ * both port devices. Any failure undoes the steps already taken, in
+ * reverse order, and returns the error code.
+ */
+static int __init etr_init_sysfs(void)
+{
+	struct device *root;
+	int err;
+
+	err = subsys_system_register(&etr_subsys, NULL);
+	if (err)
+		return err;
+	root = etr_subsys.dev_root;
+
+	err = device_create_file(root, &dev_attr_stepping_port);
+	if (err)
+		goto err_subsys;
+	err = device_create_file(root, &dev_attr_stepping_mode);
+	if (err)
+		goto err_stepping_port;
+	err = etr_register_port(&etr_port0_dev);
+	if (err)
+		goto err_stepping_mode;
+	err = etr_register_port(&etr_port1_dev);
+	if (err)
+		goto err_port0;
+	return 0;
+
+err_port0:
+	etr_unregister_port(&etr_port0_dev);
+err_stepping_mode:
+	device_remove_file(root, &dev_attr_stepping_mode);
+err_stepping_port:
+	device_remove_file(root, &dev_attr_stepping_port);
+err_subsys:
+	bus_unregister(&etr_subsys);
+	return err;
+}
+
+device_initcall(etr_init_sysfs);
+
+/*
+ * Server Time Protocol (STP) code.
+ */
+/* Non-zero if STP is requested; set by the "stp=" parameter and via sysfs. */
+static int stp_online;
+/* Most recent STP information as stored by chsc_sstpi(). */
+static struct stp_sstpi stp_info;
+/* Page handed to the chsc_sstpc()/chsc_sstpi() calls; allocated in stp_reset(). */
+static void *stp_page;
+
+static void stp_work_fn(struct work_struct *work);
+/* Serializes executions of stp_work_fn(). */
+static DEFINE_MUTEX(stp_work_mutex);
+static DECLARE_WORK(stp_work, stp_work_fn);
+/* Retry timer; its handler stp_timeout() queues stp_work again. */
+static struct timer_list stp_timer;
+
+/*
+ * Handle the "stp=" early parameter: a value starting with "off" clears
+ * stp_online, one starting with "on" sets it, anything else is ignored.
+ * Always returns 0.
+ */
+static int __init early_parse_stp(char *p)
+{
+	if (!strncmp(p, "off", 3)) {
+		stp_online = 0;
+		return 0;
+	}
+	if (!strncmp(p, "on", 2))
+		stp_online = 1;
+	return 0;
+}
+early_param("stp", early_parse_stp);
+
+/*
+ * Reset STP attachment.
+ *
+ * Allocates the page used for the STP chsc calls and issues a control
+ * operation with a zero control word. If that succeeds CLOCK_SYNC_HAS_STP
+ * is set. If it fails while STP was requested, a warning is printed, the
+ * page is freed and STP is switched off. If the page cannot be allocated
+ * STP is switched off without issuing the chsc call.
+ */
+static void __init stp_reset(void)
+{
+	int rc;
+
+	stp_page = (void *) get_zeroed_page(GFP_ATOMIC);
+	if (!stp_page) {
+		/* Never hand a NULL buffer to chsc_sstpc(). */
+		stp_online = 0;
+		return;
+	}
+	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+	if (rc == 0)
+		set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
+	else if (stp_online) {
+		pr_warning("The real or virtual hardware system does "
+			   "not provide an STP interface\n");
+		free_page((unsigned long) stp_page);
+		stp_page = NULL;
+		stp_online = 0;
+	}
+}
+
+/* Handler of stp_timer: queue the STP work again; the argument is unused. */
+static void stp_timeout(unsigned long dummy)
+{
+	queue_work(time_sync_wq, &stp_work);
+}
+
+/*
+ * Initcall: if CLOCK_SYNC_HAS_STP is set, set up the STP timer and the
+ * time sync workqueue, and queue the first STP work item when STP is
+ * online. Always returns 0.
+ */
+static int __init stp_init(void)
+{
+	if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+		return 0;
+	setup_timer(&stp_timer, stp_timeout, 0UL);
+	time_init_wq();
+	if (stp_online)
+		queue_work(time_sync_wq, &stp_work);
+	return 0;
+}
+
+arch_initcall(stp_init);
+
+/*
+ * STP timing alert. The interrupt parameter reports three possible causes:
+ * 1) timing status change (tsc)
+ * 2) link availability change (lac)
+ * 3) time control parameter change (tcpc)
+ * All of them are handled the same way: the STP work is queued so that
+ * the clock source state gets re-evaluated. Nothing is done if none of
+ * the three indications is set.
+ */
+static void stp_timing_alert(struct stp_irq_parm *intparm)
+{
+	if (!intparm->tsc && !intparm->lac && !intparm->tcpc)
+		return;
+	queue_work(time_sync_wq, &stp_work);
+}
+
+/*
+ * STP sync check machine check. This is called when the timing state
+ * changes from the synchronized state to the unsynchronized state.
+ * After a STP sync check the clock is not in sync. The machine check
+ * is broadcast to all cpus at the same time.
+ */
+void stp_sync_check(void)
+{
+	/* Mark the clock as not in sync, then let the STP work resync it. */
+	disable_sync_clock(NULL);
+	queue_work(time_sync_wq, &stp_work);
+}
+
+/*
+ * STP island condition machine check. This is called when an attached
+ * server attempts to communicate over an STP link and the servers
+ * have matching CTN ids and have a valid stratum-1 configuration
+ * but the configurations do not match.
+ */
+void stp_island_check(void)
+{
+	/* Same handling as for a sync check: disable, then queue the work. */
+	disable_sync_clock(NULL);
+	queue_work(time_sync_wq, &stp_work);
+}
+
+
+/*
+ * Clock synchronization callback run via stop_machine() from stp_work_fn().
+ *
+ * @data points to the struct clock_sync_data shared by all cpus. The first
+ * cpu to get here performs the synchronization, every other cpu only calls
+ * clock_sync_cpu(). The result is reported in stp_sync->in_sync (1 on
+ * success, -EAGAIN on failure); the function itself always returns 0.
+ */
+static int stp_sync_clock(void *data)
+{
+	/* Set to 1 by the first cpu to arrive, reset to 0 when it is done. */
+	static int first;
+	unsigned long long old_clock, delta;
+	struct clock_sync_data *stp_sync;
+	int rc;
+
+	stp_sync = data;
+
+	if (xchg(&first, 1) == 1) {
+		/* Slave */
+		clock_sync_cpu(stp_sync);
+		return 0;
+	}
+
+	/* Wait until all other cpus entered the sync function. */
+	while (atomic_read(&stp_sync->cpus) != 0)
+		cpu_relax();
+
+	enable_sync_clock();
+
+	rc = 0;
+	/* A sync is only attempted if a TOD offset is pending or tmd is not 2. */
+	if (stp_info.todoff[0] || stp_info.todoff[1] ||
+	    stp_info.todoff[2] || stp_info.todoff[3] ||
+	    stp_info.tmd != 2) {
+		old_clock = get_tod_clock();
+		rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
+		if (rc == 0) {
+			/* Account for the clock change caused by the sync. */
+			delta = adjust_time(old_clock, get_tod_clock(), 0);
+			fixup_clock_comparator(delta);
+			/* Refresh stp_info and verify tmd is 2 now. */
+			rc = chsc_sstpi(stp_page, &stp_info,
+					sizeof(struct stp_sstpi));
+			if (rc == 0 && stp_info.tmd != 2)
+				rc = -EAGAIN;
+		}
+	}
+	if (rc) {
+		disable_sync_clock(NULL);
+		stp_sync->in_sync = -EAGAIN;
+	} else
+		stp_sync->in_sync = 1;
+	xchg(&first, 0);
+	return 0;
+}
+
+/*
+ * STP work. Check for the STP state and take over the clock
+ * synchronization if the STP clock source is usable.
+ *
+ * Runs under stp_work_mutex. If STP is offline the STP controls are reset
+ * and the retry timer is stopped. Otherwise the controls are set up, the
+ * STP information is fetched and, unless the clock is already in sync,
+ * stp_sync_clock() is run on all online cpus via stop_machine(). If the
+ * clock is still not in sync afterwards the work is retried via stp_timer.
+ */
+static void stp_work_fn(struct work_struct *work)
+{
+	struct clock_sync_data stp_sync;
+	int rc;
+
+	/* prevent multiple execution. */
+	mutex_lock(&stp_work_mutex);
+
+	if (!stp_online) {
+		chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+		del_timer_sync(&stp_timer);
+		goto out_unlock;
+	}
+
+	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
+	if (rc)
+		goto out_unlock;
+
+	/* Nothing to do if the information is unavailable or stp_info.c is 0. */
+	rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+	if (rc || stp_info.c == 0)
+		goto out_unlock;
+
+	/* Skip synchronization if the clock is already in sync. */
+	if (check_sync_clock())
+		goto out_unlock;
+
+	memset(&stp_sync, 0, sizeof(stp_sync));
+	get_online_cpus();
+	/* All cpus but the one doing the sync are counted as waiters. */
+	atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
+	stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask);
+	put_online_cpus();
+
+	if (!check_sync_clock())
+		/*
+		 * There is a usable clock but the synchronization failed.
+		 * Retry after a second.
+		 */
+		mod_timer(&stp_timer, jiffies + HZ);
+
+out_unlock:
+	mutex_unlock(&stp_work_mutex);
+}
+
+/*
+ * STP subsys sysfs interface functions
+ */
+/* Bus type of the "stp" subsystem; its root device carries all STP attributes. */
+static struct bus_type stp_subsys = {
+	.name		= "stp",
+	.dev_name	= "stp",
+};
+
+/*
+ * sysfs show for "ctn_id": prints stp_info.ctnid as a 16 digit hex
+ * number. Returns -ENODATA while STP is offline.
+ */
+static ssize_t stp_ctn_id_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	if (stp_online)
+		return sprintf(buf, "%016llx\n",
+			       *(unsigned long long *) stp_info.ctnid);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL);
+
+/*
+ * sysfs show for "ctn_type": prints stp_info.ctn. Returns -ENODATA
+ * while STP is offline.
+ */
+static ssize_t stp_ctn_type_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	if (stp_online)
+		return sprintf(buf, "%i\n", stp_info.ctn);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL);
+
+/*
+ * sysfs show for "dst_offset": prints stp_info.dsto as a signed 16 bit
+ * value. Returns -ENODATA while STP is offline or when bit 0x2000 of
+ * stp_info.vbits is not set.
+ */
+static ssize_t stp_dst_offset_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	if (stp_online && (stp_info.vbits & 0x2000))
+		return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL);
+
+/*
+ * sysfs show for "leap_seconds": prints stp_info.leaps as a signed 16 bit
+ * value. Returns -ENODATA while STP is offline or when bit 0x8000 of
+ * stp_info.vbits is not set.
+ */
+static ssize_t stp_leap_seconds_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	if (stp_online && (stp_info.vbits & 0x8000))
+		return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL);
+
+/*
+ * sysfs show for "stratum": prints stp_info.stratum as a signed 16 bit
+ * value. Returns -ENODATA while STP is offline.
+ */
+static ssize_t stp_stratum_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	if (stp_online)
+		return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL);
+
+/*
+ * sysfs show for "time_offset": prints stp_info.tto converted to int.
+ * Returns -ENODATA while STP is offline or when bit 0x0800 of
+ * stp_info.vbits is not set.
+ */
+static ssize_t stp_time_offset_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	if (stp_online && (stp_info.vbits & 0x0800))
+		return sprintf(buf, "%i\n", (int) stp_info.tto);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL);
+
+/*
+ * sysfs show for "time_zone_offset": prints stp_info.tzo as a signed
+ * 16 bit value. Returns -ENODATA while STP is offline or when bit 0x4000
+ * of stp_info.vbits is not set.
+ */
+static ssize_t stp_time_zone_offset_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	if (stp_online && (stp_info.vbits & 0x4000))
+		return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(time_zone_offset, 0400,
+			 stp_time_zone_offset_show, NULL);
+
+/*
+ * sysfs show for "timing_mode": prints stp_info.tmd. Returns -ENODATA
+ * while STP is offline.
+ */
+static ssize_t stp_timing_mode_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	if (stp_online)
+		return sprintf(buf, "%i\n", stp_info.tmd);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL);
+
+/*
+ * sysfs show for "timing_state": prints stp_info.tst. Returns -ENODATA
+ * while STP is offline.
+ */
+static ssize_t stp_timing_state_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	if (stp_online)
+		return sprintf(buf, "%i\n", stp_info.tst);
+	return -ENODATA;
+}
+
+static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL);
+
+/* sysfs show for the stp "online" attribute: prints the stp_online flag. */
+static ssize_t stp_online_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	return sprintf(buf, "%i\n", stp_online);
+}
+
+/*
+ * sysfs store for the stp "online" attribute.
+ *
+ * The input is parsed with simple_strtoul(); only 0 and 1 are accepted,
+ * anything else yields -EINVAL. Returns -EOPNOTSUPP if CLOCK_SYNC_HAS_STP
+ * is not set. Otherwise stp_online and the CLOCK_SYNC_STP flag are
+ * updated under clock_sync_mutex, the STP work is queued and count is
+ * returned.
+ */
+static ssize_t stp_online_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	unsigned int requested = simple_strtoul(buf, NULL, 0);
+
+	if (requested > 1)
+		return -EINVAL;
+	if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+		return -EOPNOTSUPP;
+
+	mutex_lock(&clock_sync_mutex);
+	stp_online = requested;
+	if (requested)
+		set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+	else
+		clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+	queue_work(time_sync_wq, &stp_work);
+	mutex_unlock(&clock_sync_mutex);
+	return count;
+}
+
+/*
+ * DEVICE_ATTR cannot be used here: the attribute has to be named "online",
+ * but the identifier dev_attr_online it would generate is already taken by
+ * the ETR port attribute defined earlier in this file. The structure is
+ * therefore spelled out by hand under a different identifier.
+ */
+static struct device_attribute dev_attr_stp_online = {
+	.attr = { .name = "online", .mode = 0600 },
+	.show	= stp_online_show,
+	.store	= stp_online_store,
+};
+
+/*
+ * NULL-terminated list of the attributes that stp_init_sysfs() creates on
+ * the root device of the stp subsystem.
+ */
+static struct device_attribute *stp_attributes[] = {
+	&dev_attr_ctn_id,
+	&dev_attr_ctn_type,
+	&dev_attr_dst_offset,
+	&dev_attr_leap_seconds,
+	&dev_attr_stp_online,
+	&dev_attr_stratum,
+	&dev_attr_time_offset,
+	&dev_attr_time_zone_offset,
+	&dev_attr_timing_mode,
+	&dev_attr_timing_state,
+	NULL
+};
+
+/*
+ * Register the stp subsystem and create all files listed in
+ * stp_attributes on its root device.
+ *
+ * Returns 0 on success. If creating a file fails, the files created so
+ * far are removed, the subsystem is unregistered and the error code is
+ * returned.
+ */
+static int __init stp_init_sysfs(void)
+{
+	struct device_attribute **attr;
+	int rc;
+
+	rc = subsys_system_register(&stp_subsys, NULL);
+	if (rc)
+		goto out;
+	for (attr = stp_attributes; *attr; attr++) {
+		rc = device_create_file(stp_subsys.dev_root, *attr);
+		if (rc)
+			goto out_unreg;
+	}
+	return 0;
+out_unreg:
+	/*
+	 * attr points at the attribute whose creation failed. Only the
+	 * entries before it exist, so step back before each removal and
+	 * stop at the start of the array without ever moving the cursor
+	 * in front of it.
+	 */
+	while (attr != stp_attributes) {
+		attr--;
+		device_remove_file(stp_subsys.dev_root, *attr);
+	}
+	bus_unregister(&stp_subsys);
+out:
+	return rc;
+}
+
+device_initcall(stp_init_sysfs);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
new file mode 100644
index 000000000..5728c5bd4
--- /dev/null
+++ b/arch/s390/kernel/topology.c
@@ -0,0 +1,501 @@
+/*
+ *    Copyright IBM Corp. 2007, 2011
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/workqueue.h>
+#include <linux/cpuset.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <asm/sysinfo.h>
+
+#define PTF_HORIZONTAL	(0UL)
+#define PTF_VERTICAL	(1UL)
+#define PTF_CHECK	(2UL)
+
+/* One element of the singly linked socket or book list. */
+struct mask_info {
+	struct mask_info *next;	/* next list element, NULL at the end */
+	unsigned char id;	/* container id taken from the topology entry */
+	cpumask_t mask;		/* cpus that were added to this container */
+};
+
+static void set_topology_timer(void);
+static void topology_work_fn(struct work_struct *work);
+/* Page sized buffer that store_topology() fills; set up in s390_topology_init(). */
+static struct sysinfo_15_1_x *tl_info;
+
+/* Cleared by the "topology=off" early parameter. */
+static int topology_enabled = 1;
+static DECLARE_WORK(topology_work, topology_work_fn);
+
+/* topology_lock protects the socket and book linked lists */
+static DEFINE_SPINLOCK(topology_lock);
+/* List heads; the elements behind them are allocated by alloc_masks(). */
+static struct mask_info socket_info;
+static struct mask_info book_info;
+
+DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology);
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology);
+
+/*
+ * Return the mask of the first list element, starting at @info, that
+ * contains @cpu. If topology support is disabled or unavailable, or no
+ * element contains @cpu, a mask holding only @cpu is returned.
+ */
+static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
+{
+	cpumask_t self;
+
+	cpumask_copy(&self, cpumask_of(cpu));
+	if (topology_enabled && MACHINE_HAS_TOPOLOGY) {
+		while (info) {
+			if (cpumask_test_cpu(cpu, &info->mask))
+				return info->mask;
+			info = info->next;
+		}
+	}
+	return self;
+}
+
+/*
+ * Return the mask of the present cpus that share a core with @cpu, i.e.
+ * the cpus of the aligned group of (smp_cpu_mtid + 1) cpu numbers that
+ * @cpu belongs to. @cpu itself is always part of the result. If topology
+ * support is disabled or unavailable only @cpu is returned.
+ */
+static cpumask_t cpu_thread_map(unsigned int cpu)
+{
+	cpumask_t siblings;
+	int i;
+
+	cpumask_copy(&siblings, cpumask_of(cpu));
+	if (topology_enabled && MACHINE_HAS_TOPOLOGY) {
+		/* Round down to the first cpu number of the core. */
+		cpu -= cpu % (smp_cpu_mtid + 1);
+		for (i = 0; i <= smp_cpu_mtid; i++) {
+			if (cpu_present(cpu + i))
+				cpumask_set_cpu(cpu + i, &siblings);
+		}
+	}
+	return siblings;
+}
+
+/*
+ * Process one core topology entry.
+ *
+ * For every core set in the entry's mask that maps to a logical cpu, all
+ * threads of that core are added to the @book and @socket masks, their
+ * per-cpu topology ids are filled in and their polarization is set from
+ * the entry. With @one_socket_per_cpu the core id doubles as socket id
+ * and @socket advances to the next list element after each core.
+ *
+ * Returns the socket list element that is current after processing.
+ */
+static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
+					  struct mask_info *book,
+					  struct mask_info *socket,
+					  int one_socket_per_cpu)
+{
+	unsigned int core;
+
+	for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) {
+		unsigned int rcore;
+		int first_cpu, thread;
+
+		rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
+		first_cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
+		if (first_cpu < 0)
+			continue;
+		for (thread = 0; thread <= smp_cpu_mtid; thread++) {
+			int cpu = first_cpu + thread;
+			struct cpu_topology_s390 *topo;
+
+			topo = &per_cpu(cpu_topology, cpu);
+			topo->book_id = book->id;
+			topo->core_id = rcore;
+			topo->thread_id = cpu;
+			cpumask_set_cpu(cpu, &book->mask);
+			cpumask_set_cpu(cpu, &socket->mask);
+			if (one_socket_per_cpu)
+				topo->socket_id = rcore;
+			else
+				topo->socket_id = socket->id;
+			smp_cpu_set_polarization(cpu, tl_core->pp);
+		}
+		if (one_socket_per_cpu)
+			socket = socket->next;
+	}
+	return socket;
+}
+
+/* Clear the cpu mask of every element of the socket and the book list. */
+static void clear_masks(void)
+{
+	struct mask_info *entry;
+
+	for (entry = &socket_info; entry; entry = entry->next)
+		cpumask_clear(&entry->mask);
+	for (entry = &book_info; entry; entry = entry->next)
+		cpumask_clear(&entry->mask);
+}
+
+/*
+ * Return the topology entry following @tle. Entries with a nesting level
+ * of zero have the size of a struct topology_core, all others the size of
+ * a struct topology_container.
+ */
+static union topology_entry *next_tle(union topology_entry *tle)
+{
+	if (tle->nl)
+		return (union topology_entry *)((struct topology_container *)tle + 1);
+	return (union topology_entry *)((struct topology_core *)tle + 1);
+}
+
+/*
+ * Walk all topology entries in @info and fill the socket and book lists.
+ * Nesting level 2 entries select the next book, level 1 entries the next
+ * socket, level 0 entries add their cpus to the current book and socket.
+ * Any other nesting level clears all masks and ends the walk.
+ */
+static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
+{
+	struct mask_info *cur_socket = &socket_info;
+	struct mask_info *cur_book = &book_info;
+	union topology_entry *tle, *end;
+
+	end = (union topology_entry *)((unsigned long)info + info->length);
+	for (tle = info->tle; tle < end; tle = next_tle(tle)) {
+		switch (tle->nl) {
+		case 2:
+			cur_book = cur_book->next;
+			cur_book->id = tle->container.id;
+			break;
+		case 1:
+			cur_socket = cur_socket->next;
+			cur_socket->id = tle->container.id;
+			break;
+		case 0:
+			add_cpus_to_mask(&tle->cpu, cur_book, cur_socket, 0);
+			break;
+		default:
+			clear_masks();
+			return;
+		}
+	}
+}
+
+/*
+ * Variant of the topology walk used for the z10 machine types. Nesting
+ * level 1 entries select the next book; level 0 entries add their cpus
+ * with one socket per core, advancing the socket list as they go. Any
+ * other nesting level clears all masks and ends the walk.
+ */
+static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
+{
+	struct mask_info *cur_socket = &socket_info;
+	struct mask_info *cur_book = &book_info;
+	union topology_entry *tle, *end;
+
+	end = (union topology_entry *)((unsigned long)info + info->length);
+	for (tle = info->tle; tle < end; tle = next_tle(tle)) {
+		switch (tle->nl) {
+		case 1:
+			cur_book = cur_book->next;
+			cur_book->id = tle->container.id;
+			break;
+		case 0:
+			cur_socket = add_cpus_to_mask(&tle->cpu, cur_book,
+						      cur_socket, 1);
+			break;
+		default:
+			clear_masks();
+			return;
+		}
+	}
+}
+
+/*
+ * Rebuild the socket and book masks from @info under topology_lock.
+ * Machine types 0x2097 and 0x2098 use the z10 walk, all others the
+ * generic one.
+ */
+static void tl_to_masks(struct sysinfo_15_1_x *info)
+{
+	struct cpuid cpu_id;
+
+	spin_lock_irq(&topology_lock);
+	get_cpu_id(&cpu_id);
+	clear_masks();
+	if (cpu_id.machine == 0x2097 || cpu_id.machine == 0x2098)
+		__tl_to_masks_z10(info);
+	else
+		__tl_to_masks_generic(info);
+	spin_unlock_irq(&topology_lock);
+}
+
+/*
+ * Set the polarization of every possible cpu to horizontal, holding
+ * smp_cpu_state_mutex while doing so.
+ */
+static void topology_update_polarization_simple(void)
+{
+	int cpu;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	for_each_possible_cpu(cpu)
+		smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
+	mutex_unlock(&smp_cpu_state_mutex);
+}
+
+/*
+ * Execute the instruction with opcode 0xb9a2 (presumably PERFORM TOPOLOGY
+ * FUNCTION, going by the function name) with function code @fc and return
+ * the value that ipm/srl leave in rc afterwards.
+ * NOTE(review): that value looks like the resulting condition code;
+ * confirm against the instruction set reference.
+ */
+static int ptf(unsigned long fc)
+{
+	int rc;
+
+	asm volatile(
+		"	.insn	rre,0xb9a20000,%1,%1\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (rc)
+		: "d" (fc)  : "cc");
+	return rc;
+}
+
+/*
+ * Request vertical (@fc non-zero) or horizontal (@fc zero) polarization.
+ *
+ * Returns -EOPNOTSUPP without topology support and -EBUSY if ptf()
+ * reports a non-zero result. On success the polarization of every
+ * possible cpu is marked unknown and 0 is returned.
+ */
+int topology_set_cpu_management(int fc)
+{
+	int cpu;
+
+	if (!MACHINE_HAS_TOPOLOGY)
+		return -EOPNOTSUPP;
+	if (ptf(fc ? PTF_VERTICAL : PTF_HORIZONTAL))
+		return -EBUSY;
+	for_each_possible_cpu(cpu)
+		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+	return 0;
+}
+
+/*
+ * Recompute the thread, core and book masks of every possible cpu under
+ * topology_lock. Without topology support all topology ids of a cpu are
+ * set to its cpu number.
+ */
+static void update_cpu_masks(void)
+{
+	unsigned long flags;
+	int cpu;
+
+	spin_lock_irqsave(&topology_lock, flags);
+	for_each_possible_cpu(cpu) {
+		struct cpu_topology_s390 *topo = &per_cpu(cpu_topology, cpu);
+
+		topo->thread_mask = cpu_thread_map(cpu);
+		topo->core_mask = cpu_group_map(&socket_info, cpu);
+		topo->book_mask = cpu_group_map(&book_info, cpu);
+		if (MACHINE_HAS_TOPOLOGY)
+			continue;
+		topo->thread_id = cpu;
+		topo->core_id = cpu;
+		topo->socket_id = cpu;
+		topo->book_id = cpu;
+	}
+	spin_unlock_irqrestore(&topology_lock, flags);
+}
+
+/*
+ * Store the topology information into @info via stsi(15, 1, x), where
+ * the last selector is 3 if topology_max_mnest is at least 3 and 2
+ * otherwise.
+ */
+void store_topology(struct sysinfo_15_1_x *info)
+{
+	int sel2 = (topology_max_mnest >= 3) ? 3 : 2;
+
+	stsi(info, 15, 1, sel2);
+}
+
+/*
+ * Refresh the cpu topology.
+ *
+ * With topology support the topology information is stored and converted
+ * into masks, the per-cpu masks are updated, a change uevent is sent for
+ * every online cpu device and 1 is returned. Without it only the per-cpu
+ * masks and the polarization are updated and 0 is returned.
+ */
+int arch_update_cpu_topology(void)
+{
+	struct sysinfo_15_1_x *info = tl_info;
+	struct device *cpu_dev;
+	int cpu;
+
+	if (MACHINE_HAS_TOPOLOGY) {
+		store_topology(info);
+		tl_to_masks(info);
+		update_cpu_masks();
+		for_each_online_cpu(cpu) {
+			cpu_dev = get_cpu_device(cpu);
+			kobject_uevent(&cpu_dev->kobj, KOBJ_CHANGE);
+		}
+		return 1;
+	}
+	update_cpu_masks();
+	topology_update_polarization_simple();
+	return 0;
+}
+
+/* Work function of topology_work: rebuild the scheduling domains. */
+static void topology_work_fn(struct work_struct *work)
+{
+	rebuild_sched_domains();
+}
+
+/* Schedule topology_work, which rebuilds the scheduling domains. */
+void topology_schedule_update(void)
+{
+	schedule_work(&topology_work);
+}
+
+/*
+ * Handler of topology_timer: schedule a topology update if
+ * ptf(PTF_CHECK) returns non-zero, then re-arm the timer.
+ */
+static void topology_timer_fn(unsigned long ignored)
+{
+	if (ptf(PTF_CHECK))
+		topology_schedule_update();
+	set_topology_timer();
+}
+
+/* Polling timer; (re-)armed by set_topology_timer(). */
+static struct timer_list topology_timer =
+	TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0);
+
+/* Number of remaining fast polls; raised by topology_expect_change(). */
+static atomic_t topology_poll = ATOMIC_INIT(0);
+
+/*
+ * (Re-)arm the topology timer. While topology_poll is non-zero it is
+ * decremented and the timer fires after HZ / 10 jiffies, otherwise after
+ * HZ * 60 jiffies.
+ */
+static void set_topology_timer(void)
+{
+	unsigned long delay;
+
+	if (atomic_add_unless(&topology_poll, -1, 0))
+		delay = HZ / 10;
+	else
+		delay = HZ * 60;
+	mod_timer(&topology_timer, jiffies + delay);
+}
+
+/*
+ * Switch to fast polling because a topology change is expected: add 60
+ * to topology_poll and re-arm the timer. Does nothing without topology
+ * support or if topology_poll is already above 60.
+ */
+void topology_expect_change(void)
+{
+	/*
+	 * The check of topology_poll is racy, but it doesn't matter since
+	 * it is just a heuristic. Worst case is that we poll in a higher
+	 * frequency for a bit longer.
+	 */
+	if (!MACHINE_HAS_TOPOLOGY || atomic_read(&topology_poll) > 60)
+		return;
+	atomic_add(60, &topology_poll);
+	set_topology_timer();
+}
+
+/* Value last written successfully through the "dispatching" attribute. */
+static int cpu_management;
+
+/*
+ * sysfs show for "dispatching": prints cpu_management, read under
+ * smp_cpu_state_mutex.
+ */
+static ssize_t dispatching_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	ssize_t len;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	len = sprintf(buf, "%d\n", cpu_management);
+	mutex_unlock(&smp_cpu_state_mutex);
+
+	return len;
+}
+
+/*
+ * sysfs store for "dispatching".
+ *
+ * Accepts exactly one integer, 0 or 1, optionally followed by white
+ * space; anything else yields -EINVAL. If the value differs from the
+ * current cpu_management setting it is passed to
+ * topology_set_cpu_management(); on success the new value is remembered
+ * and fast topology polling is requested. Returns the error code of
+ * topology_set_cpu_management() on failure, count otherwise.
+ */
+static ssize_t dispatching_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf,
+				 size_t count)
+{
+	int val, rc;
+	char delim;
+
+	if (sscanf(buf, "%d %c", &val, &delim) != 1)
+		return -EINVAL;
+	if (val != 0 && val != 1)
+		return -EINVAL;
+
+	rc = 0;
+	get_online_cpus();
+	mutex_lock(&smp_cpu_state_mutex);
+	if (cpu_management != val) {
+		rc = topology_set_cpu_management(val);
+		if (!rc) {
+			cpu_management = val;
+			topology_expect_change();
+		}
+	}
+	mutex_unlock(&smp_cpu_state_mutex);
+	put_online_cpus();
+
+	if (rc)
+		return rc;
+	return count;
+}
+static DEVICE_ATTR(dispatching, 0644, dispatching_show,
+			 dispatching_store);
+
+/*
+ * sysfs show for the per-cpu "polarization" attribute: prints a textual
+ * name for the polarization of the cpu identified by dev->id, or
+ * "unknown" for any value without a name. The polarization is read under
+ * smp_cpu_state_mutex.
+ */
+static ssize_t cpu_polarization_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	int cpu = dev->id;
+	ssize_t len;
+	int pol;
+
+	mutex_lock(&smp_cpu_state_mutex);
+	pol = smp_cpu_get_polarization(cpu);
+	if (pol == POLARIZATION_HRZ)
+		len = sprintf(buf, "horizontal\n");
+	else if (pol == POLARIZATION_VL)
+		len = sprintf(buf, "vertical:low\n");
+	else if (pol == POLARIZATION_VM)
+		len = sprintf(buf, "vertical:medium\n");
+	else if (pol == POLARIZATION_VH)
+		len = sprintf(buf, "vertical:high\n");
+	else
+		len = sprintf(buf, "unknown\n");
+	mutex_unlock(&smp_cpu_state_mutex);
+	return len;
+}
+static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);
+
+/* NULL-terminated list of the per-cpu topology attributes. */
+static struct attribute *topology_cpu_attrs[] = {
+	&dev_attr_polarization.attr,
+	NULL,
+};
+
+/* Attribute group that topology_cpu_init() creates on each cpu device. */
+static struct attribute_group topology_cpu_attr_group = {
+	.attrs = topology_cpu_attrs,
+};
+
+/*
+ * Create the topology attribute group on the device of @cpu.
+ * Returns the result of sysfs_create_group().
+ */
+int topology_cpu_init(struct cpu *cpu)
+{
+	return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
+}
+
+/* Return the thread mask stored in the per-cpu topology data of @cpu. */
+static const struct cpumask *cpu_thread_mask(int cpu)
+{
+	return &per_cpu(cpu_topology, cpu).thread_mask;
+}
+
+
+/* Return the core mask stored in the per-cpu topology data of @cpu. */
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	return &per_cpu(cpu_topology, cpu).core_mask;
+}
+
+/* Return the book mask stored in the per-cpu topology data of @cpu. */
+static const struct cpumask *cpu_book_mask(int cpu)
+{
+	return &per_cpu(cpu_topology, cpu).book_mask;
+}
+
+/*
+ * Handle the "topology=" early parameter: a value starting with "off"
+ * disables topology support, anything else is ignored. Always returns 0.
+ */
+static int __init early_parse_topology(char *p)
+{
+	if (strncmp(p, "off", 3) == 0)
+		topology_enabled = 0;
+	return 0;
+}
+early_param("topology", early_parse_topology);
+
+/*
+ * Scheduling domain levels handed to set_sched_topology(), from the
+ * thread level up to the level covered by cpu_cpu_mask. The table is
+ * terminated by an entry with a NULL mask function.
+ */
+static struct sched_domain_topology_level s390_topology[] = {
+	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
+/*
+ * Allocate the list elements that hang off the list head @mask.
+ *
+ * The number of elements is the product of the magnitude values in @info
+ * selected by @offset and the reported nesting level, but at least one.
+ * The topology code walks these lists without checking for their end, so
+ * a failed allocation cannot be tolerated and ends in a panic.
+ */
+static void __init alloc_masks(struct sysinfo_15_1_x *info,
+			       struct mask_info *mask, int offset)
+{
+	int i, nr_masks;
+
+	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
+	for (i = 0; i < info->mnest - offset; i++)
+		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
+	nr_masks = max(nr_masks, 1);
+	for (i = 0; i < nr_masks; i++) {
+		mask->next = kzalloc(sizeof(*mask->next), GFP_KERNEL);
+		/* Stop here rather than dereference NULL on the next round. */
+		if (!mask->next)
+			panic("%s: Failed to allocate %zu bytes\n",
+			      __func__, sizeof(*mask->next));
+		mask = mask->next;
+	}
+}
+
+/*
+ * Early initcall: on machines with topology support allocate the page
+ * used for the topology information, store and print the topology
+ * magnitudes, allocate the socket and book lists and install the s390
+ * scheduling domain topology. Always returns 0.
+ */
+static int __init s390_topology_init(void)
+{
+	struct sysinfo_15_1_x *info;
+	int i;
+
+	if (!MACHINE_HAS_TOPOLOGY)
+		return 0;
+	tl_info = (struct sysinfo_15_1_x *)__get_free_page(GFP_KERNEL);
+	/*
+	 * tl_info is used unconditionally here and later by
+	 * arch_update_cpu_topology(), so there is no way to continue
+	 * without it.
+	 */
+	if (!tl_info)
+		panic("%s: Failed to allocate topology information page\n",
+		      __func__);
+	info = tl_info;
+	store_topology(info);
+	pr_info("The CPU configuration topology of the machine is:");
+	for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+		printk(KERN_CONT " %d", info->mag[i]);
+	printk(KERN_CONT " / %d\n", info->mnest);
+	alloc_masks(info, &socket_info, 1);
+	alloc_masks(info, &book_info, 2);
+	set_sched_topology(s390_topology);
+	return 0;
+}
+early_initcall(s390_topology_init);
+
+/*
+ * Device initcall: start topology polling on machines with topology
+ * support, otherwise set all cpus to horizontal polarization. Then create
+ * the "dispatching" file on the root device of the cpu subsystem and
+ * return the result of that.
+ */
+static int __init topology_init(void)
+{
+	if (!MACHINE_HAS_TOPOLOGY)
+		topology_update_polarization_simple();
+	else
+		set_topology_timer();
+	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
+}
+device_initcall(topology_init);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
new file mode 100644
index 000000000..4d96c9f53
--- /dev/null
+++ b/arch/s390/kernel/traps.c
@@ -0,0 +1,342 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999, 2000
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *               Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ *
+ *  Derived from "arch/i386/kernel/traps.c"
+ *    Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in 'asm.s'.
+ */
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/switch_to.h>
+#include "entry.h"
+
+int show_unhandled_signals = 1;
+
+static inline void __user *get_trap_ip(struct pt_regs *regs)
+{
+	unsigned long pc, ilen = regs->int_code >> 16;
+
+	/* Bit 0x200 of int_code: take the address from trap_tdb + 24. */
+	pc = regs->psw.addr;
+	if (regs->int_code & 0x200)
+		pc = *(unsigned long *)(current->thread.trap_tdb + 24);
+	/* Step back by the instruction length, keep only the address bits. */
+	return (void __user *)((pc - ilen) & PSW_ADDR_INSN);
+}
+
+static inline void report_user_fault(struct pt_regs *regs, int signr)
+{
+	/* With reporting switched off only tasks with pid <= 1 are logged. */
+	if (!show_unhandled_signals && task_pid_nr(current) > 1)
+		return;
+	/* Stay quiet for handled signals and when the rate limit is hit. */
+	if (!unhandled_signal(current, signr) || !printk_ratelimit())
+		return;
+	printk("User process fault: interruption code %04x ilc:%d ",
+	       regs->int_code & 0xffff, regs->int_code >> 17);
+	print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
+	printk("\n");
+	show_regs(regs);
+}
+
+int is_valid_bugaddr(unsigned long addr)
+{
+	return 1;	/* every address is accepted; @addr is not inspected */
+}
+
+void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
+{
+	const struct exception_table_entry *entry;
+	unsigned long insn_addr;
+	siginfo_t info;
+
+	if (user_mode(regs)) {
+		info.si_signo = si_signo;
+		info.si_errno = 0;
+		info.si_code = si_code;
+		info.si_addr = get_trap_ip(regs);
+		force_sig_info(si_signo, &info, current);
+		report_user_fault(regs, si_signo);
+		return;
+	}
+	/* Kernel mode: an exception table fixup takes precedence. */
+	insn_addr = regs->psw.addr & PSW_ADDR_INSN;
+	entry = search_exception_tables(insn_addr);
+	if (entry) {
+		regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
+		return;
+	}
+	/* No fixup: only a BUG_TRAP_TYPE_WARN report lets us continue. */
+	if (report_bug(insn_addr, regs) != BUG_TRAP_TYPE_WARN)
+		die(regs, str);
+}
+
+static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
+{
+	int rc = notify_die(DIE_TRAP, str, regs, 0, regs->int_code, si_signo);
+
+	if (rc != NOTIFY_STOP)	/* no die notifier claimed the trap */
+		do_report_trap(regs, si_signo, si_code, str);
+}
+NOKPROBE_SYMBOL(do_trap);
+
+void do_per_trap(struct pt_regs *regs)
+{
+	siginfo_t info;
+	int rc;
+
+	rc = notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP);
+	if (rc == NOTIFY_STOP || !current->ptrace)
+		return;
+	info.si_signo = SIGTRAP;
+	info.si_errno = 0;
+	info.si_code = TRAP_HWBKPT;
+	info.si_addr =
+		(void __force __user *) current->thread.per_event.address;
+	force_sig_info(SIGTRAP, &info, current);
+}
+NOKPROBE_SYMBOL(do_per_trap);
+
+void default_trap_handler(struct pt_regs *regs)
+{
+	if (user_mode(regs)) {	/* user space: log the fault, then end the task */
+		report_user_fault(regs, SIGSEGV);
+		do_exit(SIGSEGV);
+	} else	/* kernel mode */
+		die(regs, "Unknown program exception");
+}
+
+#define DO_ERROR_INFO(name, signr, sicode, str) \
+void name(struct pt_regs *regs)			\
+{						\
+	do_trap(regs, signr, sicode, str);	/* signal, si_code and text are fixed per handler */ \
+}
+
+DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR,
+	      "addressing exception")
+DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN,
+	      "execute exception")
+DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV,
+	      "fixpoint divide exception")
+DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF,
+	      "fixpoint overflow exception")
+DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF,
+	      "HFP overflow exception")
+DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND,
+	      "HFP underflow exception")
+DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES,
+	      "HFP significance exception")
+DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV,
+	      "HFP divide exception")
+DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV,
+	      "HFP square root exception")
+DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN,
+	      "operand exception")
+DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC,
+	      "privileged operation")
+DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
+	      "special operation exception")
+DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
+	      "transaction constraint exception")
+
+static inline void do_fp_trap(struct pt_regs *regs, int fpc)
+{
+	int si_code = 0;
+
+	/* FPC[2] is the DXC; its bits 6 and 7 are 0 iff IEEE exception */
+	if (fpc & 0x00000300)
+		si_code = 0; /* not an IEEE exception */
+	else if (fpc & 0x8000) /* invalid fp operation */
+		si_code = FPE_FLTINV;
+	else if (fpc & 0x4000) /* div by 0 */
+		si_code = FPE_FLTDIV;
+	else if (fpc & 0x2000) /* overflow */
+		si_code = FPE_FLTOVF;
+	else if (fpc & 0x1000) /* underflow */
+		si_code = FPE_FLTUND;
+	else if (fpc & 0x0800) /* inexact */
+		si_code = FPE_FLTRES;
+	do_trap(regs, SIGFPE, si_code, "floating point exception");
+}
+
+void translation_exception(struct pt_regs *regs)
+{
+	/* Not expected to ever happen; panic if it does. */
+	panic("Translation exception");
+}
+
+void illegal_op(struct pt_regs *regs)
+{
+	siginfo_t info;
+        __u8 opcode[6];
+	__u16 __user *location;
+	int is_uprobe_insn = 0;
+	int signal = 0;	/* signal to raise through do_trap(), 0 for none */
+
+	location = get_trap_ip(regs);	/* address of the trapping instruction */
+
+	if (user_mode(regs)) {
+		if (get_user(*((__u16 *) opcode), (__u16 __user *) location))
+			return;	/* first halfword unreadable: return without a signal */
+		if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
+			if (current->ptrace) {	/* traced task: report the breakpoint as SIGTRAP */
+				info.si_signo = SIGTRAP;
+				info.si_errno = 0;
+				info.si_code = TRAP_BRKPT;
+				info.si_addr = location;
+				force_sig_info(SIGTRAP, &info, current);
+			} else
+				signal = SIGILL;
+#ifdef CONFIG_UPROBES
+		} else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) {
+			is_uprobe_insn = 1;	/* decided by the notifier chain below */
+#endif
+		} else
+			signal = SIGILL;
+	}
+	/*
+	 * We got either an illegal op in kernel mode, or user space trapped
+	 * on a uprobes illegal instruction. See if kprobes or uprobes picks
+	 * it up. If not, SIGILL.
+	 */
+	if (is_uprobe_insn || !user_mode(regs)) {
+		if (notify_die(DIE_BPT, "bpt", regs, 0,
+			       3, SIGTRAP) != NOTIFY_STOP)
+			signal = SIGILL;
+	}
+	if (signal)
+		do_trap(regs, signal, ILL_ILLOPC, "illegal operation");
+}
+NOKPROBE_SYMBOL(illegal_op);
+
+DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
+	      "specification exception")	/* no ';': the macro already expands to a complete function */
+
+int alloc_vector_registers(struct task_struct *tsk)
+{
+	__vector128 *vxrs;
+	int i;
+
+	/* Allocate vector register save area. */
+	vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
+		       GFP_KERNEL|__GFP_REPEAT);
+	if (!vxrs)
+		return -ENOMEM;	/* the only failure this function reports */
+	preempt_disable();	/* save, copy and restore below run with preemption off */
+	if (tsk == current)
+		save_fp_regs(tsk->thread.fp_regs.fprs);	/* refresh the saved FPRs before copying them */
+	/* Copy the 16 floating point registers */
+	for (i = 0; i < 16; i++)
+		*(freg_t *) &vxrs[i] = tsk->thread.fp_regs.fprs[i];	/* FPR i lands at the start of vxrs[i] */
+	tsk->thread.vxrs = vxrs;
+	if (tsk == current) {
+		__ctl_set_bit(0, 17);	/* presumably the vector enablement bit in CR0 -- confirm */
+		restore_vx_regs(vxrs);
+	}
+	preempt_enable();
+	return 0;
+}
+
+/*
+ * Program check handler for vector exceptions.
+ *
+ * Without the vector facility the event is passed to do_trap() as
+ * SIGILL/ILL_ILLOPN.  Otherwise the vector interrupt code (bits 0xf00
+ * of the freshly stored FPC) selects the si_code of the SIGFPE that is
+ * passed to do_trap(); codes without a table entry use si_code 0.
+ */
+void vector_exception(struct pt_regs *regs)
+{
+	/* si_code for the vector interrupt codes 1..5 */
+	static const int vic_si_code[] = {
+		[1] = FPE_FLTINV,	/* invalid vector operation */
+		[2] = FPE_FLTDIV,	/* division by zero */
+		[3] = FPE_FLTOVF,	/* overflow */
+		[4] = FPE_FLTUND,	/* underflow */
+		[5] = FPE_FLTRES,	/* inexact */
+	};
+	int si_code = 0, vic;
+
+	if (!MACHINE_HAS_VX) {
+		do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation");
+		return;
+	}
+
+	/* get vector interrupt code from fpc */
+	asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
+	vic = (current->thread.fp_regs.fpc & 0xf00) >> 8;
+	/* unknown cause: si_code stays 0 */
+	if (vic >= 1 && vic <= 5)
+		si_code = vic_si_code[vic];
+	do_trap(regs, SIGFPE, si_code, "vector exception");
+}
+
+static int __init disable_vector_extension(char *str)
+{
+	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;	/* "novx": clear the vector facility flag */
+	return 1;	/* @str is not looked at */
+}
+__setup("novx", disable_vector_extension);
+
+/*
+ * Program check handler for data exceptions.  Allocates the vector
+ * register save area when the task has none yet and the DXC is 0xfe;
+ * otherwise raises SIGFPE (DXC != 0) or SIGILL (DXC == 0).
+ */
+void data_exception(struct pt_regs *regs)
+{
+	unsigned int dxc;
+
+	/* Store the current FPC; its DXC field classifies the exception. */
+	asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
+	dxc = current->thread.fp_regs.fpc & FPC_DXC_MASK;
+	/* Check for vector register enablement */
+	if (MACHINE_HAS_VX && !current->thread.vxrs && dxc == 0xfe00) {
+		alloc_vector_registers(current);	/* on failure the insn just traps again */
+		/* Vector data exception is suppressing, rewind psw. */
+		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
+		clear_pt_regs_flag(regs, PIF_PER_TRAP);
+		return;
+	}
+	/* A non-zero DXC is a floating point trap, otherwise plain SIGILL. */
+	if (dxc)
+		do_fp_trap(regs, current->thread.fp_regs.fpc);
+	else
+		do_trap(regs, SIGILL, ILL_ILLOPN, "data exception");
+}
+
+void space_switch_exception(struct pt_regs *regs)
+{
+	/* Set user psw back to home space mode. */
+	if (user_mode(regs))
+		regs->psw.mask |= PSW_ASC_HOME;
+	/* Send SIGILL. */
+	do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event");	/* reported with si_code ILL_PRVOPC */
+}
+
+void kernel_stack_overflow(struct pt_regs *regs)
+{
+	bust_spinlocks(1);	/* the report is bracketed by bust_spinlocks(1) / (0) */
+	printk("Kernel stack overflow.\n");
+	show_regs(regs);
+	bust_spinlocks(0);
+	panic("Corrupt kernel stack, can't continue.");	/* last statement: the handler ends in panic() */
+}
+NOKPROBE_SYMBOL(kernel_stack_overflow);
+
+void __init trap_init(void)
+{
+	local_mcck_enable();	/* presumably enables machine check interrupts on this CPU -- confirm */
+}
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
new file mode 100644
index 000000000..66956c09d
--- /dev/null
+++ b/arch/s390/kernel/uprobes.c
@@ -0,0 +1,385 @@
+/*
+ *  User-space Probes (UProbes) for s390
+ *
+ *    Copyright IBM Corp. 2014
+ *    Author(s): Jan Willeke,
+ */
+
+#include <linux/uaccess.h>
+#include <linux/uprobes.h>
+#include <linux/compat.h>
+#include <linux/kdebug.h>
+#include <asm/switch_to.h>
+#include <asm/facility.h>
+#include <asm/kprobes.h>
+#include <asm/dis.h>
+#include "entry.h"
+
+#define	UPROBE_TRAP_NR	UINT_MAX
+
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
+			     unsigned long addr)
+{
+	return probe_is_prohibited_opcode(auprobe->insn);	/* @mm and @addr are not used */
+}
+
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	if (psw_bits(regs->psw).eaba == PSW_AMODE_24BIT)	/* 24 bit addressing is refused */
+		return -EINVAL;
+	if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_AMODE_31BIT)	/* 31 bit only for compat tasks */
+		return -EINVAL;
+	clear_pt_regs_flag(regs, PIF_PER_TRAP);
+	auprobe->saved_per = psw_bits(regs->psw).r;	/* put back by arch_uprobe_post_xol() */
+	auprobe->saved_int_code = regs->int_code;	/* put back by post_xol() / abort_xol() */
+	regs->int_code = UPROBE_TRAP_NR;	/* marker checked by arch_uprobe_xol_was_trapped() */
+	regs->psw.addr = current->utask->xol_vaddr;	/* resume at utask->xol_vaddr */
+	set_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);
+	update_cr_regs(current);
+	return 0;
+}
+
+bool arch_uprobe_xol_was_trapped(struct task_struct *tsk)
+{
+	/*
+	 * arch_uprobe_pre_xol() stores UPROBE_TRAP_NR in int_code; any
+	 * other value found here means it has been changed since.
+	 */
+	return task_pt_regs(tsk)->int_code != UPROBE_TRAP_NR;
+}
+
+static int check_per_event(unsigned short cause, unsigned long control,
+			   struct pt_regs *regs)
+{
+	unsigned long branch_ctl = control & 0x80800000;
+
+	if (!(regs->psw.mask & PSW_MASK_PER))
+		return 0;
+	/* user space single step */
+	if (control == 0)
+		return 1;
+	/* over indication for storage alteration */
+	if ((control & 0x20200000) && (cause & 0x2000))
+		return 1;
+	if (!(cause & 0x8000))
+		return 0;
+	/* all branches */
+	if (branch_ctl == 0x80000000)
+		return 1;
+	/* branch into selected range */
+	return branch_ctl == 0x80800000 &&
+	       regs->psw.addr >= current->thread.per_user.start &&
+	       regs->psw.addr <= current->thread.per_user.end;
+}
+
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	int fixup = probe_get_fixup_type(auprobe->insn);	/* bit mask of FIXUP_* flags */
+	struct uprobe_task *utask = current->utask;
+
+	clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);	/* undo arch_uprobe_pre_xol() */
+	update_cr_regs(current);
+	psw_bits(regs->psw).r = auprobe->saved_per;
+	regs->int_code = auprobe->saved_int_code;
+
+	if (fixup & FIXUP_PSW_NORMAL)	/* shift the PSW address from the xol slot back to the probed location */
+		regs->psw.addr += utask->vaddr - utask->xol_vaddr;
+	if (fixup & FIXUP_RETURN_REGISTER) {
+		int reg = (auprobe->insn[0] & 0xf0) >> 4;	/* register number taken from bits 0xf0 of the first halfword */
+
+		regs->gprs[reg] += utask->vaddr - utask->xol_vaddr;
+	}
+	if (fixup & FIXUP_BRANCH_NOT_TAKEN) {
+		int ilen = insn_length(auprobe->insn[0] >> 8);	/* length derived from the first opcode byte */
+
+		if (regs->psw.addr - utask->xol_vaddr == ilen)	/* PSW is right behind the copy: fall through */
+			regs->psw.addr = utask->vaddr + ilen;
+	}
+	if (check_per_event(current->thread.per_event.cause,
+			    current->thread.per_user.control, regs)) {
+		/* fix per address */
+		current->thread.per_event.address = utask->vaddr;
+		/* trigger per event */
+		set_pt_regs_flag(regs, PIF_PER_TRAP);
+	}
+	return 0;
+}
+
+int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
+				 void *data)
+{
+	struct die_args *args = data;
+	struct pt_regs *regs = args->regs;
+
+	if (!user_mode(regs))	/* only user space events are handled here */
+		return NOTIFY_DONE;
+	if (regs->int_code & 0x200) /* Trap during transaction */
+		return NOTIFY_DONE;
+	switch (val) {
+	case DIE_BPT:
+		if (uprobe_pre_sstep_notifier(regs))
+			return NOTIFY_STOP;
+		break;
+	case DIE_SSTEP:
+		if (uprobe_post_sstep_notifier(regs))
+			return NOTIFY_STOP;
+	default:	/* DIE_SSTEP falls through to here; harmless, it only breaks */
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	clear_thread_flag(TIF_UPROBE_SINGLESTEP);
+	regs->int_code = auprobe->saved_int_code;	/* value saved by arch_uprobe_pre_xol() */
+	regs->psw.addr = current->utask->vaddr;	/* back to the probed address */
+	current->thread.per_event.address = current->utask->vaddr;
+}
+
+unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline,
+						struct pt_regs *regs)
+{
+	unsigned long *ret_reg = &regs->gprs[14];	/* gpr 14 is replaced */
+	unsigned long orig = *ret_reg;
+
+	*ret_reg = trampoline;
+	return orig;
+}
+
+/* Instruction Emulation */
+
+static void adjust_psw_addr(psw_t *psw, unsigned long len)
+{
+	psw->addr = __rewind_psw(*psw, -len);	/* rewinding by -len: presumably moves the address forward by len -- confirm */
+}
+
+#define EMU_ILLEGAL_OP		1
+#define EMU_SPECIFICATION	2
+#define EMU_ADDRESSING		3
+
+#define emu_load_ril(ptr, output)			\
+({							\
+	unsigned int mask = sizeof(*(ptr)) - 1;		\
+	__typeof__(*(ptr)) input;			\
+	int __rc = 0;					\
+	/* load *ptr into *output; yields 0 or an EMU_* code */	\
+	if (!test_facility(34))				\
+		__rc = EMU_ILLEGAL_OP;			\
+	else if ((u64 __force)(ptr) & mask)		\
+		__rc = EMU_SPECIFICATION;		\
+	else if (get_user(input, (ptr)))		\
+		__rc = EMU_ADDRESSING;			\
+	else						\
+		*(output) = input;			\
+	__rc;						\
+})
+
+#define emu_store_ril(regs, ptr, input)			\
+({							\
+	unsigned int mask = sizeof(*(ptr)) - 1;		\
+	__typeof__(ptr) __ptr = (ptr);			\
+	int __rc = 0;					\
+	/* store *input to ptr; yields 0 or an EMU_* code */	\
+	if (!test_facility(34))				\
+		__rc = EMU_ILLEGAL_OP;			\
+	else if ((u64 __force)__ptr & mask)		\
+		__rc = EMU_SPECIFICATION;		\
+	else if (put_user(*(input), __ptr))		\
+		__rc = EMU_ADDRESSING;			\
+	if (__rc == 0)					\
+		sim_stor_event(regs,			\
+			       (void __force *)__ptr,	\
+			       mask + 1);		\
+	__rc;						\
+})
+
+#define emu_cmp_ril(regs, ptr, cmp)			\
+({							\
+	unsigned int mask = sizeof(*(ptr)) - 1;		\
+	__typeof__(*(ptr)) input;			\
+	int __rc = 0;					\
+	/* compare *ptr with *cmp and set the psw cc; yields 0 or EMU_* */	\
+	if (!test_facility(34))				\
+		__rc = EMU_ILLEGAL_OP;			\
+	else if ((u64 __force)(ptr) & mask)		\
+		__rc = EMU_SPECIFICATION;		\
+	else if (get_user(input, (ptr)))		\
+		__rc = EMU_ADDRESSING;			\
+	else if (input > *(cmp))			\
+		psw_bits((regs)->psw).cc = 1;		\
+	else if (input < *(cmp))			\
+		psw_bits((regs)->psw).cc = 2;		\
+	else						\
+		psw_bits((regs)->psw).cc = 0;		\
+	__rc;						\
+})
+
+struct insn_ril {	/* overlay used by handle_insn_ril() on the probed instruction */
+	u8 opc0;	/* first opcode byte */
+	u8 reg	: 4;	/* index into regs->gprs[] */
+	u8 opc1 : 4;	/* second opcode part */
+	s32 disp;	/* signed displacement, scaled by 2 when used */
+} __packed;
+
+union split_register {	/* overlay used by handle_insn_ril() on one regs->gprs[] entry */
+	u64 u64;
+	u32 u32[2];
+	u16 u16[4];
+	s64 s64;
+	s32 s32[2];
+	s16 s16[4];
+};
+
+/*
+ * If user per registers are setup to trace storage alterations and an
+ * emulated store took place on a fitting address a user trap is generated.
+ */
+static void sim_stor_event(struct pt_regs *regs, void *addr, int len)
+{
+	/* PER must be on in the psw and store events must be requested. */
+	if (!(regs->psw.mask & PSW_MASK_PER) ||
+	    !(current->thread.per_user.control & PER_EVENT_STORE))
+		return;
+	/* The stored area has to touch the user's PER address range. */
+	if ((void *)current->thread.per_user.start > (addr + len) ||
+	    (void *)current->thread.per_user.end < addr)
+		return;
+	current->thread.per_event.address = regs->psw.addr;
+	current->thread.per_event.cause = PER_EVENT_STORE >> 16;
+	set_pt_regs_flag(regs, PIF_PER_TRAP);
+}
+
+/*
+ * pc relative instructions are emulated, since parameters may not be
+ * accessible from the xol area due to range limitations.
+ */
+static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	union split_register *rx;
+	struct insn_ril *insn;
+	unsigned int ilen;
+	void *uptr;
+	int rc = 0;	/* 0 or one of the EMU_* codes */
+
+	insn = (struct insn_ril *) &auprobe->insn;
+	rx = (union split_register *) &regs->gprs[insn->reg];
+	uptr = (void *)(regs->psw.addr + (long)insn->disp * 2);	/* widen first: disp * 2 wraps in 32 bits for far targets */
+	ilen = insn_length(insn->opc0);
+
+	switch (insn->opc0) {
+	case 0xc0:
+		switch (insn->opc1) {
+		case 0x00: /* larl */
+			rx->u64 = (unsigned long)uptr;
+			break;
+		}
+		break;
+	case 0xc4:
+		switch (insn->opc1) {
+		case 0x02: /* llhrl */
+			rc = emu_load_ril((u16 __user *)uptr, &rx->u32[1]);
+			break;
+		case 0x04: /* lghrl */
+			rc = emu_load_ril((s16 __user *)uptr, &rx->u64);
+			break;
+		case 0x05: /* lhrl */
+			rc = emu_load_ril((s16 __user *)uptr, &rx->u32[1]);
+			break;
+		case 0x06: /* llghrl */
+			rc = emu_load_ril((u16 __user *)uptr, &rx->u64);
+			break;
+		case 0x08: /* lgrl */
+			rc = emu_load_ril((u64 __user *)uptr, &rx->u64);
+			break;
+		case 0x0c: /* lgfrl */
+			rc = emu_load_ril((s32 __user *)uptr, &rx->u64);
+			break;
+		case 0x0d: /* lrl */
+			rc = emu_load_ril((u32 __user *)uptr, &rx->u32[1]);
+			break;
+		case 0x0e: /* llgfrl */
+			rc = emu_load_ril((u32 __user *)uptr, &rx->u64);
+			break;
+		case 0x07: /* sthrl */
+			rc = emu_store_ril(regs, (u16 __user *)uptr, &rx->u16[3]);
+			break;
+		case 0x0b: /* stgrl */
+			rc = emu_store_ril(regs, (u64 __user *)uptr, &rx->u64);
+			break;
+		case 0x0f: /* strl */
+			rc = emu_store_ril(regs, (u32 __user *)uptr, &rx->u32[1]);
+			break;
+		}
+		break;
+	case 0xc6:
+		switch (insn->opc1) {
+		case 0x02: /* pfdrl */
+			if (!test_facility(34))
+				rc = EMU_ILLEGAL_OP;
+			break;
+		case 0x04: /* cghrl */
+			rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64);
+			break;
+		case 0x05: /* chrl */
+			rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s32[1]);
+			break;
+		case 0x06: /* clghrl */
+			rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u64);
+			break;
+		case 0x07: /* clhrl */
+			rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u32[1]);
+			break;
+		case 0x08: /* cgrl */
+			rc = emu_cmp_ril(regs, (s64 __user *)uptr, &rx->s64);
+			break;
+		case 0x0a: /* clgrl */
+			rc = emu_cmp_ril(regs, (u64 __user *)uptr, &rx->u64);
+			break;
+		case 0x0c: /* cgfrl */
+			rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s64);
+			break;
+		case 0x0d: /* crl */
+			rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s32[1]);
+			break;
+		case 0x0e: /* clgfrl */
+			rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u64);
+			break;
+		case 0x0f: /* clrl */
+			rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u32[1]);
+			break;
+		}
+		break;
+	}
+	adjust_psw_addr(&regs->psw, ilen);	/* done before reporting, also when emulation failed */
+	switch (rc) {	/* int_code = instruction length << 16 | program interruption code */
+	case EMU_ILLEGAL_OP:
+		regs->int_code = ilen << 16 | 0x0001;
+		do_report_trap(regs, SIGILL, ILL_ILLOPC, NULL);
+		break;
+	case EMU_SPECIFICATION:
+		regs->int_code = ilen << 16 | 0x0006;
+		do_report_trap(regs, SIGILL, ILL_ILLOPC, NULL);	/* NOTE(review): specification_exception() in traps.c reports ILL_ILLOPN */
+		break;
+	case EMU_ADDRESSING:
+		regs->int_code = ilen << 16 | 0x0005;
+		do_report_trap(regs, SIGSEGV, SEGV_MAPERR, NULL);
+		break;
+	}
+}
+
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	unsigned int amode = psw_bits(regs->psw).eaba;
+
+	if (amode == PSW_AMODE_24BIT ||
+	    (amode == PSW_AMODE_31BIT && !is_compat_task())) {
+		regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE);
+		do_report_trap(regs, SIGILL, ILL_ILLADR, NULL);
+		return true;
+	}
+	if (!probe_is_insn_relative_long(auprobe->insn))
+		return false;
+	handle_insn_ril(auprobe, regs);	/* relative-long insns are emulated */
+	return true;
+}
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
new file mode 100644
index 000000000..0d58269ff
--- /dev/null
+++ b/arch/s390/kernel/vdso.c
@@ -0,0 +1,306 @@
+/*
+ * vdso setup for s390
+ *
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/security.h>
+#include <linux/bootmem.h>
+#include <linux/compat.h>
+#include <asm/asm-offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/sections.h>
+#include <asm/vdso.h>
+#include <asm/facility.h>
+
+#ifdef CONFIG_COMPAT
+extern char vdso32_start, vdso32_end;
+static void *vdso32_kbase = &vdso32_start;
+static unsigned int vdso32_pages;
+static struct page **vdso32_pagelist;
+#endif
+
+extern char vdso64_start, vdso64_end;
+static void *vdso64_kbase = &vdso64_start;
+static unsigned int vdso64_pages;
+static struct page **vdso64_pagelist;
+
+/*
+ * Should the kernel map a VDSO page into processes and pass its
+ * address down to glibc upon exec()?
+ */
+unsigned int __read_mostly vdso_enabled = 1;
+
+static int __init vdso_setup(char *s)
+{
+	unsigned long val;
+	int rc = 0;
+
+	/* "on"/"off" must match in full; anything else is parsed as a number. */
+	if (!strncmp(s, "on", 3)) {
+		vdso_enabled = 1;
+	} else if (!strncmp(s, "off", 4)) {
+		vdso_enabled = 0;
+	} else {
+		rc = kstrtoul(s, 0, &val);
+		vdso_enabled = rc ? 0 : !!val;
+	}
+	return !rc;
+}
+__setup("vdso=", vdso_setup);
+
+/*
+ * The vdso data page
+ */
+static union {
+	struct vdso_data	data;
+	u8			page[PAGE_SIZE];	/* makes the union at least PAGE_SIZE bytes */
+} vdso_data_store __page_aligned_data;
+struct vdso_data *vdso_data = &vdso_data_store.data;	/* pointer to the data member */
+
+/*
+ * Setup vdso data page.
+ */
+static void vdso_init_data(struct vdso_data *vd)
+{
+	vd->ectg_available = test_facility(31);	/* records the result of the facility 31 test */
+}
+
+/*
+ * Allocate/free per cpu vdso data.
+ */
+#define SEGMENT_ORDER	2
+
+int vdso_alloc_per_cpu(struct _lowcore *lowcore)
+{
+	unsigned long segment_table, page_table, page_frame;
+	u32 *psal, *aste;
+	int i;
+
+	lowcore->vdso_per_cpu_data = __LC_PASTE;	/* default value, replaced below when the vdso is enabled */
+
+	if (!vdso_enabled)
+		return 0;
+
+	segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);	/* not zeroed; filled by clear_table() below */
+	page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	page_frame = get_zeroed_page(GFP_KERNEL);
+	if (!segment_table || !page_table || !page_frame)
+		goto out;	/* frees whatever was obtained */
+
+	clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
+		    PAGE_SIZE << SEGMENT_ORDER);
+	clear_table((unsigned long *) page_table, _PAGE_INVALID,
+		    256*sizeof(unsigned long));
+
+	*(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;	/* first segment entry -> page table */
+	*(unsigned long *) page_table = _PAGE_PROTECT + page_frame;	/* first page entry -> data page, with _PAGE_PROTECT */
+
+	psal = (u32 *) (page_table + 256*sizeof(unsigned long));	/* placed behind the 256 page table entries */
+	aste = psal + 32;	/* placed 32 words behind psal */
+
+	for (i = 4; i < 32; i += 4)
+		psal[i] = 0x80000000;
+
+	lowcore->paste[4] = (u32)(addr_t) psal;	/* read back by vdso_free_per_cpu() */
+	psal[0] = 0x02000000;
+	psal[2] = (u32)(addr_t) aste;
+	*(unsigned long *) (aste + 2) = segment_table +
+		_ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
+	aste[4] = (u32)(addr_t) psal;
+	lowcore->vdso_per_cpu_data = page_frame;
+
+	return 0;
+
+out:
+	free_page(page_frame);
+	free_page(page_table);
+	free_pages(segment_table, SEGMENT_ORDER);
+	return -ENOMEM;
+}
+
+void vdso_free_per_cpu(struct _lowcore *lowcore)
+{
+	unsigned long segment_table, page_table, page_frame;
+	u32 *psal, *aste;
+
+	if (!vdso_enabled)
+		return;
+
+	psal = (u32 *)(addr_t) lowcore->paste[4];	/* walk back the pointers stored by vdso_alloc_per_cpu() */
+	aste = (u32 *)(addr_t) psal[2];
+	segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;	/* strip the _ASCE_* bits added at allocation */
+	page_table = *(unsigned long *) segment_table;	/* NOTE(review): still includes _SEGMENT_ENTRY */
+	page_frame = *(unsigned long *) page_table;	/* NOTE(review): still includes _PAGE_PROTECT */
+
+	free_page(page_frame);
+	free_page(page_table);
+	free_pages(segment_table, SEGMENT_ORDER);
+}
+
+/* Load control register 5 with the offset of paste within struct _lowcore. */
+static void vdso_init_cr5(void)
+{
+	unsigned long cr5 = offsetof(struct _lowcore, paste);
+
+	/* Nothing to load when the vdso is disabled. */
+	if (vdso_enabled)
+		__ctl_load(cr5, 5, 5);
+}
+
+/*
+ * This is called from binfmt_elf, we create the special vma for the
+ * vDSO and insert it into the mm struct tree
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	struct mm_struct *mm = current->mm;
+	struct page **vdso_pagelist;
+	unsigned long vdso_pages;
+	unsigned long vdso_base;
+	int rc;
+
+	if (!vdso_enabled)
+		return 0;
+	/*
+	 * Only map the vdso for dynamically linked elf binaries.
+	 */
+	if (!uses_interp)
+		return 0;
+
+	vdso_pagelist = vdso64_pagelist;	/* 64 bit vdso by default */
+	vdso_pages = vdso64_pages;
+#ifdef CONFIG_COMPAT
+	if (is_compat_task()) {	/* compat tasks get the 32 bit vdso */
+		vdso_pagelist = vdso32_pagelist;
+		vdso_pages = vdso32_pages;
+	}
+#endif
+	/*
+	 * vDSO has a problem and was disabled, just don't "enable" it for
+	 * the process
+	 */
+	if (vdso_pages == 0)
+		return 0;
+
+	current->mm->context.vdso_base = 0;
+
+	/*
+	 * pick a base address for the vDSO in process space. We try to put
+	 * it at vdso_base which is the "natural" base for it, but we might
+	 * fail and end up putting it elsewhere.
+	 */
+	down_write(&mm->mmap_sem);	/* held until out_up */
+	vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0);
+	if (IS_ERR_VALUE(vdso_base)) {
+		rc = vdso_base;	/* the error code is returned as is */
+		goto out_up;
+	}
+
+	/*
+	 * Put vDSO base into mm struct. We need to do this before calling
+	 * install_special_mapping or the perf counter mmap tracking code
+	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
+	 */
+	current->mm->context.vdso_base = vdso_base;
+
+	/*
+	 * our vma flags don't have VM_WRITE so by default, the process
+	 * isn't allowed to write those pages.
+	 * gdb can break that with ptrace interface, and thus trigger COW
+	 * on those pages but it's then your responsibility to never do that
+	 * on the "data" page of the vDSO or you'll stop getting kernel
+	 * updates and your nice userland gettimeofday will be totally dead.
+	 * It's fine to use that for setting breakpoints in the vDSO code
+	 * pages though.
+	 */
+	rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
+				     VM_READ|VM_EXEC|
+				     VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+				     vdso_pagelist);
+	if (rc)
+		current->mm->context.vdso_base = 0;	/* mapping failed: forget the base again */
+out_up:
+	up_write(&mm->mmap_sem);
+	return rc;
+}
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	return mm && vma->vm_start == mm->context.vdso_base ? "[vdso]" : NULL;
+}
+
+static int __init vdso_init(void)
+{
+	int i;
+
+	if (!vdso_enabled)
+		return 0;
+	vdso_init_data(vdso_data);
+#ifdef CONFIG_COMPAT
+	/* Calculate the size of the 32 bit vDSO */
+	vdso32_pages = ((&vdso32_end - &vdso32_start
+			 + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;	/* image pages rounded up, plus one for the data page */
+
+	/* Make sure pages are in the correct state */
+	vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 1),
+				  GFP_KERNEL);	/* one extra slot for the NULL terminator */
+	BUG_ON(vdso32_pagelist == NULL);
+	for (i = 0; i < vdso32_pages - 1; i++) {
+		struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
+		ClearPageReserved(pg);
+		get_page(pg);
+		vdso32_pagelist[i] = pg;
+	}
+	vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data);	/* last entry is the shared data page */
+	vdso32_pagelist[vdso32_pages] = NULL;
+#endif
+
+	/* Calculate the size of the 64 bit vDSO */
+	vdso64_pages = ((&vdso64_end - &vdso64_start
+			 + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;	/* image pages rounded up, plus one for the data page */
+
+	/* Make sure pages are in the correct state */
+	vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 1),
+				  GFP_KERNEL);	/* one extra slot for the NULL terminator */
+	BUG_ON(vdso64_pagelist == NULL);
+	for (i = 0; i < vdso64_pages - 1; i++) {
+		struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
+		ClearPageReserved(pg);
+		get_page(pg);
+		vdso64_pagelist[i] = pg;
+	}
+	vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);	/* last entry is the shared data page */
+	vdso64_pagelist[vdso64_pages] = NULL;
+	if (vdso_alloc_per_cpu(&S390_lowcore))	/* per-cpu data for the CPU running this initcall */
+		BUG();
+	vdso_init_cr5();
+
+	get_page(virt_to_page(vdso_data));	/* extra reference on the data page */
+
+	smp_wmb();
+
+	return 0;
+}
+early_initcall(vdso_init);
diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore
new file mode 100644
index 000000000..e45fba9d0
--- /dev/null
+++ b/arch/s390/kernel/vdso32/.gitignore
@@ -0,0 +1 @@
+vdso32.lds
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
new file mode 100644
index 000000000..8ad2b34ad
--- /dev/null
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -0,0 +1,58 @@
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o
+
+# Build rules
+
+targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS_31 += -m31 -s
+
+KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
+KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
+			$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+
+$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
+$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
+
+obj-y += vdso32_wrapper.o
+extra-y += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
+
+# Disable gcov profiling for VDSO code
+GCOV_PROFILE := n
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+	$(call if_changed,vdso32ld)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+	$(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso32): %.o: %.S
+	$(call if_changed_dep,vdso32as)
+
+# actual build commands
+quiet_cmd_vdso32ld = VDSO32L $@
+      cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso32as = VDSO32A $@
+      cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso32.so: $(obj)/vdso32.so.dbg
+	@mkdir -p $(MODLIB)/vdso
+	$(call cmd,vdso_install)
+
+vdso_install: vdso32.so
diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S
new file mode 100644
index 000000000..eca3f001f
--- /dev/null
+++ b/arch/s390/kernel/vdso32/clock_getres.S
@@ -0,0 +1,46 @@
+/*
+ * Userland implementation of clock_getres() for 32 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+	.text
+	.align 4
+	.globl __kernel_clock_getres
+	.type  __kernel_clock_getres,@function
+__kernel_clock_getres:
+	.cfi_startproc
+	basr	%r1,0
+	la	%r1,4f-.(%r1)			/* %r1 = address of label 4 */
+	chi	%r2,__CLOCK_REALTIME
+	je	0f
+	chi	%r2,__CLOCK_MONOTONIC
+	je	0f
+	la	%r1,5f-4f(%r1)			/* %r1 = address of label 5 */
+	chi	%r2,__CLOCK_REALTIME_COARSE
+	je	0f
+	chi	%r2,__CLOCK_MONOTONIC_COARSE
+	jne	3f				/* any other clock id: use the svc */
+0:	ltr	%r3,%r3
+	jz	2f				/* res == NULL */
+1:	l	%r0,0(%r1)			/* load the selected resolution */
+	xc	0(4,%r3),0(%r3)			/* set res->tv_sec to zero */
+	st	%r0,4(%r3)			/* store res->tv_nsec */
+2:	lhi	%r2,0				/* return 0 */
+	br	%r14
+3:	lhi	%r1,__NR_clock_getres		/* fallback to svc */
+	svc	0
+	br	%r14
+4:	.long	__CLOCK_REALTIME_RES
+5:	.long	__CLOCK_COARSE_RES
+	.cfi_endproc
+	.size	__kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
new file mode 100644
index 000000000..5eec9afbb
--- /dev/null
+++ b/arch/s390/kernel/vdso32/clock_gettime.S
@@ -0,0 +1,149 @@
+/*
+ * Userland implementation of clock_gettime() for 32 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+	.text
+	.align 4
+	.globl __kernel_clock_gettime
+	.type  __kernel_clock_gettime,@function
+__kernel_clock_gettime:
+	.cfi_startproc
+	ahi	%r15,-16			/* 16 bytes of stack for stcke */
+	basr	%r5,0				/* r5 = address of label 0 */
+0:	al	%r5,21f-0b(%r5)			/* get &_vdso_data */
+	chi	%r2,__CLOCK_REALTIME_COARSE
+	je	10f
+	chi	%r2,__CLOCK_REALTIME
+	je	11f
+	chi	%r2,__CLOCK_MONOTONIC_COARSE
+	je	9f
+	chi	%r2,__CLOCK_MONOTONIC
+	jne	19f				/* any other clock id: use the svc */
+
+	/* CLOCK_MONOTONIC */
+1:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
+	tml	%r4,0x0001			/* pending update ? loop */
+	jnz	1b
+	stcke	0(%r15)				/* Store TOD clock */
+	lm	%r0,%r1,1(%r15)			/* r0:r1 = 8 bytes at offset 1 */
+	s	%r0,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
+	sl	%r1,__VDSO_XTIME_STAMP+4(%r5)
+	brc	3,2f				/* no borrow from the low word */
+	ahi	%r0,-1				/* borrow: adjust the high word */
+2:	ms	%r0,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
+	lr	%r2,%r0				/* keep high word * mult */
+	l	%r0,__VDSO_TK_MULT(%r5)
+	ltr	%r1,%r1				/* test sign of the low word */
+	mr	%r0,%r0				/* r0:r1 = low word * mult */
+	jnm	3f
+	a	%r0,__VDSO_TK_MULT(%r5)		/* low word was taken as negative */
+3:	alr	%r0,%r2				/* add high word * mult */
+	al	%r0,__VDSO_WTOM_NSEC(%r5)
+	al	%r1,__VDSO_WTOM_NSEC+4(%r5)
+	brc	12,5f				/* no carry out of the low word */
+	ahi	%r0,1				/* carry: adjust the high word */
+5:	l	%r2,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
+	srdl	%r0,0(%r2)			/*  >> tk->shift */
+	l	%r2,__VDSO_WTOM_SEC+4(%r5)	/* r2 = seconds, low word */
+	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
+	jne	1b				/* counter changed: start over */
+	basr	%r5,0				/* r5 = address of label 6 */
+6:	ltr	%r0,%r0				/* loop while r0:r1 >= 1000000000 */
+	jnz	7f
+	cl	%r1,20f-6b(%r5)
+	jl	8f
+7:	ahi	%r2,1				/* one more second */
+	sl	%r1,20f-6b(%r5)			/* r0:r1 -= 1000000000 */
+	brc	3,6b
+	ahi	%r0,-1
+	j	6b
+8:	st	%r2,0(%r3)			/* store tp->tv_sec */
+	st	%r1,4(%r3)			/* store tp->tv_nsec */
+	lhi	%r2,0				/* return 0 */
+	ahi	%r15,16
+	br	%r14
+
+	/* CLOCK_MONOTONIC_COARSE */
+9:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
+	tml	%r4,0x0001			/* pending update ? loop */
+	jnz	9b
+	l	%r2,__VDSO_WTOM_CRS_SEC+4(%r5)
+	l	%r1,__VDSO_WTOM_CRS_NSEC+4(%r5)
+	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
+	jne	9b
+	j	8b				/* store r2/r1 and return */
+
+	/* CLOCK_REALTIME_COARSE */
+10:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
+	tml	%r4,0x0001			/* pending update ? loop */
+	jnz	10b
+	l	%r2,__VDSO_XTIME_CRS_SEC+4(%r5)
+	l	%r1,__VDSO_XTIME_CRS_NSEC+4(%r5)
+	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
+	jne	10b
+	j	17f				/* store r2/r1 and return */
+
+	/* CLOCK_REALTIME */
+11:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
+	tml	%r4,0x0001			/* pending update ? loop */
+	jnz	11b
+	stcke	0(%r15)				/* Store TOD clock */
+	lm	%r0,%r1,1(%r15)			/* r0:r1 = 8 bytes at offset 1 */
+	s	%r0,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
+	sl	%r1,__VDSO_XTIME_STAMP+4(%r5)
+	brc	3,12f				/* no borrow from the low word */
+	ahi	%r0,-1				/* borrow: adjust the high word */
+12:	ms	%r0,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
+	lr	%r2,%r0				/* keep high word * mult */
+	l	%r0,__VDSO_TK_MULT(%r5)
+	ltr	%r1,%r1				/* test sign of the low word */
+	mr	%r0,%r0				/* r0:r1 = low word * mult */
+	jnm	13f
+	a	%r0,__VDSO_TK_MULT(%r5)		/* low word was taken as negative */
+13:	alr	%r0,%r2				/* add high word * mult */
+	al	%r0,__VDSO_XTIME_NSEC(%r5)	/*  + tk->xtime_nsec */
+	al	%r1,__VDSO_XTIME_NSEC+4(%r5)
+	brc	12,14f				/* no carry out of the low word */
+	ahi	%r0,1				/* carry: adjust the high word */
+14:	l	%r2,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
+	srdl	%r0,0(%r2)			/*  >> tk->shift */
+	l	%r2,__VDSO_XTIME_SEC+4(%r5)	/* r2 = seconds, low word */
+	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
+	jne	11b				/* counter changed: start over */
+	basr	%r5,0				/* r5 = address of label 15 */
+15:	ltr	%r0,%r0				/* loop while r0:r1 >= 1000000000 */
+	jnz	16f
+	cl	%r1,20f-15b(%r5)
+	jl	17f
+16:	ahi	%r2,1				/* one more second */
+	sl	%r1,20f-15b(%r5)		/* r0:r1 -= 1000000000 */
+	brc	3,15b
+	ahi	%r0,-1
+	j	15b
+17:	st	%r2,0(%r3)			/* store tp->tv_sec */
+	st	%r1,4(%r3)			/* store tp->tv_nsec */
+	lhi	%r2,0				/* return 0 */
+	ahi	%r15,16
+	br	%r14
+
+	/* Fallback to system call */
+19:	lhi	%r1,__NR_clock_gettime
+	svc	0
+	ahi	%r15,16
+	br	%r14
+
+20:	.long	1000000000
+21:	.long	_vdso_data - 0b
+	.cfi_endproc
+	.size	__kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
new file mode 100644
index 000000000..719de6186
--- /dev/null
+++ b/arch/s390/kernel/vdso32/gettimeofday.S
@@ -0,0 +1,81 @@
+/*
+ * Userland implementation of gettimeofday() for 32 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+	.text
+	.align 4
+	.globl __kernel_gettimeofday
+	.type  __kernel_gettimeofday,@function
+__kernel_gettimeofday:
+	.cfi_startproc
+	ahi	%r15,-16			/* 16 bytes of stack scratch */
+	basr	%r5,0				/* r5 = address of label 0 */
+0:	al	%r5,13f-0b(%r5)			/* get &_vdso_data */
+1:	ltr	%r3,%r3				/* check if tz is NULL */
+	je	2f
+	mvc	0(8,%r3),__VDSO_TIMEZONE(%r5)	/* copy 8 bytes to *tz */
+2:	ltr	%r2,%r2				/* check if tv is NULL */
+	je	10f
+	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
+	tml	%r4,0x0001			/* pending update ? loop */
+	jnz	1b
+	stcke	0(%r15)				/* Store TOD clock */
+	lm	%r0,%r1,1(%r15)			/* r0:r1 = 8 bytes at offset 1 */
+	s	%r0,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
+	sl	%r1,__VDSO_XTIME_STAMP+4(%r5)
+	brc	3,3f				/* no borrow from the low word */
+	ahi	%r0,-1				/* borrow: adjust the high word */
+3:	ms	%r0,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
+	st	%r0,0(%r15)			/* park high word * mult on stack */
+	l	%r0,__VDSO_TK_MULT(%r5)
+	ltr	%r1,%r1				/* test sign of the low word */
+	mr	%r0,%r0				/* r0:r1 = low word * mult */
+	jnm	4f
+	a	%r0,__VDSO_TK_MULT(%r5)		/* low word was taken as negative */
+4:	al	%r0,0(%r15)			/* add high word * mult */
+	al	%r0,__VDSO_XTIME_NSEC(%r5)	/*  + xtime */
+	al	%r1,__VDSO_XTIME_NSEC+4(%r5)
+	brc	12,5f				/* no carry out of the low word */
+	ahi	%r0,1				/* carry: adjust the high word */
+5:	mvc	0(4,%r15),__VDSO_XTIME_SEC+4(%r5)	/* stash tv_sec on stack */
+	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
+	jne	1b				/* counter changed: start over */
+	l	%r4,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
+	srdl	%r0,0(%r4)			/*  >> tk->shift */
+	l	%r4,0(%r15)			/* get tv_sec from stack */
+	basr	%r5,0				/* r5 = address of label 6 */
+6:	ltr	%r0,%r0				/* loop while r0:r1 >= 1000000000 */
+	jnz	7f
+	cl	%r1,11f-6b(%r5)
+	jl	8f
+7:	ahi	%r4,1				/* one more second */
+	sl	%r1,11f-6b(%r5)			/* r0:r1 -= 1000000000 */
+	brc	3,6b
+	ahi	%r0,-1
+	j	6b
+8:	st	%r4,0(%r2)			/* store tv->tv_sec */
+	ltr	%r1,%r1				/* test sign of the nsec value */
+	m	%r0,12f-6b(%r5)			/* r0:r1 = nsec * 274877907 */
+	jnm	9f
+	al	%r0,12f-6b(%r5)			/* nsec was taken as negative */
+9:	srl	%r0,6				/* high word >> 6: product >> 38 */
+	st	%r0,4(%r2)			/* store tv->tv_usec */
+10:	slr	%r2,%r2				/* return 0 */
+	ahi	%r15,16
+	br	%r14
+11:	.long	1000000000
+12:	.long	274877907
+13:	.long	_vdso_data - 0b
+	.cfi_endproc
+	.size	__kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S
new file mode 100644
index 000000000..79a071e43
--- /dev/null
+++ b/arch/s390/kernel/vdso32/note.S
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+	.long LINUX_VERSION_CODE	/* note contents: the kernel version code */
+ELFNOTE_END
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
new file mode 100644
index 000000000..a8c379fa1
--- /dev/null
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -0,0 +1,138 @@
+/*
+ * This is the infamous ld script for the 32 bits vdso
+ * library
+ */
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
+OUTPUT_ARCH(s390:31-bit)
+ENTRY(_start)
+
+SECTIONS
+{
+	. = VDSO32_LBASE + SIZEOF_HEADERS;
+
+	.hash		: { *(.hash) }			:text
+	.gnu.hash	: { *(.gnu.hash) }
+	.dynsym		: { *(.dynsym) }
+	.dynstr		: { *(.dynstr) }
+	.gnu.version	: { *(.gnu.version) }
+	.gnu.version_d	: { *(.gnu.version_d) }
+	.gnu.version_r	: { *(.gnu.version_r) }
+
+	.note		: { *(.note.*) }		:text	:note
+
+	. = ALIGN(16);
+	.text		: {
+		*(.text .stub .text.* .gnu.linkonce.t.*)
+	} :text
+	PROVIDE(__etext = .);
+	PROVIDE(_etext = .);
+	PROVIDE(etext = .);
+
+	/*
+	 * Other stuff is appended to the text segment:
+	 */
+	.rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+	.rodata1	: { *(.rodata1) }
+
+	.dynamic	: { *(.dynamic) }		:text	:dynamic
+
+	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+	.gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
+
+	.rela.dyn ALIGN(8) : { *(.rela.dyn) }
+	.got ALIGN(8)	: { *(.got .toc) }
+
+	_end = .;
+	PROVIDE(end = .);
+
+	/*
+	 * Stabs debugging sections are here too.
+	 */
+	.stab	       0 : { *(.stab) }
+	.stabstr       0 : { *(.stabstr) }
+	.stab.excl     0 : { *(.stab.excl) }
+	.stab.exclstr  0 : { *(.stab.exclstr) }
+	.stab.index    0 : { *(.stab.index) }
+	.stab.indexstr 0 : { *(.stab.indexstr) }
+	.comment       0 : { *(.comment) }
+
+	/*
+	 * DWARF debug sections.
+	 * Symbols in the DWARF debugging sections are relative to the
+	 * beginning of the section so we begin them at 0.
+	 */
+	/* DWARF 1 */
+	.debug		0 : { *(.debug) }
+	.line		0 : { *(.line) }
+	/* GNU DWARF 1 extensions */
+	.debug_srcinfo	0 : { *(.debug_srcinfo) }
+	.debug_sfnames	0 : { *(.debug_sfnames) }
+	/* DWARF 1.1 and DWARF 2 */
+	.debug_aranges	0 : { *(.debug_aranges) }
+	.debug_pubnames 0 : { *(.debug_pubnames) }
+	/* DWARF 2 */
+	.debug_info	0 : { *(.debug_info .gnu.linkonce.wi.*) }
+	.debug_abbrev	0 : { *(.debug_abbrev) }
+	.debug_line	0 : { *(.debug_line) }
+	.debug_frame	0 : { *(.debug_frame) }
+	.debug_str	0 : { *(.debug_str) }
+	.debug_loc	0 : { *(.debug_loc) }
+	.debug_macinfo	0 : { *(.debug_macinfo) }
+	/* SGI/MIPS DWARF 2 extensions */
+	.debug_weaknames 0 : { *(.debug_weaknames) }
+	.debug_funcnames 0 : { *(.debug_funcnames) }
+	.debug_typenames 0 : { *(.debug_typenames) }
+	.debug_varnames  0 : { *(.debug_varnames) }
+	/* DWARF 3 */
+	.debug_pubtypes 0 : { *(.debug_pubtypes) }
+	.debug_ranges	0 : { *(.debug_ranges) }
+	.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+
+	. = ALIGN(4096);
+	PROVIDE(_vdso_data = .);
+
+	/DISCARD/	: {
+		*(.note.GNU-stack)
+		*(.branch_lt)
+		*(.data .data.* .gnu.linkonce.d.* .sdata*)
+		*(.bss .sbss .dynbss .dynsbss)
+	}
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME	0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+	text		PT_LOAD FILEHDR PHDRS FLAGS(5);	/* PF_R|PF_X */
+	dynamic		PT_DYNAMIC FLAGS(4);		/* PF_R */
+	note		PT_NOTE FLAGS(4);		/* PF_R */
+	eh_frame_hdr	PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+	VDSO_VERSION_STRING {
+	global:
+		/*
+		 * Has to be there for the kernel to find
+		 */
+		__kernel_gettimeofday;
+		__kernel_clock_gettime;
+		__kernel_clock_getres;
+
+	local: *;
+	};
+}
diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S
new file mode 100644
index 000000000..ae42f8ce3
--- /dev/null
+++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S
@@ -0,0 +1,14 @@
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+	__PAGE_ALIGNED_DATA
+
+	.globl vdso32_start, vdso32_end
+	.balign PAGE_SIZE
+vdso32_start:
+	.incbin "arch/s390/kernel/vdso32/vdso32.so"	/* embed the built vDSO image */
+	.balign PAGE_SIZE			/* pad so vdso32_end is page aligned */
+vdso32_end:
+
+	.previous
diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore
new file mode 100644
index 000000000..3fd18cf9f
--- /dev/null
+++ b/arch/s390/kernel/vdso64/.gitignore
@@ -0,0 +1 @@
+vdso64.lds
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
new file mode 100644
index 000000000..2a8ddfd12
--- /dev/null
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -0,0 +1,58 @@
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o
+
+# Build rules
+
+targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+
+KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS_64 += -m64 -s
+
+KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
+KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
+			$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+
+$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
+$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
+
+obj-y += vdso64_wrapper.o
+extra-y += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+
+# Disable gcov profiling for VDSO code
+GCOV_PROFILE := n
+
+# Force dependency: vdso64_wrapper.S embeds vdso64.so via .incbin
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
+	$(call if_changed,vdso64ld)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+	$(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso64): %.o: %.S
+	$(call if_changed_dep,vdso64as)
+
+# actual build commands
+quiet_cmd_vdso64ld = VDSO64L $@
+      cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso64as = VDSO64A $@
+      cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso64.so: $(obj)/vdso64.so.dbg
+	@mkdir -p $(MODLIB)/vdso
+	$(call cmd,vdso_install)
+
+vdso_install: vdso64.so
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
new file mode 100644
index 000000000..c8513deb8
--- /dev/null
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -0,0 +1,52 @@
+/*
+ * Userland implementation of clock_getres() for 64 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+	.text
+	.align 4
+	.globl __kernel_clock_getres
+	.type  __kernel_clock_getres,@function
+__kernel_clock_getres:
+	.cfi_startproc
+	larl	%r1,4f				/* r1 -> __CLOCK_COARSE_RES */
+	cghi	%r2,__CLOCK_REALTIME_COARSE
+	je	0f
+	cghi	%r2,__CLOCK_MONOTONIC_COARSE
+	je	0f
+	larl	%r1,3f				/* r1 -> __CLOCK_REALTIME_RES */
+	cghi	%r2,__CLOCK_REALTIME
+	je	0f
+	cghi	%r2,__CLOCK_MONOTONIC
+	je	0f
+	cghi	%r2,__CLOCK_THREAD_CPUTIME_ID
+	je	0f
+	cghi	%r2,-2		/* Per-thread CPUCLOCK with PID=0, VIRT=1 */
+	jne	2f
+	larl	%r5,_vdso_data			/* r5 = &_vdso_data */
+	icm	%r0,15,__VDSO_ECTG_OK(%r5)	/* ECTG flag, offset into _vdso_data */
+	jz	2f				/* flag is zero: use the svc */
+0:	ltgr	%r3,%r3
+	jz	1f				/* res == NULL */
+	lg	%r0,0(%r1)			/* load the selected resolution */
+	xc	0(8,%r3),0(%r3)			/* set res->tv_sec to zero */
+	stg	%r0,8(%r3)			/* store res->tv_nsec */
+1:	lghi	%r2,0				/* return 0 */
+	br	%r14
+2:	lghi	%r1,__NR_clock_getres		/* fallback to svc */
+	svc	0
+	br	%r14
+3:	.quad	__CLOCK_REALTIME_RES
+4:	.quad	__CLOCK_COARSE_RES
+	.cfi_endproc
+	.size	__kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
new file mode 100644
index 000000000..61541fb93
--- /dev/null
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -0,0 +1,150 @@
+/*
+ * Userland implementation of clock_gettime() for 64 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+	.text
+	.align 4
+	.globl __kernel_clock_gettime
+	.type  __kernel_clock_gettime,@function
+__kernel_clock_gettime:
+	.cfi_startproc
+	aghi	%r15,-16			/* 16 bytes of stack for stcke */
+	larl	%r5,_vdso_data			/* r5 = &_vdso_data */
+	cghi	%r2,__CLOCK_REALTIME_COARSE
+	je	4f
+	cghi	%r2,__CLOCK_REALTIME
+	je	5f
+	cghi	%r2,-3		/* Per-thread CPUCLOCK with PID=0, VIRT=1 */
+	je	9f
+	cghi	%r2,__CLOCK_MONOTONIC_COARSE
+	je	3f
+	cghi	%r2,__CLOCK_MONOTONIC
+	jne	12f				/* any other clock id: use the svc */
+
+	/* CLOCK_MONOTONIC */
+0:	lg	%r4,__VDSO_UPD_COUNT(%r5)	/* load update counter */
+	tmll	%r4,0x0001			/* pending update ? loop */
+	jnz	0b
+	stcke	0(%r15)				/* Store TOD clock */
+	lgf	%r2,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
+	lg	%r0,__VDSO_WTOM_SEC(%r5)	/* r0 = seconds */
+	lg	%r1,1(%r15)			/* r1 = 8 bytes at offset 1 */
+	sg	%r1,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
+	msgf	%r1,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
+	alg	%r1,__VDSO_WTOM_NSEC(%r5)
+	srlg	%r1,%r1,0(%r2)			/*  >> tk->shift */
+	clg	%r4,__VDSO_UPD_COUNT(%r5)	/* check update counter */
+	jne	0b				/* counter changed: start over */
+	larl	%r5,13f				/* r5 -> constant 1000000000 */
+1:	clg	%r1,0(%r5)			/* loop while r1 >= 1000000000 */
+	jl	2f
+	slg	%r1,0(%r5)			/* r1 -= 1000000000 */
+	aghi	%r0,1				/* one more second */
+	j	1b
+2:	stg	%r0,0(%r3)			/* store tp->tv_sec */
+	stg	%r1,8(%r3)			/* store tp->tv_nsec */
+	lghi	%r2,0				/* return 0 */
+	aghi	%r15,16
+	br	%r14
+
+	/* CLOCK_MONOTONIC_COARSE */
+3:	lg	%r4,__VDSO_UPD_COUNT(%r5)	/* load update counter */
+	tmll	%r4,0x0001			/* pending update ? loop */
+	jnz	3b
+	lg	%r0,__VDSO_WTOM_CRS_SEC(%r5)
+	lg	%r1,__VDSO_WTOM_CRS_NSEC(%r5)
+	clg	%r4,__VDSO_UPD_COUNT(%r5)	/* check update counter */
+	jne	3b
+	j	2b				/* store r0/r1 and return */
+
+	/* CLOCK_REALTIME_COARSE */
+4:	lg	%r4,__VDSO_UPD_COUNT(%r5)	/* load update counter */
+	tmll	%r4,0x0001			/* pending update ? loop */
+	jnz	4b
+	lg	%r0,__VDSO_XTIME_CRS_SEC(%r5)
+	lg	%r1,__VDSO_XTIME_CRS_NSEC(%r5)
+	clg	%r4,__VDSO_UPD_COUNT(%r5)	/* check update counter */
+	jne	4b
+	j	7f				/* store r0/r1 and return */
+
+	/* CLOCK_REALTIME */
+5:	lg	%r4,__VDSO_UPD_COUNT(%r5)	/* load update counter */
+	tmll	%r4,0x0001			/* pending update ? loop */
+	jnz	5b
+	stcke	0(%r15)				/* Store TOD clock */
+	lgf	%r2,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
+	lg	%r1,1(%r15)			/* r1 = 8 bytes at offset 1 */
+	sg	%r1,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
+	msgf	%r1,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
+	alg	%r1,__VDSO_XTIME_NSEC(%r5)	/*  + tk->xtime_nsec */
+	srlg	%r1,%r1,0(%r2)			/*  >> tk->shift */
+	lg	%r0,__VDSO_XTIME_SEC(%r5)	/* tk->xtime_sec */
+	clg	%r4,__VDSO_UPD_COUNT(%r5)	/* check update counter */
+	jne	5b				/* counter changed: start over */
+	larl	%r5,13f				/* r5 -> constant 1000000000 */
+6:	clg	%r1,0(%r5)			/* loop while r1 >= 1000000000 */
+	jl	7f
+	slg	%r1,0(%r5)			/* r1 -= 1000000000 */
+	aghi	%r0,1				/* one more second */
+	j	6b
+7:	stg	%r0,0(%r3)			/* store tp->tv_sec */
+	stg	%r1,8(%r3)			/* store tp->tv_nsec */
+	lghi	%r2,0				/* return 0 */
+	aghi	%r15,16
+	br	%r14
+
+	/* CPUCLOCK_VIRT for this thread */
+9:	icm	%r0,15,__VDSO_ECTG_OK(%r5)
+	jz	12f				/* flag is zero: use the svc */
+	ear	%r2,%a4				/* save access register 4 in r2 */
+	llilh	%r4,0x0100
+	sar	%a4,%r4
+	lghi	%r4,0
+	epsw	%r5,0				/* r5 is tested below to pick a sacf */
+	sacf	512				/* Magic ectg instruction */
+	.insn	ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
+	tml	%r5,0x4000
+	jo	11f
+	tml	%r5,0x8000
+	jno	10f
+	sacf	256
+	j	11f
+10:	sacf	0
+11:	sar	%a4,%r2				/* restore access register 4 */
+	algr	%r1,%r0				/* r1 = cputime as TOD value */
+	mghi	%r1,1000			/* convert to nanoseconds */
+	srlg	%r1,%r1,12			/* r1 = cputime in nanosec */
+	lgr	%r4,%r1				/* r4 = copy of the nanoseconds */
+	larl	%r5,13f				/* r5 -> constants 13 and 14 */
+	srlg	%r1,%r1,9			/* divide by 1000000000 */
+	mlg	%r0,8(%r5)			/* multiply by the constant at 14 */
+	srlg	%r0,%r0,11			/* r0 = tv_sec */
+	stg	%r0,0(%r3)
+	msg	%r0,0(%r5)			/* calculate tv_nsec */
+	slgr	%r4,%r0				/* r4 = tv_nsec */
+	stg	%r4,8(%r3)
+	lghi	%r2,0				/* return 0 */
+	aghi	%r15,16
+	br	%r14
+
+	/* Fallback to system call */
+12:	lghi	%r1,__NR_clock_gettime
+	svc	0
+	aghi	%r15,16
+	br	%r14
+
+13:	.quad	1000000000
+14:	.quad	19342813113834067
+	.cfi_endproc
+	.size	__kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S
new file mode 100644
index 000000000..6ce467076
--- /dev/null
+++ b/arch/s390/kernel/vdso64/gettimeofday.S
@@ -0,0 +1,59 @@
+/*
+ * Userland implementation of gettimeofday() for 64 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ *  Copyright IBM Corp. 2008
+ *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+	.text
+	.align 4
+	.globl __kernel_gettimeofday
+	.type  __kernel_gettimeofday,@function
+__kernel_gettimeofday:
+	.cfi_startproc
+	aghi	%r15,-16			/* 16 bytes of stack for stcke */
+	larl	%r5,_vdso_data			/* r5 = &_vdso_data */
+0:	ltgr	%r3,%r3				/* check if tz is NULL */
+	je	1f
+	mvc	0(8,%r3),__VDSO_TIMEZONE(%r5)	/* copy 8 bytes to *tz */
+1:	ltgr	%r2,%r2				/* check if tv is NULL */
+	je	4f
+	lg	%r4,__VDSO_UPD_COUNT(%r5)	/* load update counter */
+	tmll	%r4,0x0001			/* pending update ? loop */
+	jnz	0b
+	stcke	0(%r15)				/* Store TOD clock */
+	lg	%r1,1(%r15)			/* r1 = 8 bytes at offset 1 */
+	sg	%r1,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
+	msgf	%r1,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
+	alg	%r1,__VDSO_XTIME_NSEC(%r5)	/*  + tk->xtime_nsec */
+	lg	%r0,__VDSO_XTIME_SEC(%r5)	/* tk->xtime_sec */
+	clg	%r4,__VDSO_UPD_COUNT(%r5)	/* check update counter */
+	jne	0b				/* counter changed: start over */
+	lgf	%r5,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
+	srlg	%r1,%r1,0(%r5)			/*  >> tk->shift */
+	larl	%r5,5f				/* r5 -> constants at label 5 */
+2:	clg	%r1,0(%r5)			/* loop while r1 >= 1000000000 */
+	jl	3f
+	slg	%r1,0(%r5)			/* r1 -= 1000000000 */
+	aghi	%r0,1				/* one more second */
+	j	2b
+3:	stg	%r0,0(%r2)			/* store tv->tv_sec */
+	slgr	%r0,%r0				/* tv_nsec -> tv_usec */
+	ml	%r0,8(%r5)			/* r0:r1 = nsec * 274877907 */
+	srlg	%r0,%r0,6			/* high word >> 6: product >> 38 */
+	stg	%r0,8(%r2)			/* store tv->tv_usec */
+4:	lghi	%r2,0				/* return 0 */
+	aghi	%r15,16
+	br	%r14
+5:	.quad	1000000000
+	.long	274877907
+	.cfi_endproc
+	.size	__kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S
new file mode 100644
index 000000000..79a071e43
--- /dev/null
+++ b/arch/s390/kernel/vdso64/note.S
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+	.long LINUX_VERSION_CODE	/* note contents: the kernel version code */
+ELFNOTE_END
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
new file mode 100644
index 000000000..9f5979d10
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -0,0 +1,138 @@
+/*
+ * This is the infamous ld script for the 64 bits vdso
+ * library
+ */
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+ENTRY(_start)
+
+SECTIONS
+{
+	. = VDSO64_LBASE + SIZEOF_HEADERS;
+
+	.hash		: { *(.hash) }			:text
+	.gnu.hash	: { *(.gnu.hash) }
+	.dynsym		: { *(.dynsym) }
+	.dynstr		: { *(.dynstr) }
+	.gnu.version	: { *(.gnu.version) }
+	.gnu.version_d	: { *(.gnu.version_d) }
+	.gnu.version_r	: { *(.gnu.version_r) }
+
+	.note		: { *(.note.*) }		:text	:note
+
+	. = ALIGN(16);
+	.text		: {
+		*(.text .stub .text.* .gnu.linkonce.t.*)
+	} :text
+	PROVIDE(__etext = .);
+	PROVIDE(_etext = .);
+	PROVIDE(etext = .);
+
+	/*
+	 * Other stuff is appended to the text segment:
+	 */
+	.rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+	.rodata1	: { *(.rodata1) }
+
+	.dynamic	: { *(.dynamic) }		:text	:dynamic
+
+	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+	.gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
+
+	.rela.dyn ALIGN(8) : { *(.rela.dyn) }
+	.got ALIGN(8)	: { *(.got .toc) }
+
+	_end = .;
+	PROVIDE(end = .);
+
+	/*
+	 * Stabs debugging sections are here too.
+	 */
+	.stab	       0 : { *(.stab) }
+	.stabstr       0 : { *(.stabstr) }
+	.stab.excl     0 : { *(.stab.excl) }
+	.stab.exclstr  0 : { *(.stab.exclstr) }
+	.stab.index    0 : { *(.stab.index) }
+	.stab.indexstr 0 : { *(.stab.indexstr) }
+	.comment       0 : { *(.comment) }
+
+	/*
+	 * DWARF debug sections.
+	 * Symbols in the DWARF debugging sections are relative to the
+	 * beginning of the section so we begin them at 0.
+	 */
+	/* DWARF 1 */
+	.debug		0 : { *(.debug) }
+	.line		0 : { *(.line) }
+	/* GNU DWARF 1 extensions */
+	.debug_srcinfo	0 : { *(.debug_srcinfo) }
+	.debug_sfnames	0 : { *(.debug_sfnames) }
+	/* DWARF 1.1 and DWARF 2 */
+	.debug_aranges	0 : { *(.debug_aranges) }
+	.debug_pubnames 0 : { *(.debug_pubnames) }
+	/* DWARF 2 */
+	.debug_info	0 : { *(.debug_info .gnu.linkonce.wi.*) }
+	.debug_abbrev	0 : { *(.debug_abbrev) }
+	.debug_line	0 : { *(.debug_line) }
+	.debug_frame	0 : { *(.debug_frame) }
+	.debug_str	0 : { *(.debug_str) }
+	.debug_loc	0 : { *(.debug_loc) }
+	.debug_macinfo	0 : { *(.debug_macinfo) }
+	/* SGI/MIPS DWARF 2 extensions */
+	.debug_weaknames 0 : { *(.debug_weaknames) }
+	.debug_funcnames 0 : { *(.debug_funcnames) }
+	.debug_typenames 0 : { *(.debug_typenames) }
+	.debug_varnames  0 : { *(.debug_varnames) }
+	/* DWARF 3 */
+	.debug_pubtypes 0 : { *(.debug_pubtypes) }
+	.debug_ranges	0 : { *(.debug_ranges) }
+	.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+
+	. = ALIGN(4096);
+	PROVIDE(_vdso_data = .);
+
+	/DISCARD/	: {
+		*(.note.GNU-stack)
+		*(.branch_lt)
+		*(.data .data.* .gnu.linkonce.d.* .sdata*)
+		*(.bss .sbss .dynbss .dynsbss)
+	}
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME	0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+	text		PT_LOAD FILEHDR PHDRS FLAGS(5);	/* PF_R|PF_X */
+	dynamic		PT_DYNAMIC FLAGS(4);		/* PF_R */
+	note		PT_NOTE FLAGS(4);		/* PF_R */
+	eh_frame_hdr	PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+	VDSO_VERSION_STRING {
+	global:
+		/*
+		 * Has to be there for the kernel to find
+		 */
+		__kernel_gettimeofday;
+		__kernel_clock_gettime;
+		__kernel_clock_getres;
+
+	local: *;
+	};
+}
diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S
new file mode 100644
index 000000000..c245842b5
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso64_wrapper.S
@@ -0,0 +1,14 @@
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+	__PAGE_ALIGNED_DATA
+
+	.globl vdso64_start, vdso64_end
+	.balign PAGE_SIZE
+vdso64_start:
+	.incbin "arch/s390/kernel/vdso64/vdso64.so"	/* embed the built vDSO image */
+	.balign PAGE_SIZE			/* pad so vdso64_end is page aligned */
+vdso64_end:
+
+	.previous
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
new file mode 100644
index 000000000..445657fe6
--- /dev/null
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -0,0 +1,93 @@
+/* ld script to make s390 Linux kernel
+ * Written by Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+ENTRY(startup)
+jiffies = jiffies_64;
+
+PHDRS {
+	text PT_LOAD FLAGS(5);	/* R_E */
+	data PT_LOAD FLAGS(7);	/* RWE */
+	note PT_NOTE FLAGS(0);	/* ___ */
+}
+
+SECTIONS
+{
+	. = 0x00000000;
+	.text : {
+	_text = .;		/* Text and read-only data */
+		HEAD_TEXT
+		TEXT_TEXT
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		IRQENTRY_TEXT
+		*(.fixup)
+		*(.gnu.warning)
+	} :text = 0x0700
+
+	_etext = .;		/* End of text section */
+
+	NOTES :text :note
+
+	.dummy : { *(.dummy) } :data
+
+	RO_DATA_SECTION(PAGE_SIZE)
+
+#ifdef CONFIG_SHARED_KERNEL
+	. = ALIGN(0x100000);	/* VM shared segments are 1MB aligned */
+#endif
+
+	. = ALIGN(PAGE_SIZE);
+	_eshared = .;		/* End of shareable data */
+	_sdata = .;		/* Start of data section */
+
+	EXCEPTION_TABLE(16) :data
+
+	RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
+
+	_edata = .;		/* End of data section */
+
+	/* will be freed after init */
+	. = ALIGN(PAGE_SIZE);	/* Init code and data */
+	__init_begin = .;
+
+	INIT_TEXT_SECTION(PAGE_SIZE)
+
+	/*
+	 * .exit.text is discarded at runtime, not link time,
+	 * to deal with references from __bug_table
+	*/
+	.exit.text : {
+		EXIT_TEXT
+	}
+
+	.exit.data : {
+		EXIT_DATA
+	}
+
+	/* early.c uses stsi, which requires page aligned data. */
+	. = ALIGN(PAGE_SIZE);
+	INIT_DATA_SECTION(0x100)
+
+	PERCPU_SECTION(0x100)
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;		/* freed after init ends here */
+
+	BSS_SECTION(0, 2, 0)
+
+	_end = . ;
+
+	/* Debugging sections.	*/
+	STABS_DEBUG
+	DWARF_DEBUG
+
+	/* Sections to be discarded */
+	DISCARDS
+}
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
new file mode 100644
index 000000000..e53d3595a
--- /dev/null
+++ b/arch/s390/kernel/vtime.c
@@ -0,0 +1,379 @@
+/*
+ *    Virtual cpu timer based timer functions.
+ *
+ *    Copyright IBM Corp. 2004, 2012
+ *    Author(s): Jan Glauber <jan.glauber@de.ibm.com>
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/timex.h>
+#include <linux/types.h>
+#include <linux/time.h>
+
+#include <asm/cputime.h>
+#include <asm/vtimer.h>
+#include <asm/vtime.h>
+#include <asm/cpu_mf.h>
+#include <asm/smp.h>
+
+static void virt_timer_expire(void);	/* defined further down */
+
+static LIST_HEAD(virt_timer_list);	/* pending timers, sorted by ->expires */
+static DEFINE_SPINLOCK(virt_timer_lock);	/* taken around list updates */
+static atomic64_t virt_timer_current;	/* expiry value that is due next */
+static atomic64_t virt_timer_elapsed;	/* virtual time accumulated so far */
+
+static DEFINE_PER_CPU(u64, mt_cycles[32]);	/* last stcctm5() sample */
+static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };	/* MT scaling factor */
+static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };	/* MT scaling divisor */
+
+static inline u64 get_vtimer(void)
+{
+	u64 now;	/* receives the value stored by stpt */
+
+	asm volatile("stpt %0" : "=m" (now));
+	return now;
+}
+
+static inline void set_vtimer(u64 expires)
+{
+	u64 timer;	/* CPU timer value right before it is replaced */
+
+	asm volatile(
+		"	stpt	%0\n"	/* Store current cpu timer value */
+		"	spt	%1"	/* Set new value imm. afterwards */
+		: "=m" (timer) : "m" (expires));
+	S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
+	S390_lowcore.last_update_timer = expires;	/* new reference point */
+}
+
+static inline int virt_timer_forward(u64 elapsed)
+{
+	BUG_ON(!irqs_disabled());	/* must run with interrupts off */
+
+	if (list_empty(&virt_timer_list))
+		return 0;	/* nothing queued, nothing can be due */
+	elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed);
+	return elapsed >= atomic64_read(&virt_timer_current);	/* due? */
+}
+
+/*
+ * Update process times based on virtual cpu times stored by entry.S
+ * to the lowcore fields user_timer, system_timer & steal_timer.
+ */
+static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
+{
+	struct thread_info *ti = task_thread_info(tsk);
+	u64 timer, clock, user, system, steal;
+	u64 user_scaled, system_scaled;
+	int i;
+
+	timer = S390_lowcore.last_update_timer;	/* previous CPU timer sample */
+	clock = S390_lowcore.last_update_clock;	/* previous TOD clock sample */
+	asm volatile(
+		"	stpt	%0\n"	/* Store current cpu timer value */
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+		"	stckf	%1"	/* Store current tod clock value */
+#else
+		"	stck	%1"	/* Store current tod clock value */
+#endif
+		: "=m" (S390_lowcore.last_update_timer),
+		  "=m" (S390_lowcore.last_update_clock));
+	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
+	S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
+	/* steal_timer grew by the TOD delta; user/system come off below */
+	/* Do MT utilization calculation */
+	if (smp_cpu_mtid) {
+		u64 cycles_new[32], *cycles_old;
+		u64 delta, mult, div;
+
+		cycles_old = this_cpu_ptr(mt_cycles);
+		if (stcctm5(smp_cpu_mtid + 1, cycles_new) < 2) {
+			mult = div = 0;
+			for (i = 0; i <= smp_cpu_mtid; i++) {
+				delta = cycles_new[i] - cycles_old[i];
+				mult += delta;
+				div += (i + 1) * delta;	/* weighted by index + 1 */
+			}
+			if (mult > 0) {	/* skip all-zero deltas (div would be 0) */
+				/* Update scaling factor */
+				__this_cpu_write(mt_scaling_mult, mult);
+				__this_cpu_write(mt_scaling_div, div);
+				memcpy(cycles_old, cycles_new,
+				       sizeof(u64) * (smp_cpu_mtid + 1));
+			}
+		}
+	}
+	/* Per-task deltas since the values cached in thread_info */
+	user = S390_lowcore.user_timer - ti->user_timer;
+	S390_lowcore.steal_timer -= user;
+	ti->user_timer = S390_lowcore.user_timer;
+
+	system = S390_lowcore.system_timer - ti->system_timer;
+	S390_lowcore.steal_timer -= system;
+	ti->system_timer = S390_lowcore.system_timer;
+
+	user_scaled = user;
+	system_scaled = system;
+	/* Do MT utilization scaling */
+	if (smp_cpu_mtid) {
+		u64 mult = __this_cpu_read(mt_scaling_mult);
+		u64 div = __this_cpu_read(mt_scaling_div);
+
+		user_scaled = (user_scaled * mult) / div;
+		system_scaled = (system_scaled * mult) / div;
+	}
+	account_user_time(tsk, user, user_scaled);
+	account_system_time(tsk, hardirq_offset, system, system_scaled);
+	/* A positive remainder is time that was neither user nor system */
+	steal = S390_lowcore.steal_timer;
+	if ((s64) steal > 0) {
+		S390_lowcore.steal_timer = 0;
+		account_steal_time(steal);
+	}
+	/* Non-zero result: a virtual timer is due, see virt_timer_forward() */
+	return virt_timer_forward(user + system);
+}
+
+void vtime_task_switch(struct task_struct *prev)
+{
+	struct thread_info *prev_ti = task_thread_info(prev);
+	struct thread_info *next_ti = task_thread_info(current);
+
+	do_account_vtime(prev, 0);
+	prev_ti->user_timer = S390_lowcore.user_timer;
+	prev_ti->system_timer = S390_lowcore.system_timer;
+	/* from here on the lowcore carries the totals of the new task */
+	S390_lowcore.user_timer = next_ti->user_timer;
+	S390_lowcore.system_timer = next_ti->system_timer;
+}
+
+/*
+ * In s390, accounting pending user time also implies
+ * accounting system time in order to correctly compute
+ * the stolen time accounting.
+ */
+void vtime_account_user(struct task_struct *tsk)
+{
+	if (!do_account_vtime(tsk, HARDIRQ_OFFSET))
+		return;		/* no virtual timer became due */
+	virt_timer_expire();
+}
+
+/*
+ * Account the system time consumed since the last update to the task.
+ * User time and steal time are left to do_account_vtime().
+ */
+void vtime_account_irq_enter(struct task_struct *tsk)
+{
+	struct thread_info *ti = task_thread_info(tsk);
+	u64 timer, system, system_scaled;
+
+	timer = S390_lowcore.last_update_timer;
+	S390_lowcore.last_update_timer = get_vtimer();
+	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
+	/* System time of this task since the value cached in thread_info */
+	system = S390_lowcore.system_timer - ti->system_timer;
+	S390_lowcore.steal_timer -= system;
+	ti->system_timer = S390_lowcore.system_timer;
+	system_scaled = system;
+	/* Do MT utilization scaling */
+	if (smp_cpu_mtid) {
+		u64 mult = __this_cpu_read(mt_scaling_mult);
+		u64 div = __this_cpu_read(mt_scaling_div);
+
+		system_scaled = (system_scaled * mult) / div;
+	}
+	account_system_time(tsk, 0, system, system_scaled);
+	/* Advance the vtimer base; the "timer due" result is ignored here */
+	virt_timer_forward(system);
+}
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
+/* vtime_account_system() is a second symbol name for the code above */
+void vtime_account_system(struct task_struct *tsk)
+__attribute__((alias("vtime_account_irq_enter")));
+EXPORT_SYMBOL_GPL(vtime_account_system);
+
+/*
+ * Sorted add to a list. List is linear searched until first bigger
+ * element is found.
+ */
+static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
+{
+	struct list_head *pos = head;	/* default: append at the tail */
+	struct vtimer_list *cur;
+
+	list_for_each_entry(cur, head, entry)
+		if (cur->expires > timer->expires) {
+			pos = &cur->entry;	/* insert in front of it */
+			break;
+		}
+	list_add_tail(&timer->entry, pos);
+}
+
+/*
+ * Handler for expired virtual CPU timer.
+ */
+static void virt_timer_expire(void)
+{
+	struct vtimer_list *timer, *tmp;
+	unsigned long elapsed;
+	LIST_HEAD(cb_list);	/* expired timers waiting for their callback */
+
+	/* walk timer list, fire all expired timers */
+	spin_lock(&virt_timer_lock);
+	elapsed = atomic64_read(&virt_timer_elapsed);
+	list_for_each_entry_safe(timer, tmp, &virt_timer_list, entry) {
+		if (timer->expires < elapsed)
+			/* move expired timer to the callback queue */
+			list_move_tail(&timer->entry, &cb_list);
+		else
+			timer->expires -= elapsed;	/* rebase survivor */
+	}
+	if (!list_empty(&virt_timer_list)) {
+		timer = list_first_entry(&virt_timer_list,
+					 struct vtimer_list, entry);
+		atomic64_set(&virt_timer_current, timer->expires);
+	}
+	atomic64_sub(elapsed, &virt_timer_elapsed);	/* rebase the counter */
+	spin_unlock(&virt_timer_lock);
+
+	/* Do callbacks and recharge periodic timers */
+	list_for_each_entry_safe(timer, tmp, &cb_list, entry) {
+		list_del_init(&timer->entry);
+		timer->function(timer->data);	/* called without the lock */
+		if (timer->interval) {
+			/* Recharge interval timer */
+			timer->expires = timer->interval +
+				atomic64_read(&virt_timer_elapsed);
+			spin_lock(&virt_timer_lock);
+			list_add_sorted(timer, &virt_timer_list);
+			spin_unlock(&virt_timer_lock);
+		}
+	}
+}
+
+void init_virt_timer(struct vtimer_list *timer)
+{
+	INIT_LIST_HEAD(&timer->entry);	/* empty entry: timer is not pending */
+	timer->function = NULL;		/* no callback assigned yet */
+}
+EXPORT_SYMBOL(init_virt_timer);
+
+static inline int vtimer_pending(struct vtimer_list *timer)
+{
+	return list_empty(&timer->entry) ? 0 : 1;	/* 1: on a list */
+}
+
+static void internal_add_vtimer(struct vtimer_list *timer)
+{
+	if (list_empty(&virt_timer_list)) {
+		/* Only timer: it defines the base and the next expiry. */
+		atomic64_set(&virt_timer_current, timer->expires);
+		atomic64_set(&virt_timer_elapsed, 0);
+		list_add(&timer->entry, &virt_timer_list);
+		return;
+	}
+	/* Other timers exist: express the expiry relative to their base. */
+	timer->expires += atomic64_read(&virt_timer_elapsed);
+	if (likely((s64) timer->expires <
+		   (s64) atomic64_read(&virt_timer_current)))
+		/* The newcomer is due first and becomes the next expiry. */
+		atomic64_set(&virt_timer_current, timer->expires);
+	/* Queue it at its sorted position. */
+	list_add_sorted(timer, &virt_timer_list);
+}
+
+static void __add_vtimer(struct vtimer_list *timer, int periodic)
+{
+	unsigned long irqflags;
+
+	timer->interval = !periodic ? 0 : timer->expires;
+	spin_lock_irqsave(&virt_timer_lock, irqflags);
+	internal_add_vtimer(timer);	/* list is only touched under the lock */
+	spin_unlock_irqrestore(&virt_timer_lock, irqflags);
+}
+
+/*
+ * add_virt_timer - add a one-shot virtual CPU timer (interval is set to 0)
+ */
+void add_virt_timer(struct vtimer_list *timer)
+{
+	__add_vtimer(timer, 0);	/* periodic == 0 */
+}
+EXPORT_SYMBOL(add_virt_timer);
+
+/*
+ * add_virt_timer_periodic - add an interval timer; ->expires is the interval
+ */
+void add_virt_timer_periodic(struct vtimer_list *timer)
+{
+	__add_vtimer(timer, 1);	/* periodic == 1 */
+}
+EXPORT_SYMBOL(add_virt_timer_periodic);
+
+static int __mod_vtimer(struct vtimer_list *timer, u64 expires, int periodic)
+{
+	unsigned long flags;
+	int rc;
+
+	BUG_ON(!timer->function);	/* a timer needs a callback */
+	/* NOTE(review): unlocked check; ->expires may have been rebased */
+	if (timer->expires == expires && vtimer_pending(timer))
+		return 1;	/* pending with the same value already */
+	spin_lock_irqsave(&virt_timer_lock, flags);
+	rc = vtimer_pending(timer);
+	if (rc)
+		list_del_init(&timer->entry);	/* dequeue before re-adding */
+	timer->interval = periodic ? expires : 0;
+	timer->expires = expires;
+	internal_add_vtimer(timer);
+	spin_unlock_irqrestore(&virt_timer_lock, flags);
+	return rc;	/* 1 if the timer was pending, 0 otherwise */
+}
+
+/*
+ * Re-arm as one-shot timer; returns 1 if the timer was pending, else 0
+ */
+int mod_virt_timer(struct vtimer_list *timer, u64 expires)
+{
+	return __mod_vtimer(timer, expires, 0);	/* periodic == 0 */
+}
+EXPORT_SYMBOL(mod_virt_timer);
+
+/*
+ * Re-arm as interval timer; returns 1 if the timer was pending, else 0
+ */
+int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires)
+{
+	return __mod_vtimer(timer, expires, 1);	/* periodic == 1 */
+}
+EXPORT_SYMBOL(mod_virt_timer_periodic);
+
+/*
+ * Delete a virtual timer by removing it from the pending list.
+ *
+ * returns whether the deleted timer was pending (1) or not (0)
+ */
+int del_virt_timer(struct vtimer_list *timer)
+{
+	unsigned long flags;
+
+	if (!vtimer_pending(timer))	/* checked without the lock */
+		return 0;
+	spin_lock_irqsave(&virt_timer_lock, flags);
+	list_del_init(&timer->entry);	/* leaves the entry empty again */
+	spin_unlock_irqrestore(&virt_timer_lock, flags);
+	return 1;
+}
+EXPORT_SYMBOL(del_virt_timer);
+
+/*
+ * Start the virtual CPU timer on the current CPU.
+ */
+void vtime_init(void)
+{
+	/* program the CPU timer with VTIMER_MAX_SLICE as initial value */
+	set_vtimer(VTIMER_MAX_SLICE);
+}
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
new file mode 100644
index 000000000..5fce52cf0
--- /dev/null
+++ b/arch/s390/kvm/Kconfig
@@ -0,0 +1,58 @@
+#
+# KVM configuration
+#
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+	def_bool y	# the menu is enabled by default
+	prompt "KVM"
+	---help---
+	  Say Y here to get to see options for using your Linux host to run other
+	  operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	def_tristate y	# defaults to built-in; M builds the kvm module
+	prompt "Kernel-based Virtual Machine (KVM) support"
+	depends on HAVE_KVM
+	select PREEMPT_NOTIFIERS
+	select ANON_INODES
+	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select HAVE_KVM_EVENTFD
+	select KVM_ASYNC_PF
+	select KVM_ASYNC_PF_SYNC
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
+	select HAVE_KVM_IRQ_ROUTING
+	select SRCU
+	---help---
+	  Support hosting paravirtualized guest machines using the SIE
+	  virtualization capability on the mainframe. This should work
+	  on any 64bit machine.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  To compile this as a module, choose M here: the module
+	  will be called kvm.
+
+	  If unsure, say N.
+
+config KVM_S390_UCONTROL
+	bool "Userspace controlled virtual machines"
+	depends on KVM	# only offered when KVM is enabled
+	---help---
+	  Allow CAP_SYS_ADMIN users to create KVM virtual machines that are
+	  controlled by userspace.
+
+	  If unsure, say N.
+
+# OK, it's a little counter-intuitive to do this, but it puts it neatly under
+# the virtualization menu.
+source drivers/vhost/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
new file mode 100644
index 000000000..b3b553469
--- /dev/null
+++ b/arch/s390/kvm/Makefile
@@ -0,0 +1,17 @@
+# Makefile for kernel virtual machines on s390
+#
+# Copyright IBM Corp. 2008
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License (version 2 only)
+# as published by the Free Software Foundation.
+
+KVM := ../../../virt/kvm
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o
+# Header search paths: virt/kvm and arch/s390/kvm
+ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
+# kvm.o = objects built from virt/kvm plus the s390 specific objects
+kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
+kvm-objs += diag.o gaccess.o guestdbg.o
+
+obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
new file mode 100644
index 000000000..fc7ec9584
--- /dev/null
+++ b/arch/s390/kvm/diag.c
@@ -0,0 +1,255 @@
+/*
+ * handling diagnose instructions
+ *
+ * Copyright IBM Corp. 2008, 2011
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <asm/pgalloc.h>
+#include <asm/virtio-ccw.h>
+#include "kvm-s390.h"
+#include "trace.h"
+#include "trace-s390.h"
+#include "gaccess.h"
+
+static int diag_release_pages(struct kvm_vcpu *vcpu)
+{
+	unsigned long start, end;	/* page range [start, end) to release */
+	unsigned long prefix  = kvm_s390_get_prefix(vcpu);
+
+	start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
+	end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf];
+	end += PAGE_SIZE;	/* the second register names the last page */
+	if ((start & ~PAGE_MASK) || (end & ~PAGE_MASK) || start >= end
+	    || start < 2 * PAGE_SIZE)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
+	vcpu->stat.diagnose_10++;
+
+	/*
+	 * start < end was verified above.  Fast path: the range does not
+	 * overlap the two prefix pages and is discarded in one go.
+	 */
+	if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
+		gmap_discard(vcpu->arch.gmap, start, end);
+	} else {
+		/*
+		 * Slow path: split the range into the part below the
+		 * prefix, the two prefix pages (pages 0 and 1 are discarded
+		 * for them) and the part above the prefix.  gmap_discard is
+		 * expected to treat sub-ranges that end up empty as NOPs.
+		 */
+		gmap_discard(vcpu->arch.gmap, start, prefix);
+		if (start <= prefix)
+			gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+		if (end > prefix + PAGE_SIZE)
+			gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
+		gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
+	}
+	return 0;
+}
+
+static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
+{
+	struct prs_parm {	/* parameter block read from guest memory */
+		u16 code;
+		u16 subcode;
+		u16 parm_len;
+		u16 parm_version;
+		u64 token_addr;
+		u64 select_mask;
+		u64 compare_mask;
+		u64 zarch;
+	};
+	struct prs_parm parm;
+	int rc;
+	u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;	/* block address */
+	u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);	/* gets the result */
+
+	if (vcpu->run->s.regs.gprs[rx] & 7)	/* must be 8-byte aligned */
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	switch (parm.subcode) {
+	case 0: /* TOKEN */
+		if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
+			/*
+			 * If the pagefault handshake is already activated,
+			 * the token must not be changed.  We have to return
+			 * decimal 8 instead, as mandated in SC24-6084.
+			 */
+			vcpu->run->s.regs.gprs[ry] = 8;
+			return 0;
+		}
+
+		if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
+		    parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
+			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+		if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		/* All checks passed: remember the handshake parameters */
+		vcpu->arch.pfault_token = parm.token_addr;
+		vcpu->arch.pfault_select = parm.select_mask;
+		vcpu->arch.pfault_compare = parm.compare_mask;
+		vcpu->run->s.regs.gprs[ry] = 0;
+		rc = 0;
+		break;
+	case 1: /*
+		 * CANCEL
+		 * Specification allows to let already pending tokens survive
+		 * the cancel, therefore to reduce code complexity, we assume
+		 * all outstanding tokens are already pending.
+		 */
+		if (parm.token_addr || parm.select_mask ||
+		    parm.compare_mask || parm.zarch)
+			return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+		vcpu->run->s.regs.gprs[ry] = 0;
+		/*
+		 * If the pfault handling was not established or is already
+		 * canceled SC24-6084 requests to return decimal 4.
+		 */
+		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+			vcpu->run->s.regs.gprs[ry] = 4;
+		else
+			vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+
+		rc = 0;
+		break;
+	default:	/* any other subcode is not handled here */
+		rc = -EOPNOTSUPP;
+		break;
+	}
+
+	return rc;
+}
+
+static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.diagnose_44++;
+	VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
+	kvm_vcpu_on_spin(vcpu);	/* the guest gives up its time slice */
+	return 0;
+}
+
+static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu *target;
+	struct kvm *kvm = vcpu->kvm;
+	int idx;
+	int tid;	/* id of the vcpu the guest wants to yield to */
+
+	tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
+	vcpu->stat.diagnose_9c++;
+	VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid);
+
+	if (tid == vcpu->vcpu_id)
+		return 0;	/* yielding to oneself is a no-op */
+
+	kvm_for_each_vcpu(idx, target, kvm) {
+		if (target->vcpu_id != tid)
+			continue;
+		kvm_vcpu_yield_to(target);
+		break;
+	}
+
+	return 0;
+}
+
+static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
+	unsigned long subcode = run->s.regs.gprs[reg] & 0xffff;
+
+	VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
+	/* Subcode 3 additionally asks for a clear reset, subcode 4 does not. */
+	if (subcode == 3)
+		run->s390_reset_flags = KVM_S390_RESET_CLEAR;
+	else if (subcode == 4)
+		run->s390_reset_flags = 0;
+	else
+		return -EOPNOTSUPP;
+
+	/* Stop the vcpu here unless userspace controls the cpu states. */
+	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+		kvm_s390_vcpu_stop(vcpu);
+	run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM |
+				 KVM_S390_RESET_IPL |
+				 KVM_S390_RESET_CPU_INIT;
+	run->exit_reason = KVM_EXIT_S390_RESET;
+	VCPU_EVENT(vcpu, 3, "requesting userspace resets %llx",
+	  run->s390_reset_flags);
+	trace_kvm_s390_request_resets(run->s390_reset_flags);
+	/* The resets themselves are requested from userspace (see above). */
+	return -EREMOTE;
+}
+
+static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	int ret;
+
+	/* Only virtio-ccw notifications are handled in the kernel. */
+	if (!vcpu->kvm->arch.css_support)
+		return -EOPNOTSUPP;
+	if (run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)
+		return -EOPNOTSUPP;
+
+	/*
+	 * Register usage of the notification:
+	 *   gpr 2 - subchannel id (passed as addr)
+	 *   gpr 3 - virtqueue index (passed as datamatch)
+	 *   gpr 4 - index on the bus (optional)
+	 */
+	ret = kvm_io_bus_write_cookie(vcpu, KVM_VIRTIO_CCW_NOTIFY_BUS,
+				      run->s.regs.gprs[2] & 0xffffffff,
+				      8, &run->s.regs.gprs[3],
+				      run->s.regs.gprs[4]);
+
+	/* No match on the bus: userspace handles it, gpr 2 stays untouched. */
+	if (ret == -EOPNOTSUPP)
+		return ret;
+	/* Otherwise the cookie (or error) is returned to the guest in gpr 2. */
+	run->s.regs.gprs[2] = ret;
+	return ret < 0 ? ret : 0;
+}
+
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
+{
+	int code = kvm_s390_get_base_disp_rs(vcpu, NULL) & 0xffff;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)	/* problem state */
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	trace_kvm_s390_handle_diag(vcpu, code);
+	switch (code) {
+	case 0x10:	/* release pages */
+		return diag_release_pages(vcpu);
+	case 0x44:	/* time slice end */
+		return __diag_time_slice_end(vcpu);
+	case 0x9c:	/* time slice end, directed at another vcpu */
+		return __diag_time_slice_end_directed(vcpu);
+	case 0x258:	/* page reference service */
+		return __diag_page_ref_service(vcpu);
+	case 0x308:	/* ipl functions */
+		return __diag_ipl_functions(vcpu);
+	case 0x500:	/* virtio hypercall */
+		return __diag_virtio_hypercall(vcpu);
+	default:	/* everything else is not handled here */
+		return -EOPNOTSUPP;
+	}
+}
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
new file mode 100644
index 000000000..a7559f720
--- /dev/null
+++ b/arch/s390/kvm/gaccess.c
@@ -0,0 +1,915 @@
+/*
+ * guest access functions
+ *
+ * Copyright IBM Corp. 2014
+ *
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/err.h>
+#include <asm/pgtable.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+#include <asm/switch_to.h>
+
+union asce {
+	unsigned long val;	/* raw value overlaying the fields below */
+	struct {
+		unsigned long origin : 52; /* Region- or Segment-Table Origin */
+		unsigned long	 : 2;
+		unsigned long g  : 1; /* Subspace Group Control */
+		unsigned long p  : 1; /* Private Space Control */
+		unsigned long s  : 1; /* Storage-Alteration-Event Control */
+		unsigned long x  : 1; /* Space-Switch-Event Control */
+		unsigned long r  : 1; /* Real-Space Control */
+		unsigned long	 : 1;
+		unsigned long dt : 2; /* Designation-Type Control */
+		unsigned long tl : 2; /* Region- or Segment-Table Length */
+	};
+};
+
+enum {	/* presumably the encodings of asce.dt - TODO confirm */
+	ASCE_TYPE_SEGMENT = 0,
+	ASCE_TYPE_REGION3 = 1,
+	ASCE_TYPE_REGION2 = 2,
+	ASCE_TYPE_REGION1 = 3
+};
+
+union region1_table_entry {
+	unsigned long val;	/* raw entry overlaying the fields below */
+	struct {
+		unsigned long rto: 52;/* Region-Table Origin */
+		unsigned long	 : 2;
+		unsigned long p  : 1; /* DAT-Protection Bit */
+		unsigned long	 : 1;
+		unsigned long tf : 2; /* Region-Second-Table Offset */
+		unsigned long i  : 1; /* Region-Invalid Bit */
+		unsigned long	 : 1;
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long tl : 2; /* Region-Second-Table Length */
+	};
+};
+
+union region2_table_entry {
+	unsigned long val;	/* raw entry overlaying the fields below */
+	struct {
+		unsigned long rto: 52;/* Region-Table Origin */
+		unsigned long	 : 2;
+		unsigned long p  : 1; /* DAT-Protection Bit */
+		unsigned long	 : 1;
+		unsigned long tf : 2; /* Region-Third-Table Offset */
+		unsigned long i  : 1; /* Region-Invalid Bit */
+		unsigned long	 : 1;
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long tl : 2; /* Region-Third-Table Length */
+	};
+};
+
+struct region3_table_entry_fc0 {	/* variant carrying a table origin */
+	unsigned long sto: 52;/* Segment-Table Origin */
+	unsigned long	 : 1;
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long	 : 1;
+	unsigned long tf : 2; /* Segment-Table Offset */
+	unsigned long i  : 1; /* Region-Invalid Bit */
+	unsigned long cr : 1; /* Common-Region Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long tl : 2; /* Segment-Table Length */
+};
+
+struct region3_table_entry_fc1 {	/* variant carrying a frame address */
+	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+	unsigned long	 : 14;
+	unsigned long av : 1; /* ACCF-Validity Control */
+	unsigned long acc: 4; /* Access-Control Bits */
+	unsigned long f  : 1; /* Fetch-Protection Bit */
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long co : 1; /* Change-Recording Override */
+	unsigned long	 : 2;
+	unsigned long i  : 1; /* Region-Invalid Bit */
+	unsigned long cr : 1; /* Common-Region Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long	 : 2;
+};
+
+union region3_table_entry {
+	unsigned long val;	/* raw entry */
+	struct region3_table_entry_fc0 fc0;
+	struct region3_table_entry_fc1 fc1;
+	struct {	/* fields at the same position in fc0 and fc1 */
+		unsigned long	 : 53;
+		unsigned long fc : 1; /* Format-Control */
+		unsigned long	 : 4;
+		unsigned long i  : 1; /* Region-Invalid Bit */
+		unsigned long cr : 1; /* Common-Region Bit */
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long	 : 2;
+	};
+};
+
+struct segment_entry_fc0 {	/* variant carrying a page-table origin */
+	unsigned long pto: 53;/* Page-Table Origin */
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long	 : 3;
+	unsigned long i  : 1; /* Segment-Invalid Bit */
+	unsigned long cs : 1; /* Common-Segment Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long	 : 2;
+};
+
+struct segment_entry_fc1 {	/* variant carrying a frame address */
+	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+	unsigned long	 : 3;
+	unsigned long av : 1; /* ACCF-Validity Control */
+	unsigned long acc: 4; /* Access-Control Bits */
+	unsigned long f  : 1; /* Fetch-Protection Bit */
+	unsigned long fc : 1; /* Format-Control */
+	unsigned long p  : 1; /* DAT-Protection Bit */
+	unsigned long co : 1; /* Change-Recording Override */
+	unsigned long	 : 2;
+	unsigned long i  : 1; /* Segment-Invalid Bit */
+	unsigned long cs : 1; /* Common-Segment Bit */
+	unsigned long tt : 2; /* Table-Type Bits */
+	unsigned long	 : 2;
+};
+
+union segment_table_entry {
+	unsigned long val;	/* raw entry */
+	struct segment_entry_fc0 fc0;
+	struct segment_entry_fc1 fc1;
+	struct {	/* fields at the same position in fc0 and fc1 */
+		unsigned long	 : 53;
+		unsigned long fc : 1; /* Format-Control */
+		unsigned long	 : 4;
+		unsigned long i  : 1; /* Segment-Invalid Bit */
+		unsigned long cs : 1; /* Common-Segment Bit */
+		unsigned long tt : 2; /* Table-Type Bits */
+		unsigned long	 : 2;
+	};
+};
+
+enum {	/* presumably the encodings of the tt (Table-Type) bits - confirm */
+	TABLE_TYPE_SEGMENT = 0,
+	TABLE_TYPE_REGION3 = 1,
+	TABLE_TYPE_REGION2 = 2,
+	TABLE_TYPE_REGION1 = 3
+};
+
+union page_table_entry {
+	unsigned long val;	/* raw entry overlaying the fields below */
+	struct {
+		unsigned long pfra : 52; /* Page-Frame Real Address */
+		unsigned long z  : 1; /* Zero Bit */
+		unsigned long i  : 1; /* Page-Invalid Bit */
+		unsigned long p  : 1; /* DAT-Protection Bit */
+		unsigned long co : 1; /* Change-Recording Override */
+		unsigned long	 : 8;
+	};
+};
+
+/*
+ * vaddress union in order to easily decode a virtual address into its
+ * region first index, region second index etc. parts.
+ */
+union vaddress {
+	unsigned long addr;	/* the complete virtual address */
+	struct {
+		unsigned long rfx : 11;	/* region first index */
+		unsigned long rsx : 11;	/* region second index */
+		unsigned long rtx : 11;	/* presumably region third index */
+		unsigned long sx  : 11;	/* presumably segment index */
+		unsigned long px  : 8;	/* presumably page index */
+		unsigned long bx  : 12;	/* presumably byte index */
+	};
+	struct {	/* the two leading bits of each 11 bit index */
+		unsigned long rfx01 : 2;
+		unsigned long	    : 9;
+		unsigned long rsx01 : 2;
+		unsigned long	    : 9;
+		unsigned long rtx01 : 2;
+		unsigned long	    : 9;
+		unsigned long sx01  : 2;
+		unsigned long	    : 29;
+	};
+};
+
+/*
+ * raddress union which will contain the result (real or absolute address)
+ * after a page table walk. The rfaa, sfaa and pfra members are used to
+ * simply assign them the value of a region, segment or page table entry.
+ */
+union raddress {
+	unsigned long addr;	/* the complete resulting address */
+	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+	unsigned long pfra : 52; /* Page-Frame Real Address */
+};
+
+union alet {
+	u32 val;	/* raw value as found in an access register */
+	struct {
+		u32 reserved : 7;	/* non-zero: ALET specification exc. */
+		u32 p        : 1;	/* set: ALD from CR5, clear: from CR2 */
+		u32 alesn    : 8;	/* compared against ale.alesn */
+		u32 alen     : 16;	/* entry number in the access list */
+	};
+};
+
+union ald {
+	u32 val;	/* raw value read from guest real storage */
+	struct {
+		u32     : 1;
+		u32 alo : 24;	/* access-list origin, units of 128 bytes */
+		u32 all : 7;	/* access-list length, units of 8 entries */
+	};
+};
+
+struct ale {	/* access-list entry, 16 bytes */
+	unsigned long i      : 1; /* ALEN-Invalid Bit */
+	unsigned long        : 5;
+	unsigned long fo     : 1; /* Fetch-Only Bit */
+	unsigned long p      : 1; /* Private Bit */
+	unsigned long alesn  : 8; /* Access-List-Entry Sequence Number */
+	unsigned long aleax  : 16; /* Access-List-Entry Authorization Index */
+	unsigned long        : 32;
+	unsigned long        : 1;
+	unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
+	unsigned long        : 6;
+	unsigned long astesn : 32; /* ASTE Sequence Number */
+} __packed;
+
+struct aste {	/* leading part of an ASN-second-table entry */
+	unsigned long i      : 1; /* ASX-Invalid Bit */
+	unsigned long ato    : 29; /* Authority-Table Origin */
+	unsigned long        : 1;
+	unsigned long b      : 1; /* Base-Space Bit */
+	unsigned long ax     : 16; /* Authorization Index */
+	unsigned long atl    : 12; /* Authority-Table Length */
+	unsigned long        : 2;
+	unsigned long ca     : 1; /* Controlled-ASN Bit */
+	unsigned long ra     : 1; /* Reusable-ASN Bit */
+	unsigned long asce   : 64; /* Address-Space-Control Element */
+	unsigned long ald    : 32; /* presumably an access-list designation */
+	unsigned long astesn : 32; /* sequence number, checked vs ale.astesn */
+	/* .. more fields there */
+} __packed;
+
+int ipte_lock_held(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+
+	if (!(vcpu->arch.sie_block->eca & 1))
+		return kvm->arch.ipte_lock_count != 0;
+	return kvm->arch.sca->ipte_control.kh != 0;
+}
+
+static void ipte_lock_simple(struct kvm_vcpu *vcpu)
+{
+	union ipte_control old, new, *ic;
+
+	mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+	vcpu->kvm->arch.ipte_lock_count++;
+	if (vcpu->kvm->arch.ipte_lock_count > 1)
+		goto out;	/* not the first holder: k was set before */
+	ic = &vcpu->kvm->arch.sca->ipte_control;
+	do {
+		old = READ_ONCE(*ic);
+		while (old.k) {	/* wait until k is observed clear */
+			cond_resched();
+			old = READ_ONCE(*ic);
+		}
+		new = old;
+		new.k = 1;
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+out:
+	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
+}
+
+static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
+{
+	union ipte_control old, new, *ic;
+
+	mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+	vcpu->kvm->arch.ipte_lock_count--;
+	if (vcpu->kvm->arch.ipte_lock_count)
+		goto out;	/* other holders remain, keep k set */
+	ic = &vcpu->kvm->arch.sca->ipte_control;
+	do {	/* last holder: clear k atomically */
+		old = READ_ONCE(*ic);
+		new = old;
+		new.k = 0;
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	wake_up(&vcpu->kvm->arch.ipte_wq);
+out:
+	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
+}
+
+static void ipte_lock_siif(struct kvm_vcpu *vcpu)
+{
+	union ipte_control old, new, *ic;
+
+	ic = &vcpu->kvm->arch.sca->ipte_control;
+	do {
+		old = READ_ONCE(*ic);
+		while (old.kg) {	/* wait until kg is observed zero */
+			cond_resched();
+			old = READ_ONCE(*ic);
+		}
+		new = old;
+		new.k = 1;
+		new.kh++;	/* one more holder counted in kh */
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+}
+
+static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
+{
+	union ipte_control old, new, *ic;
+
+	ic = &vcpu->kvm->arch.sca->ipte_control;
+	do {
+		old = READ_ONCE(*ic);
+		new = old;
+		new.kh--;	/* drop our count in kh */
+		if (!new.kh)
+			new.k = 0;	/* last holder also clears k */
+	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	if (!new.kh)
+		wake_up(&vcpu->kvm->arch.ipte_wq);
+}
+
+void ipte_lock(struct kvm_vcpu *vcpu)
+{
+	if (!(vcpu->arch.sie_block->eca & 1))
+		ipte_lock_simple(vcpu);	/* mutex and counter based */
+	else
+		ipte_lock_siif(vcpu);	/* counts holders in ipte_control */
+}
+
+void ipte_unlock(struct kvm_vcpu *vcpu)
+{
+	if (!(vcpu->arch.sie_block->eca & 1))
+		ipte_unlock_simple(vcpu);	/* mutex and counter based */
+	else
+		ipte_unlock_siif(vcpu);	/* counts holders in ipte_control */
+}
+
+static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
+			  int write)
+{
+	union alet alet;
+	struct ale ale;
+	struct aste aste;
+	unsigned long ald_addr, authority_table_addr;
+	union ald ald;
+	int eax, rc;
+	u8 authority_table;
+
+	if (ar >= NUM_ACRS)
+		return -EINVAL;
+	/* Returns 0 with *asce set, a PGM_* code, or a read_guest_real() rc */
+	save_access_regs(vcpu->run->s.regs.acrs);
+	alet.val = vcpu->run->s.regs.acrs[ar];
+
+	if (ar == 0 || alet.val == 0) {	/* use the ASCE from CR1 */
+		asce->val = vcpu->arch.sie_block->gcr[1];
+		return 0;
+	} else if (alet.val == 1) {	/* use the ASCE from CR7 */
+		asce->val = vcpu->arch.sie_block->gcr[7];
+		return 0;
+	}
+
+	if (alet.reserved)
+		return PGM_ALET_SPECIFICATION;
+
+	if (alet.p)
+		ald_addr = vcpu->arch.sie_block->gcr[5];
+	else
+		ald_addr = vcpu->arch.sie_block->gcr[2];
+	ald_addr &= 0x7fffffc0;
+
+	rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
+	if (rc)
+		return rc;
+
+	if (alet.alen / 8 > ald.all)	/* entry number beyond the list */
+		return PGM_ALEN_TRANSLATION;
+
+	if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
+		return PGM_ADDRESSING;	/* entry address exceeds 31 bits */
+
+	rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
+			     sizeof(struct ale));
+	if (rc)
+		return rc;
+
+	if (ale.i == 1)
+		return PGM_ALEN_TRANSLATION;
+	if (ale.alesn != alet.alesn)
+		return PGM_ALE_SEQUENCE;
+
+	rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
+	if (rc)
+		return rc;
+
+	if (aste.i)
+		return PGM_ASTE_VALIDITY;
+	if (aste.astesn != ale.astesn)
+		return PGM_ASTE_SEQUENCE;
+
+	if (ale.p == 1) {	/* private entry: authorization is checked */
+		eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
+		if (ale.aleax != eax) {
+			if (eax / 16 > aste.atl)
+				return PGM_EXTENDED_AUTHORITY;
+
+			authority_table_addr = aste.ato * 4 + eax / 4;
+
+			rc = read_guest_real(vcpu, authority_table_addr,
+					     &authority_table,
+					     sizeof(u8));
+			if (rc)
+				return rc;
+			/* two bits per index, four indexes per table byte */
+			if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
+				return PGM_EXTENDED_AUTHORITY;
+		}
+	}
+
+	if (ale.fo == 1 && write)	/* fetch-only entry, store access */
+		return PGM_PROTECTION;
+
+	asce->val = aste.asce;
+	return 0;
+}
+
+struct trans_exc_code_bits {	/* bit view of pgm->trans_exc_code */
+	unsigned long addr : 52; /* Translation-exception Address */
+	unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
+	unsigned long	   : 6;  /* unnamed filler bits */
+	unsigned long b60  : 1;  /* set for protection found by AR translation */
+	unsigned long b61  : 1;  /* set for AR and DAT translation protection */
+	unsigned long as   : 2;  /* ASCE Identifier */
+};
+
+enum {	/* values for trans_exc_code_bits.fsi */
+	FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
+	FSI_STORE   = 1, /* Exception was due to store operation */
+	FSI_FETCH   = 2  /* Exception was due to fetch operation */
+};
+
+static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
+			 ar_t ar, int write)
+{	/* Pick the ASCE for an access and reset/prime vcpu->arch.pgm. */
+	int rc;
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+	struct trans_exc_code_bits *tec_bits;
+
+	memset(pgm, 0, sizeof(*pgm));	/* drop any stale exception data */
+	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+	tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
+	tec_bits->as = psw_bits(*psw).as;
+
+	if (!psw_bits(*psw).t) {	/* DAT off: mark ASCE as real (r = 1) */
+		asce->val = 0;
+		asce->r = 1;
+		return 0;
+	}
+
+	switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
+	case PSW_AS_PRIMARY:
+		asce->val = vcpu->arch.sie_block->gcr[1];
+		return 0;
+	case PSW_AS_SECONDARY:
+		asce->val = vcpu->arch.sie_block->gcr[7];
+		return 0;
+	case PSW_AS_HOME:
+		asce->val = vcpu->arch.sie_block->gcr[13];
+		return 0;
+	case PSW_AS_ACCREG:
+		rc = ar_translation(vcpu, asce, ar, write);
+		switch (rc) {	/* extra exception data for some PGM codes */
+		case PGM_ALEN_TRANSLATION:
+		case PGM_ALE_SEQUENCE:
+		case PGM_ASTE_VALIDITY:
+		case PGM_ASTE_SEQUENCE:
+		case PGM_EXTENDED_AUTHORITY:
+			vcpu->arch.pgm.exc_access_id = ar;
+			break;
+		case PGM_PROTECTION:
+			tec_bits->b60 = 1;
+			tec_bits->b61 = 1;
+			break;
+		}
+		if (rc > 0)	/* positive rc is a program interruption code */
+			pgm->code = rc;
+		return rc;
+	}
+	return 0;
+}
+
+static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
+{	/* Fetch one unsigned long sized table entry at guest address @gpa. */
+	return kvm_read_guest(kvm, gpa, val, sizeof(*val));
+}
+
+/**
+ * guest_translate - translate a guest virtual into a guest absolute address
+ * @vcpu: virtual cpu
+ * @gva: guest virtual address
+ * @gpa: points to where guest physical (absolute) address should be stored
+ * @asce: effective asce
+ * @write: indicates if access is a write access
+ *
+ * Translate a guest virtual address into a guest absolute address by means
+ * of dynamic address translation as specified by the architecture.
+ * If the resulting absolute address is not available in the configuration
+ * an addressing exception is indicated and @gpa will not be changed.
+ *
+ * Returns: - zero on success; @gpa contains the resulting absolute address
+ *	    - a negative value (-EFAULT) if a guest table read failed, e.g.
+ *	      broken guest mapping; store in a signed type to test for it
+ *	    - a positive value if an access exception happened. In this case
+ *	      the returned value is the program interruption code as defined
+ *	      by the architecture
+ */
+static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
+				     unsigned long *gpa, const union asce asce,
+				     int write)
+{
+	union vaddress vaddr = {.addr = gva};
+	union raddress raddr = {.addr = gva};	/* frame bits replaced below */
+	union page_table_entry pte;
+	int dat_protection = 0;	/* collects the p bits of the table entries */
+	union ctlreg0 ctlreg0;
+	unsigned long ptr;
+	int edat1, edat2;
+
+	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
+	edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
+	edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
+	if (asce.r)	/* ASCE marked real: no table walk */
+		goto real_address;
+	ptr = asce.origin * 4096;
+	switch (asce.dt) {	/* address of the entry in the top-level table */
+	case ASCE_TYPE_REGION1:
+		if (vaddr.rfx01 > asce.tl)
+			return PGM_REGION_FIRST_TRANS;
+		ptr += vaddr.rfx * 8;
+		break;
+	case ASCE_TYPE_REGION2:
+		if (vaddr.rfx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.rsx01 > asce.tl)
+			return PGM_REGION_SECOND_TRANS;
+		ptr += vaddr.rsx * 8;
+		break;
+	case ASCE_TYPE_REGION3:
+		if (vaddr.rfx || vaddr.rsx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.rtx01 > asce.tl)
+			return PGM_REGION_THIRD_TRANS;
+		ptr += vaddr.rtx * 8;
+		break;
+	case ASCE_TYPE_SEGMENT:
+		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
+			return PGM_ASCE_TYPE;
+		if (vaddr.sx01 > asce.tl)
+			return PGM_SEGMENT_TRANSLATION;
+		ptr += vaddr.sx * 8;
+		break;
+	}
+	switch (asce.dt) {	/* walk the tables, one level per case */
+	case ASCE_TYPE_REGION1:	{
+		union region1_table_entry rfte;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &rfte.val))
+			return -EFAULT;
+		if (rfte.i)
+			return PGM_REGION_FIRST_TRANS;
+		if (rfte.tt != TABLE_TYPE_REGION1)
+			return PGM_TRANSLATION_SPEC;
+		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
+			return PGM_REGION_SECOND_TRANS;
+		if (edat1)
+			dat_protection |= rfte.p;
+		ptr = rfte.rto * 4096 + vaddr.rsx * 8;
+	}
+		/* fallthrough */
+	case ASCE_TYPE_REGION2: {
+		union region2_table_entry rste;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &rste.val))
+			return -EFAULT;
+		if (rste.i)
+			return PGM_REGION_SECOND_TRANS;
+		if (rste.tt != TABLE_TYPE_REGION2)
+			return PGM_TRANSLATION_SPEC;
+		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
+			return PGM_REGION_THIRD_TRANS;
+		if (edat1)
+			dat_protection |= rste.p;
+		ptr = rste.rto * 4096 + vaddr.rtx * 8;
+	}
+		/* fallthrough */
+	case ASCE_TYPE_REGION3: {
+		union region3_table_entry rtte;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &rtte.val))
+			return -EFAULT;
+		if (rtte.i)
+			return PGM_REGION_THIRD_TRANS;
+		if (rtte.tt != TABLE_TYPE_REGION3)
+			return PGM_TRANSLATION_SPEC;
+		if (rtte.cr && asce.p && edat2)
+			return PGM_TRANSLATION_SPEC;
+		if (rtte.fc && edat2) {	/* entry maps a frame directly */
+			dat_protection |= rtte.fc1.p;
+			raddr.rfaa = rtte.fc1.rfaa;
+			goto absolute_address;
+		}
+		if (vaddr.sx01 < rtte.fc0.tf)
+			return PGM_SEGMENT_TRANSLATION;
+		if (vaddr.sx01 > rtte.fc0.tl)
+			return PGM_SEGMENT_TRANSLATION;
+		if (edat1)
+			dat_protection |= rtte.fc0.p;
+		ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8;
+	}
+		/* fallthrough */
+	case ASCE_TYPE_SEGMENT: {
+		union segment_table_entry ste;
+
+		if (kvm_is_error_gpa(vcpu->kvm, ptr))
+			return PGM_ADDRESSING;
+		if (deref_table(vcpu->kvm, ptr, &ste.val))
+			return -EFAULT;
+		if (ste.i)
+			return PGM_SEGMENT_TRANSLATION;
+		if (ste.tt != TABLE_TYPE_SEGMENT)
+			return PGM_TRANSLATION_SPEC;
+		if (ste.cs && asce.p)
+			return PGM_TRANSLATION_SPEC;
+		if (ste.fc && edat1) {	/* entry maps a frame directly */
+			dat_protection |= ste.fc1.p;
+			raddr.sfaa = ste.fc1.sfaa;
+			goto absolute_address;
+		}
+		dat_protection |= ste.fc0.p;
+		ptr = ste.fc0.pto * 2048 + vaddr.px * 8;
+	}
+	}
+	if (kvm_is_error_gpa(vcpu->kvm, ptr))
+		return PGM_ADDRESSING;
+	if (deref_table(vcpu->kvm, ptr, &pte.val))
+		return -EFAULT;
+	if (pte.i)
+		return PGM_PAGE_TRANSLATION;
+	if (pte.z)
+		return PGM_TRANSLATION_SPEC;
+	if (pte.co && !edat1)
+		return PGM_TRANSLATION_SPEC;
+	dat_protection |= pte.p;
+	raddr.pfra = pte.pfra;
+real_address:	/* raddr holds a real address: convert it to absolute */
+	raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
+absolute_address:	/* entries with fc set jump here, skipping the above */
+	if (write && dat_protection)
+		return PGM_PROTECTION;
+	if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
+		return PGM_ADDRESSING;
+	*gpa = raddr.addr;
+	return 0;
+}
+
+static inline int is_low_address(unsigned long ga)
+{
+	/* Check for address ranges 0..511 and 4096..4607 */
+	return (ga & ~0x11fful) == 0;	/* only bits 0x1000 and 0x1ff allowed */
+}
+
+static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
+					  const union asce asce)
+{	/* Returns 1 if low-address protection applies to @asce, else 0. */
+	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+
+	if (!ctlreg0.lap)	/* lap bit in guest CR0 is off */
+		return 0;
+	if (psw_bits(*psw).t && asce.p)	/* DAT on and asce.p set: exempt */
+		return 0;
+	return 1;
+}
+
+static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
+			    unsigned long *pages, unsigned long nr_pages,
+			    const union asce asce, int write)
+{	/* Store in @pages the absolute address of @nr_pages pages from @ga. */
+	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	struct trans_exc_code_bits *tec_bits;
+	int lap_enabled, rc;
+
+	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+	lap_enabled = low_address_protection_enabled(vcpu, asce);
+	while (nr_pages) {
+		ga = kvm_s390_logical_to_effective(vcpu, ga);
+		tec_bits->addr = ga >> PAGE_SHIFT;	/* for a possible exception */
+		if (write && lap_enabled && is_low_address(ga)) {
+			pgm->code = PGM_PROTECTION;
+			return pgm->code;
+		}
+		ga &= PAGE_MASK;	/* only whole pages are translated */
+		if (psw_bits(*psw).t) {
+			rc = guest_translate(vcpu, ga, pages, asce, write);
+			if (rc < 0)	/* host-side failure, not a PGM code */
+				return rc;
+			if (rc == PGM_PROTECTION)
+				tec_bits->b61 = 1;
+			if (rc)
+				pgm->code = rc;
+		} else {	/* DAT off: @ga is treated as a real address */
+			*pages = kvm_s390_real_to_abs(vcpu, ga);
+			if (kvm_is_error_gpa(vcpu->kvm, *pages))
+				pgm->code = PGM_ADDRESSING;
+		}
+		if (pgm->code)
+			return pgm->code;
+		ga += PAGE_SIZE;
+		pages++;
+		nr_pages--;
+	}
+	return 0;
+}
+
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
+		 unsigned long len, int write)
+{	/* Copy @len bytes between @data and guest logical address @ga. */
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	unsigned long _len, nr_pages, gpa, idx;
+	unsigned long pages_array[2];	/* avoids vmalloc for short accesses */
+	unsigned long *pages;
+	int need_ipte_lock;
+	union asce asce;
+	int rc;
+
+	if (!len)
+		return 0;
+	rc = get_vcpu_asce(vcpu, &asce, ar, write);
+	if (rc)
+		return rc;
+	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
+	pages = pages_array;
+	if (nr_pages > ARRAY_SIZE(pages_array))	/* on-stack array too small */
+		pages = vmalloc(nr_pages * sizeof(unsigned long));
+	if (!pages)
+		return -ENOMEM;
+	need_ipte_lock = psw_bits(*psw).t && !asce.r;	/* tables are walked */
+	if (need_ipte_lock)
+		ipte_lock(vcpu);
+	rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write);
+	for (idx = 0; idx < nr_pages && !rc; idx++) {	/* skipped if rc != 0 */
+		gpa = *(pages + idx) + (ga & ~PAGE_MASK);
+		_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
+		if (write)
+			rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
+		else
+			rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
+		len -= _len;
+		ga += _len;
+		data += _len;
+	}
+	if (need_ipte_lock)
+		ipte_unlock(vcpu);
+	if (nr_pages > ARRAY_SIZE(pages_array))	/* free only what was vmalloc'ed */
+		vfree(pages);
+	return rc;
+}
+
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+		      void *data, unsigned long len, int write)
+{	/* Copy @len bytes between @data and guest real address @gra. */
+	unsigned long _len, gpa;
+	int rc = 0;
+
+	while (len && !rc) {	/* stops at the first failing chunk */
+		gpa = kvm_s390_real_to_abs(vcpu, gra);
+		_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
+		if (write)
+			rc = write_guest_abs(vcpu, gpa, data, _len);
+		else
+			rc = read_guest_abs(vcpu, gpa, data, _len);
+		len -= _len;
+		gra += _len;
+		data += _len;
+	}
+	return rc;
+}
+
+/**
+ * guest_translate_address - translate guest logical into guest absolute address
+ *
+ * Parameter semantics are the same as the ones from guest_translate; @ar is
+ * the access register number. Guest memory contents at @gva are not changed.
+ *
+ * Note: The IPTE lock is not taken during this function, so the caller
+ * has to take care of this.
+ */
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+			    unsigned long *gpa, int write)
+{
+	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	struct trans_exc_code_bits *tec;
+	union asce asce;
+	int rc;
+
+	gva = kvm_s390_logical_to_effective(vcpu, gva);
+	tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+	rc = get_vcpu_asce(vcpu, &asce, ar, write);
+	tec->addr = gva >> PAGE_SHIFT;	/* after get_vcpu_asce(): it clears pgm */
+	if (rc)
+		return rc;
+	if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
+		if (write) {
+			rc = pgm->code = PGM_PROTECTION;
+			return rc;
+		}
+	}
+
+	if (psw_bits(*psw).t && !asce.r) {	/* Use DAT? */
+		rc = guest_translate(vcpu, gva, gpa, asce, write);
+		if (rc > 0) {	/* access exception: record it in pgm */
+			if (rc == PGM_PROTECTION)
+				tec->b61 = 1;
+			pgm->code = rc;
+		}
+	} else {
+		rc = 0;
+		*gpa = kvm_s390_real_to_abs(vcpu, gva);
+		if (kvm_is_error_gpa(vcpu->kvm, *gpa))
+			rc = pgm->code = PGM_ADDRESSING;
+	}
+
+	return rc;
+}
+
+/**
+ * check_gva_range - test a range of guest virtual addresses for accessibility
+ */
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+		    unsigned long length, int is_write)
+{
+	unsigned long gpa;	/* translation result, not used further */
+	unsigned long currlen;
+	int rc = 0;
+
+	ipte_lock(vcpu);	/* guest_translate_address() does not take it */
+	while (length > 0 && !rc) {	/* stop at the first failing page */
+		currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
+		rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write);
+		gva += currlen;
+		length -= currlen;
+	}
+	ipte_unlock(vcpu);
+
+	return rc;
+}
+
+/**
+ * kvm_s390_check_low_addr_prot_real - check for low-address protection
+ * @gra: Guest real address
+ *
+ * Checks whether an address is subject to low-address protection and sets
+ * up vcpu->arch.pgm accordingly if necessary.
+ *
+ * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
+ */
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
+{
+	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	struct trans_exc_code_bits *tec_bits;
+	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
+
+	if (!ctlreg0.lap || !is_low_address(gra))	/* pgm left untouched */
+		return 0;
+
+	memset(pgm, 0, sizeof(*pgm));
+	tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+	tec_bits->fsi = FSI_STORE;	/* always reported as a store */
+	tec_bits->as = psw_bits(*psw).as;
+	tec_bits->addr = gra >> PAGE_SHIFT;
+	pgm->code = PGM_PROTECTION;
+
+	return pgm->code;
+}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
new file mode 100644
index 000000000..ef03726cc
--- /dev/null
+++ b/arch/s390/kvm/gaccess.h
@@ -0,0 +1,338 @@
+/*
+ * access guest memory
+ *
+ * Copyright IBM Corp. 2008, 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#ifndef __KVM_S390_GACCESS_H
+#define __KVM_S390_GACCESS_H
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include "kvm-s390.h"
+
+/**
+ * kvm_s390_real_to_abs - convert guest real address to guest absolute address
+ * @vcpu: guest virtual cpu
+ * @gra: guest real address
+ *
+ * Returns the guest absolute address that corresponds to the passed guest real
+ * address @gra of a virtual guest cpu by applying its prefix.
+ */
+static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
+						 unsigned long gra)
+{
+	unsigned long prefix  = kvm_s390_get_prefix(vcpu);
+
+	if (gra < 2 * PAGE_SIZE)	/* first two pages: moved up by prefix */
+		gra += prefix;
+	else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE)
+		gra -= prefix;	/* two pages at prefix: moved down to 0 */
+	return gra;
+}
+
+/**
+ * kvm_s390_logical_to_effective - convert guest logical to effective address
+ * @vcpu: guest virtual cpu
+ * @ga: guest logical address
+ *
+ * Convert a guest vcpu logical address to a guest vcpu effective address by
+ * applying the rules of the vcpu's addressing mode defined by PSW bits 31
+ * and 32 (extended/basic addressing mode).
+ *
+ * Depending on the vcpu's addressing mode the upper 40 bits (24 bit addressing
+ * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing mode)
+ * of @ga will be zeroed and the remaining bits will be returned.
+ */
+static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
+							  unsigned long ga)
+{
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+
+	if (psw_bits(*psw).eaba == PSW_AMODE_64BIT)
+		return ga;
+	if (psw_bits(*psw).eaba == PSW_AMODE_31BIT)
+		return ga & ((1UL << 31) - 1);
+	return ga & ((1UL << 24) - 1);	/* any other mode: keep 24 bits */
+}
+
+/*
+ * put_guest_lc, read_guest_lc and write_guest_lc are guest access functions
+ * which shall only be used to access the lowcore of a vcpu.
+ * These functions should be used for e.g. interrupt handlers where no
+ * guest memory access protection facilities, like key or low address
+ * protection, are applicable.
+ * At a later point guest vcpu lowcore access should happen via pinned
+ * prefix pages, so that these pages can be accessed directly via the
+ * kernel mapping. All of these *_lc functions can be removed then.
+ */
+
+/**
+ * put_guest_lc - write a simple variable to a guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @x: value to copy to guest
+ * @gra: vcpu's destination guest real address (pointer; its type sets size)
+ *
+ * Copies a simple value from kernel space to a guest vcpu's lowcore.
+ * The size of the variable may be 1, 2, 4 or 8 bytes. The destination
+ * must be located in the vcpu's lowcore. Otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *	 the guest memory mapping is broken. In any case the best solution
+ *	 would be to terminate the guest.
+ *	 It is wrong to inject a guest exception.
+ */
+#define put_guest_lc(vcpu, x, gra)				\
+({								\
+	struct kvm_vcpu *__vcpu = (vcpu);			\
+	__typeof__(*(gra)) __x = (x);				\
+	unsigned long __gpa;					\
+								\
+	__gpa = (unsigned long)(gra);				\
+	__gpa += kvm_s390_get_prefix(__vcpu);			\
+	kvm_write_guest(__vcpu->kvm, __gpa, &__x, sizeof(__x));	\
+})
+
+/**
+ * write_guest_lc - copy data from kernel space to guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @gra: vcpu's destination guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from kernel space to guest vcpu's lowcore. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *	 the guest memory mapping is broken. In any case the best solution
+ *	 would be to terminate the guest.
+ *	 It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int write_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		   unsigned long len)
+{
+	unsigned long gpa = gra + kvm_s390_get_prefix(vcpu); /* no range check */
+
+	return kvm_write_guest(vcpu->kvm, gpa, data, len);
+}
+
+/**
+ * read_guest_lc - copy data from guest vcpu's lowcore to kernel space
+ * @vcpu: virtual cpu
+ * @gra: vcpu's source guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy data from guest vcpu's lowcore to kernel space. The entire range must
+ * be located within the vcpu's lowcore, otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ *	 the guest memory mapping is broken. In any case the best solution
+ *	 would be to terminate the guest.
+ *	 It is wrong to inject a guest exception.
+ */
+static inline __must_check
+int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		  unsigned long len)
+{
+	unsigned long gpa = gra + kvm_s390_get_prefix(vcpu); /* no range check */
+
+	return kvm_read_guest(vcpu->kvm, gpa, data, len);
+}
+
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+			    ar_t ar, unsigned long *gpa, int write);
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+		    unsigned long length, int is_write);
+
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
+		 unsigned long len, int write);
+
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+		      void *data, unsigned long len, int write);
+
+/**
+ * write_guest - copy data from kernel space to guest space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @ar: access register
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @ga (guest address).
+ * In order to copy data to guest space the PSW of the vcpu is inspected:
+ * If DAT is off data will be copied to guest real or absolute memory.
+ * If DAT is on data will be copied to the address space as specified by
+ * the address space bits of the PSW:
+ * Primary, secondary, home space or access register mode.
+ * The addressing mode of the PSW is also inspected, so that address wrap
+ * around is taken into account for 24-, 31- and 64-bit addressing mode,
+ * if the to be copied data crosses page boundaries in guest address space.
+ * In addition also low address and DAT protection are inspected before
+ * copying any data (key protection is currently not implemented).
+ *
+ * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu.
+ * In case of an access exception (e.g. protection exception) pgm will contain
+ * all data necessary so that a subsequent call to 'kvm_s390_inject_prog_vcpu()'
+ * will inject a correct exception into the guest.
+ * If no access exception happened, the contents of pgm are undefined when
+ * this function returns.
+ *
+ * Returns:  - zero on success
+ *	     - a negative value if e.g. the guest mapping is broken or in
+ *	       case of out-of-memory. In this case the contents of pgm are
+ *	       undefined. Also parts of @data may have been copied to guest
+ *	       space.
+ *	     - a positive value if an access exception happened. In this case
+ *	       the returned value is the program interruption code and the
+ *	       contents of pgm may be used to inject an exception into the
+ *	       guest. No data has been copied to guest space.
+ *
+ * Note: in case an access exception is recognized no data has been copied to
+ *	 guest space (this is also true, if the to be copied data would cross
+ *	 one or more page boundaries in guest space).
+ *	 Therefore this function may be used for nullifying and suppressing
+ *	 instruction emulation.
+ *	 It may also be used for terminating instructions, if it is undefined
+ *	 if data has been changed in guest space in case of an exception.
+ */
+static inline __must_check
+int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
+		unsigned long len)
+{
+	return access_guest(vcpu, ga, ar, data, len, 1);	/* 1: write */
+}
+
+/**
+ * read_guest - copy data from guest space to kernel space
+ * @vcpu: virtual cpu
+ * @ga: guest address
+ * @ar: access register
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @ga (guest address) to @data (kernel space).
+ *
+ * The behaviour of read_guest is identical to write_guest, except that
+ * data will be copied from guest space to kernel space.
+ */
+static inline __must_check
+int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
+	       unsigned long len)
+{
+	return access_guest(vcpu, ga, ar, data, len, 0);	/* 0: read */
+}
+
+/**
+ * write_guest_abs - copy data from kernel space to guest space absolute
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gpa (guest absolute address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+		    unsigned long len)
+{
+	return kvm_write_guest(vcpu->kvm, gpa, data, len); /* @gpa used as is */
+}
+
+/**
+ * read_guest_abs - copy data from guest space absolute to kernel space
+ * @vcpu: virtual cpu
+ * @gpa: guest physical (absolute) address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gpa (guest absolute address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
+		   unsigned long len)
+{
+	return kvm_read_guest(vcpu->kvm, gpa, data, len); /* @gpa used as is */
+}
+
+/**
+ * write_guest_real - copy data from kernel space to guest space real
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: source address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @data (kernel space) to @gra (guest real address).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest low address and key protection are not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to guest memory.
+ */
+static inline __must_check
+int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		     unsigned long len)
+{
+	return access_guest_real(vcpu, gra, data, len, 1);	/* 1: write */
+}
+
+/**
+ * read_guest_real - copy data from guest space real to kernel space
+ * @vcpu: virtual cpu
+ * @gra: guest real address
+ * @data: destination address in kernel space
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @gra (guest real address) to @data (kernel space).
+ * It is up to the caller to ensure that the entire guest memory range is
+ * valid memory before calling this function.
+ * Guest key protection is not checked.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * If an error occurs data may have been copied partially to kernel space.
+ */
+static inline __must_check
+int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
+		    unsigned long len)
+{
+	return access_guest_real(vcpu, gra, data, len, 0);	/* 0: read */
+}
+
+void ipte_lock(struct kvm_vcpu *vcpu);
+void ipte_unlock(struct kvm_vcpu *vcpu);
+int ipte_lock_held(struct kvm_vcpu *vcpu);
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
+
+#endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
new file mode 100644
index 000000000..e97b3455d
--- /dev/null
+++ b/arch/s390/kvm/guestdbg.c
@@ -0,0 +1,482 @@
+/*
+ * kvm guest debug support
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
+ */
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+/*
+ * Extends the address range given by *start and *stop (both inclusive) to
+ * include the range starting at estart with the length len. A range with
+ * *start > *stop wraps around; tries to minimize the overall interval size.
+ */
+static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len)
+{
+	u64 estop;
+
+	if (len > 0)
+		len--;		/* estop below is the last byte, not one past it */
+	else
+		len = 0;
+
+	estop = estart + len;
+
+	/* 0-0 range represents "not set" */
+	if ((*start == 0) && (*stop == 0)) {
+		*start = estart;
+		*stop = estop;
+	} else if (*start <= *stop) {
+		/* increase the existing range */
+		if (estart < *start)
+			*start = estart;
+		if (estop > *stop)
+			*stop = estop;
+	} else {
+		/* "overflowing" interval, whereby *start > *stop */
+		if (estart <= *stop) {
+			if (estop > *stop)
+				*stop = estop;
+		} else if (estop > *start) {
+			if (estart < *start)
+				*start = estart;
+		}
+		/* minimize the range */
+		else if ((estop - *stop) < (*start - estart))
+			*stop = estop;
+		else
+			*start = estart;
+	}
+}
+
+#define MAX_INST_SIZE 6	/* bytes added in front of each breakpoint range */
+/* Enable PER ifetch/branch events and extend CR10/CR11 over all hw bps. */
+static void enable_all_hw_bp(struct kvm_vcpu *vcpu)
+{
+	unsigned long start, len;
+	u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+	u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+	u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+	int i;
+
+	if (vcpu->arch.guestdbg.nr_hw_bp <= 0 ||
+	    vcpu->arch.guestdbg.hw_bp_info == NULL)
+		return;
+
+	/*
+	 * If the guest is not interested in branching events, we can safely
+	 * limit them to the PER address range.
+	 */
+	if (!(*cr9 & PER_EVENT_BRANCH))
+		*cr9 |= PER_CONTROL_BRANCH_ADDRESS;
+	*cr9 |= PER_EVENT_IFETCH | PER_EVENT_BRANCH;
+
+	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
+		start = vcpu->arch.guestdbg.hw_bp_info[i].addr;
+		len = vcpu->arch.guestdbg.hw_bp_info[i].len;
+
+		/*
+		 * The instruction in front of the desired bp has to
+		 * report instruction-fetching events
+		 */
+		if (start < MAX_INST_SIZE) {
+			len += start;
+			start = 0;
+		} else {
+			start -= MAX_INST_SIZE;
+			len += MAX_INST_SIZE;
+		}
+
+		extend_address_range(cr10, cr11, start, len);
+	}
+}
+/* Set up PER store events and the CR10/CR11 range for the hw watchpoints. */
+static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
+{
+	unsigned long start, len;
+	u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
+	u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
+	u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
+	int i;
+
+	if (vcpu->arch.guestdbg.nr_hw_wp <= 0 ||
+	    vcpu->arch.guestdbg.hw_wp_info == NULL)
+		return;
+
+	/* if host uses storage alteration for special address
+	 * spaces, enable all events and give all to the guest */
+	if (*cr9 & PER_EVENT_STORE && *cr9 & PER_CONTROL_ALTERATION) {
+		*cr9 &= ~PER_CONTROL_ALTERATION;
+		*cr10 = 0;
+		*cr11 = PSW_ADDR_INSN;
+	} else {
+		*cr9 &= ~PER_CONTROL_ALTERATION;
+		*cr9 |= PER_EVENT_STORE;
+
+		for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
+			start = vcpu->arch.guestdbg.hw_wp_info[i].addr;
+			len = vcpu->arch.guestdbg.hw_wp_info[i].len;
+
+			extend_address_range(cr10, cr11, start, len);
+		}
+	}
+}
+/* Save the current gcr[0], gcr[9], gcr[10] and gcr[11] in arch.guestdbg. */
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.guestdbg.cr0 = vcpu->arch.sie_block->gcr[0];
+	vcpu->arch.guestdbg.cr9 = vcpu->arch.sie_block->gcr[9];
+	vcpu->arch.guestdbg.cr10 = vcpu->arch.sie_block->gcr[10];
+	vcpu->arch.guestdbg.cr11 = vcpu->arch.sie_block->gcr[11];
+}
+/* Write the values kept in arch.guestdbg back to gcr[0] and gcr[9..11]. */
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.sie_block->gcr[0] = vcpu->arch.guestdbg.cr0;
+	vcpu->arch.sie_block->gcr[9] = vcpu->arch.guestdbg.cr9;
+	vcpu->arch.sie_block->gcr[10] = vcpu->arch.guestdbg.cr10;
+	vcpu->arch.sie_block->gcr[11] = vcpu->arch.guestdbg.cr11;
+}
+/* Adjust gcr[0] and gcr[9..11] for single-stepping and hw bps/wps. */
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * TODO: if guest psw has per enabled, otherwise 0s!
+	 * This reduces the amount of reported events.
+	 * Need to intercept all psw changes!
+	 */
+
+	if (guestdbg_sstep_enabled(vcpu)) {
+		/* disable timer (clock-comparator) interrupts */
+		vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
+		vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
+		vcpu->arch.sie_block->gcr[10] = 0;
+		vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
+	}
+
+	if (guestdbg_hw_bp_enabled(vcpu)) {
+		enable_all_hw_bp(vcpu);
+		enable_all_hw_wp(vcpu);
+	}
+
+	/* TODO: Instruction-fetching-nullification not allowed for now */
+	if (vcpu->arch.sie_block->gcr[9] & PER_EVENT_NULLIFICATION)
+		vcpu->arch.sie_block->gcr[9] &= ~PER_EVENT_NULLIFICATION;
+}
+
+#define MAX_WP_SIZE 100
+/* Fill @wp_info from @bp_data and snapshot the watched guest memory. */
+static int __import_wp_info(struct kvm_vcpu *vcpu,
+			    struct kvm_hw_breakpoint *bp_data,
+			    struct kvm_hw_wp_info_arch *wp_info)
+{
+	int rc;
+
+	wp_info->old_data = NULL;
+	wp_info->phys_addr = bp_data->phys_addr;
+	wp_info->addr = bp_data->addr;
+	wp_info->len = bp_data->len;
+
+	if (wp_info->len < 0 || wp_info->len > MAX_WP_SIZE)
+		return -EINVAL;
+
+	wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL);
+	if (!wp_info->old_data)
+		return -ENOMEM;
+	/* try to backup the original value */
+	rc = read_guest_abs(vcpu, wp_info->phys_addr, wp_info->old_data,
+			    wp_info->len);
+	if (!rc)
+		return 0;
+	kfree(wp_info->old_data);
+	wp_info->old_data = NULL;
+	return rc;
+}
+
+#define MAX_BP_COUNT 50
+
+/*
+ * Copy the hw breakpoint list of @dbg from user space and publish it, split
+ * into breakpoints and watchpoints, in vcpu->arch.guestdbg. Returns 0 or a
+ * negative error code; on error nothing is published or left allocated.
+ */
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+			    struct kvm_guest_debug *dbg)
+{
+	int ret = 0, nr_wp = 0, nr_bp = 0, i, size;
+	struct kvm_hw_breakpoint *bp_data = NULL;
+	struct kvm_hw_wp_info_arch *wp_info = NULL;
+	struct kvm_hw_bp_info_arch *bp_info = NULL;
+
+	if (dbg->arch.nr_hw_bp <= 0 || !dbg->arch.hw_bp)
+		return 0;
+	else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
+		return -EINVAL;
+
+	size = dbg->arch.nr_hw_bp * sizeof(struct kvm_hw_breakpoint);
+	bp_data = kmalloc(size, GFP_KERNEL);
+	if (!bp_data)
+		return -ENOMEM;
+
+	if (copy_from_user(bp_data, dbg->arch.hw_bp, size)) {
+		ret = -EFAULT;
+		goto error;
+	}
+
+	for (i = 0; i < dbg->arch.nr_hw_bp; i++) {
+		if (bp_data[i].type == KVM_HW_WP_WRITE)
+			nr_wp++;
+		else if (bp_data[i].type == KVM_HW_BP)
+			nr_bp++;
+	}
+
+	ret = -ENOMEM;
+	size = nr_wp * sizeof(struct kvm_hw_wp_info_arch);
+	if (size > 0) {
+		wp_info = kmalloc(size, GFP_KERNEL);
+		if (!wp_info)
+			goto error;
+	}
+	size = nr_bp * sizeof(struct kvm_hw_bp_info_arch);
+	if (size > 0) {
+		bp_info = kmalloc(size, GFP_KERNEL);
+		if (!bp_info)
+			goto error;
+	}
+
+	for (nr_wp = 0, nr_bp = 0, i = 0; i < dbg->arch.nr_hw_bp; i++) {
+		switch (bp_data[i].type) {
+		case KVM_HW_WP_WRITE:
+			ret = __import_wp_info(vcpu, &bp_data[i],
+					       &wp_info[nr_wp]);
+			if (ret)
+				goto error_wp;
+			nr_wp++;
+			break;
+		case KVM_HW_BP:
+			bp_info[nr_bp].len = bp_data[i].len;
+			bp_info[nr_bp].addr = bp_data[i].addr;
+			nr_bp++;
+			break;
+		}
+	}
+
+	vcpu->arch.guestdbg.nr_hw_bp = nr_bp;
+	vcpu->arch.guestdbg.hw_bp_info = bp_info;
+	vcpu->arch.guestdbg.nr_hw_wp = nr_wp;
+	vcpu->arch.guestdbg.hw_wp_info = wp_info;
+	/* the copy of the user data is only needed during the import */
+	kfree(bp_data);
+	return 0;
+error_wp:
+	/* free the snapshots of the watchpoints imported before the failure */
+	while (nr_wp-- > 0)
+		kfree(wp_info[nr_wp].old_data);
+error:
+	kfree(bp_data);
+	kfree(wp_info);
+	kfree(bp_info);
+	return ret;
+}
+/* Free all hw breakpoint/watchpoint data of @vcpu and reset the counters. */
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu)
+{
+	struct kvm_hw_wp_info_arch *wp;
+	int idx;
+
+	for (idx = 0; idx < vcpu->arch.guestdbg.nr_hw_wp; idx++) {
+		wp = &vcpu->arch.guestdbg.hw_wp_info[idx];
+		kfree(wp->old_data);
+		wp->old_data = NULL;
+	}
+
+	kfree(vcpu->arch.guestdbg.hw_wp_info);
+	kfree(vcpu->arch.guestdbg.hw_bp_info);
+	vcpu->arch.guestdbg.hw_wp_info = NULL;
+	vcpu->arch.guestdbg.hw_bp_info = NULL;
+
+	vcpu->arch.guestdbg.nr_hw_wp = 0;
+	vcpu->arch.guestdbg.nr_hw_bp = 0;
+}
+/* Is addr in the inclusive range [a, b]? a > b denotes a wrapping range. */
+static inline int in_addr_range(u64 addr, u64 a, u64 b)
+{
+	if (a <= b)
+		return (addr >= a) && (addr <= b);
+	else
+		/* "overflowing" interval: [a, max] and [0, b] */
+		return (addr >= a) || (addr <= b);
+}
+/* Last address covered by @bp_info, i.e. addr + len - 1. */
+#define end_of_range(bp_info) ((bp_info)->addr + (bp_info)->len - 1)
+/*
+ * Return the first hw breakpoint whose start or range matches @addr, or NULL.
+ */
+static struct kvm_hw_bp_info_arch *find_hw_bp(struct kvm_vcpu *vcpu,
+					      unsigned long addr)
+{
+	struct kvm_hw_bp_info_arch *bp = vcpu->arch.guestdbg.hw_bp_info;
+	int idx;
+
+	if (vcpu->arch.guestdbg.nr_hw_bp == 0)
+		return NULL;
+
+	for (idx = 0; idx < vcpu->arch.guestdbg.nr_hw_bp; idx++, bp++) {
+		/* addr is directly the start of a bp ... */
+		if (addr == bp->addr)
+			return bp;
+		/* ... or lies in the range of a bp that has a length */
+		if (bp->len <= 0)
+			continue;
+		if (in_addr_range(addr, bp->addr, end_of_range(bp)))
+			return bp;
+	}
+
+	return NULL;
+}
+/*
+ * Return the first watchpoint whose guest memory no longer matches the copy
+ * kept in old_data, or NULL if no change was detected.
+ */
+static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
+{
+	struct kvm_hw_wp_info_arch *wp;
+	void *cur;
+	int idx, changed;
+
+	if (vcpu->arch.guestdbg.nr_hw_wp == 0)
+		return NULL;
+
+	for (idx = 0; idx < vcpu->arch.guestdbg.nr_hw_wp; idx++) {
+		wp = &vcpu->arch.guestdbg.hw_wp_info[idx];
+		if (!wp || !wp->old_data || wp->len <= 0)
+			continue;
+
+		/* without a buffer this watchpoint cannot be checked */
+		cur = kmalloc(wp->len, GFP_KERNEL);
+		if (!cur)
+			continue;
+
+		/* refetch the wp data and compare it to the old value */
+		changed = !read_guest_abs(vcpu, wp->phys_addr, cur, wp->len) &&
+			  memcmp(cur, wp->old_data, wp->len);
+		kfree(cur);
+		if (changed)
+			return wp;
+	}
+
+	return NULL;
+}
+/* Clear the pending-exit flag and set the exit reason to KVM_EXIT_DEBUG. */
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu)
+{
+	vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+	vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+}
+/* Select the PER event bits of @code that concern hw bps / write wps. */
+#define per_bp_event(code) \
+			((code) & (PER_EVENT_IFETCH | PER_EVENT_BRANCH))
+#define per_write_wp_event(code) \
+			((code) & (PER_EVENT_STORE | PER_EVENT_STORE_REAL))
+/* Returns 1 and fills run->debug.arch if a debug exit is needed, else 0. */
+static int debug_exit_required(struct kvm_vcpu *vcpu)
+{
+	u32 perc = (vcpu->arch.sie_block->perc << 24);
+	struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+	struct kvm_hw_wp_info_arch *wp_info = NULL;
+	struct kvm_hw_bp_info_arch *bp_info = NULL;
+	unsigned long addr = vcpu->arch.sie_block->gpsw.addr;
+	unsigned long peraddr = vcpu->arch.sie_block->peraddr;
+
+	if (guestdbg_hw_bp_enabled(vcpu)) {
+		if (per_write_wp_event(perc) &&
+		    vcpu->arch.guestdbg.nr_hw_wp > 0) {
+			wp_info = any_wp_changed(vcpu);
+			if (wp_info) {
+				debug_exit->addr = wp_info->addr;
+				debug_exit->type = KVM_HW_WP_WRITE;
+				goto exit_required;
+			}
+		}
+		if (per_bp_event(perc) &&
+			 vcpu->arch.guestdbg.nr_hw_bp > 0) {
+			bp_info = find_hw_bp(vcpu, addr);
+			/* remove duplicate events if PC==PER address */
+			if (bp_info && (addr != peraddr)) {
+				debug_exit->addr = addr;
+				debug_exit->type = KVM_HW_BP;
+				vcpu->arch.guestdbg.last_bp = addr;
+				goto exit_required;
+			}
+			/* breakpoint missed, report the PER address */
+			bp_info = find_hw_bp(vcpu, peraddr);
+			if (bp_info && vcpu->arch.guestdbg.last_bp != peraddr) {
+				debug_exit->addr = peraddr;
+				debug_exit->type = KVM_HW_BP;
+				goto exit_required;
+			}
+		}
+	}
+	if (guestdbg_sstep_enabled(vcpu) && per_bp_event(perc)) {
+		debug_exit->addr = addr;
+		debug_exit->type = KVM_SINGLESTEP;
+		goto exit_required;
+	}
+
+	return 0;
+exit_required:
+	return 1;
+}
+/* Nonzero if PSW_MASK_PER is set in the guest PSW of @vcpu. */
+#define guest_per_enabled(vcpu) \
+			     ((vcpu)->arch.sie_block->gpsw.mask & PSW_MASK_PER)
+/* Reduce perc to the PER events the guest itself enabled via CR9-CR11. */
+static void filter_guest_per_event(struct kvm_vcpu *vcpu)
+{
+	u32 perc = vcpu->arch.sie_block->perc << 24;
+	u64 peraddr = vcpu->arch.sie_block->peraddr;
+	u64 addr = vcpu->arch.sie_block->gpsw.addr;
+	u64 cr9 = vcpu->arch.sie_block->gcr[9];
+	u64 cr10 = vcpu->arch.sie_block->gcr[10];
+	u64 cr11 = vcpu->arch.sie_block->gcr[11];
+	/* filter all events, demanded by the guest */
+	u32 guest_perc = perc & cr9 & PER_EVENT_MASK;
+
+	if (!guest_per_enabled(vcpu))
+		guest_perc = 0;
+
+	/* filter "successful-branching" events */
+	if (guest_perc & PER_EVENT_BRANCH &&
+	    cr9 & PER_CONTROL_BRANCH_ADDRESS &&
+	    !in_addr_range(addr, cr10, cr11))
+		guest_perc &= ~PER_EVENT_BRANCH;
+
+	/* filter "instruction-fetching" events */
+	if (guest_perc & PER_EVENT_IFETCH &&
+	    !in_addr_range(peraddr, cr10, cr11))
+		guest_perc &= ~PER_EVENT_IFETCH;
+
+	/* All other PER events will be given to the guest */
+	/* TODO: Check altered address/address space */
+
+	vcpu->arch.sie_block->perc = guest_perc >> 24;
+
+	if (!guest_perc)
+		vcpu->arch.sie_block->iprcc &= ~PGM_PER;
+}
+/* Flag a pending debug exit if required, then filter the guest's events. */
+void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
+{
+	if (debug_exit_required(vcpu))
+		vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
+
+	filter_guest_per_event(vcpu);	/* rewrites perc, so it must run last */
+}
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
new file mode 100644
index 000000000..9e3779e3e
--- /dev/null
+++ b/arch/s390/kvm/intercept.c
@@ -0,0 +1,379 @@
+/*
+ * in-kernel handling for sie intercepts
+ *
+ * Copyright IBM Corp. 2008, 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+
+#include <asm/kvm_host.h>
+#include <asm/asm-offsets.h>
+#include <asm/irq.h>
+
+#include "kvm-s390.h"
+#include "gaccess.h"
+#include "trace.h"
+#include "trace-s390.h"
+
+/* Handlers for intercepted instructions, indexed by the high byte of ipa. */
+static const intercept_handler_t instruction_handlers[256] = {
+	[0x01] = kvm_s390_handle_01,
+	[0x82] = kvm_s390_handle_lpsw,
+	[0x83] = kvm_s390_handle_diag,
+	[0xae] = kvm_s390_handle_sigp,
+	[0xb2] = kvm_s390_handle_b2,
+	[0xb6] = kvm_s390_handle_stctl,
+	[0xb7] = kvm_s390_handle_lctl,
+	[0xb9] = kvm_s390_handle_b9,
+	[0xe5] = kvm_s390_handle_e5,
+	[0xeb] = kvm_s390_handle_eb,
+};
+/* Rewind the guest PSW via __rewind_psw() by @ilc or the EXECUTE length. */
+void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc)
+{
+	struct kvm_s390_sie_block *scb = vcpu->arch.sie_block;
+	int ex_ilc;
+
+	/* Use the length of the EXECUTE instruction if necessary */
+	if (scb->icptstatus & 1) {
+		ex_ilc = (scb->icptstatus >> 4) & 0x6;
+		ilc = ex_ilc ? ex_ilc : 4;
+	}
+	scb->gpsw.addr = __rewind_psw(scb->gpsw, ilc);
+}
+/*
+ * Intercepts that need no handling; only the exit statistics are updated.
+ */
+static int handle_noop(struct kvm_vcpu *vcpu)
+{
+	u8 code = vcpu->arch.sie_block->icptcode;
+
+	if (code == 0x0)
+		vcpu->stat.exit_null++;
+	else if (code == 0x10)
+		vcpu->stat.exit_external_request++;
+	/* every other code: nothing */
+
+	return 0;
+}
+/* Stop intercept: store status if flagged, stop the vcpu if a stop is due. */
+static int handle_stop(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc = 0;
+	uint8_t flags, stop_pending;
+
+	vcpu->stat.exit_stop_request++;
+
+	/* delay the stop if any non-stop irq is pending */
+	if (kvm_s390_vcpu_has_irq(vcpu, 1))
+		return 0;
+
+	/* avoid races with the injection/SIGP STOP code */
+	spin_lock(&li->lock);
+	flags = li->irq.stop.flags;
+	stop_pending = kvm_s390_is_stop_irq_pending(vcpu);
+	spin_unlock(&li->lock);
+
+	trace_kvm_s390_stop_request(stop_pending, flags);
+	if (!stop_pending)
+		return 0;	/* no stop irq is pending */
+
+	if (flags & KVM_S390_STOP_FLAG_STORE_STATUS) {
+		rc = kvm_s390_vcpu_store_status(vcpu,
+						KVM_S390_STORE_STATUS_NOADDR);
+		if (rc)
+			return rc;
+	}
+
+	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+		kvm_s390_vcpu_stop(vcpu);
+	return -EOPNOTSUPP;	/* NOTE(review): presumably exits to user space */
+}
+/* Validity intercepts are not handled: count, trace, warn once and fail. */
+static int handle_validity(struct kvm_vcpu *vcpu)
+{
+	int viwhy = vcpu->arch.sie_block->ipb >> 16;
+
+	vcpu->stat.exit_validity++;
+	trace_kvm_s390_intercept_validity(vcpu, viwhy);
+	WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy);
+	return -EOPNOTSUPP;
+}
+/* Dispatch an intercepted instruction by the high byte of ipa. */
+static int handle_instruction(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_sie_block *scb = vcpu->arch.sie_block;
+	intercept_handler_t fn;
+
+	vcpu->stat.exit_instruction++;
+	trace_kvm_s390_intercept_instruction(vcpu, scb->ipa, scb->ipb);
+
+	fn = instruction_handlers[scb->ipa >> 8];
+	if (!fn)
+		return -EOPNOTSUPP;
+	return fn(vcpu);
+}
+/* Build @pgm_info from the program-interruption fields of the SIE block. */
+static void __extract_prog_irq(struct kvm_vcpu *vcpu,
+			       struct kvm_s390_pgm_info *pgm_info)
+{
+	memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info));
+	pgm_info->code = vcpu->arch.sie_block->iprcc;
+
+	switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
+	case PGM_AFX_TRANSLATION:
+	case PGM_ASX_TRANSLATION:
+	case PGM_EX_TRANSLATION:
+	case PGM_LFX_TRANSLATION:
+	case PGM_LSTE_SEQUENCE:
+	case PGM_LSX_TRANSLATION:
+	case PGM_LX_TRANSLATION:
+	case PGM_PRIMARY_AUTHORITY:
+	case PGM_SECONDARY_AUTHORITY:
+	case PGM_SPACE_SWITCH:
+		pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+		break;
+	case PGM_ALEN_TRANSLATION:
+	case PGM_ALE_SEQUENCE:
+	case PGM_ASTE_INSTANCE:
+	case PGM_ASTE_SEQUENCE:
+	case PGM_ASTE_VALIDITY:
+	case PGM_EXTENDED_AUTHORITY:
+		pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
+		break;
+	case PGM_ASCE_TYPE:
+	case PGM_PAGE_TRANSLATION:
+	case PGM_REGION_FIRST_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_THIRD_TRANS:
+	case PGM_SEGMENT_TRANSLATION:
+		pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+		pgm_info->exc_access_id  = vcpu->arch.sie_block->eai;
+		pgm_info->op_access_id  = vcpu->arch.sie_block->oai;
+		break;
+	case PGM_MONITOR:
+		pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
+		pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
+		break;
+	case PGM_VECTOR_PROCESSING:
+	case PGM_DATA:
+		pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
+		break;
+	case PGM_PROTECTION:
+		pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
+		pgm_info->exc_access_id  = vcpu->arch.sie_block->eai;
+		break;
+	default:
+		break;	/* only the code is passed on */
+	}
+
+	if (vcpu->arch.sie_block->iprcc & PGM_PER) {
+		pgm_info->per_code = vcpu->arch.sie_block->perc;
+		pgm_info->per_atmid = vcpu->arch.sie_block->peratmid;
+		pgm_info->per_address = vcpu->arch.sie_block->peraddr;
+		pgm_info->per_access_id = vcpu->arch.sie_block->peraid;
+	}
+}
+
+/*
+ * Copy the ITDB to the program-interruption TDB in the guest lowcore, then
+ * zero it. Skipped if TE is off, the ITDB is invalid or PER_FLAG_NO_TE is set.
+ */
+static int handle_itdb(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_itdb *itdb;
+	int rc;
+
+	if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu))
+		return 0;
+	if (current->thread.per_flags & PER_FLAG_NO_TE)
+		return 0;
+	itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
+	rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
+	if (rc)
+		return rc;
+	memset(itdb, 0, sizeof(*itdb));
+
+	return 0;
+}
+/* Nonzero if the program interruption code of @vcpu has PGM_PER set. */
+#define per_event(vcpu) ((vcpu)->arch.sie_block->iprcc & PGM_PER)
+/* Program interruption intercept: filter PER events, then inject the irq. */
+static int handle_prog(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_pgm_info pgm_info;
+	psw_t psw;
+	int rc;
+
+	vcpu->stat.exit_program_interruption++;
+
+	if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
+		kvm_s390_handle_per_event(vcpu);
+		/* the interrupt might have been filtered out completely */
+		if (vcpu->arch.sie_block->iprcc == 0)
+			return 0;
+	}
+
+	trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
+	if (vcpu->arch.sie_block->iprcc == PGM_SPECIFICATION) {
+		rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &psw, sizeof(psw_t));
+		if (rc)
+			return rc;
+		/* Avoid endless loops of specification exceptions */
+		if (!is_valid_psw(&psw))
+			return -EOPNOTSUPP;
+	}
+	rc = handle_itdb(vcpu);
+	if (rc)
+		return rc;
+
+	__extract_prog_irq(vcpu, &pgm_info);
+	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+}
+/* Combined intercept: run both handlers; an instruction error wins. */
+static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
+{
+	int rc, rc2;
+
+	vcpu->stat.exit_instr_and_program++;
+	rc = handle_instruction(vcpu);
+	rc2 = handle_prog(vcpu);	/* runs even if the first handler failed */
+
+	if (rc == -EOPNOTSUPP)
+		vcpu->arch.sie_block->icptcode = 0x04;
+	if (rc)
+		return rc;
+	return rc2;
+}
+
+/**
+ * handle_external_interrupt - used for external interruption interceptions
+ *
+ * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
+ * the new PSW does not have external interrupts disabled. In the first case,
+ * we've got to deliver the interrupt manually, and in the second case, we
+ * drop to userspace to handle the situation there.
+ */
+static int handle_external_interrupt(struct kvm_vcpu *vcpu)
+{
+	u16 eic = vcpu->arch.sie_block->eic;
+	struct kvm_s390_irq irq;
+	psw_t newpsw;
+	int rc;
+
+	vcpu->stat.exit_external_interrupt++;
+
+	rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
+	if (rc)
+		return rc;
+	/* We cannot handle clock comparator or timer interrupts with bad PSW */
+	if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
+	    (newpsw.mask & PSW_MASK_EXT))
+		return -EOPNOTSUPP;
+
+	switch (eic) {
+	case EXT_IRQ_CLK_COMP:
+		irq.type = KVM_S390_INT_CLOCK_COMP;
+		break;
+	case EXT_IRQ_CPU_TIMER:
+		irq.type = KVM_S390_INT_CPU_TIMER;
+		break;
+	case EXT_IRQ_EXTERNAL_CALL:
+		irq.type = KVM_S390_INT_EXTERNAL_CALL;
+		irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr;
+		rc = kvm_s390_inject_vcpu(vcpu, &irq);
+		/* ignore if another external call is already pending */
+		if (rc == -EBUSY)
+			return 0;
+		return rc;
+	default:
+		return -EOPNOTSUPP;	/* other codes are not handled here */
+	}
+
+	return kvm_s390_inject_vcpu(vcpu, &irq);
+}
+
+/**
+ * handle_mvpg_pei - handle MOVE PAGE partial execution interception
+ *
+ * This interception can only happen for guests with DAT disabled and
+ * addresses that are currently not mapped in the host. Thus we try to
+ * set up the mappings for the corresponding user pages here (or throw
+ * addressing exceptions in case of illegal guest addresses).
+ */
+static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
+{
+	unsigned long srcaddr, dstaddr;
+	int reg1, reg2, rc;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	/* Make sure that the source is paged-in */
+	rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
+				     reg2, &srcaddr, 0);
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
+	if (rc != 0)
+		return rc;
+
+	/* Make sure that the destination is paged-in */
+	rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
+				     reg1, &dstaddr, 1);
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
+	if (rc != 0)
+		return rc;
+
+	kvm_s390_rewind_psw(vcpu, 4);	/* presumably so that MVPG is retried */
+
+	return 0;
+}
+/* Partial-execution intercepts are only handled for MVPG and SIGP. */
+static int handle_partial_execution(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.sie_block->ipa == 0xb254)	/* MVPG */
+		return handle_mvpg_pei(vcpu);
+	if (vcpu->arch.sie_block->ipa >> 8 == 0xae)	/* SIGP */
+		return kvm_s390_handle_sigp_pei(vcpu);
+
+	return -EOPNOTSUPP;
+}
+/* Intercept handlers, indexed by icptcode >> 2 (codes are multiples of 4). */
+static const intercept_handler_t intercept_funcs[] = {
+	[0x00 >> 2] = handle_noop,
+	[0x04 >> 2] = handle_instruction,
+	[0x08 >> 2] = handle_prog,
+	[0x0C >> 2] = handle_instruction_and_prog,
+	[0x10 >> 2] = handle_noop,
+	[0x14 >> 2] = handle_external_interrupt,
+	[0x18 >> 2] = handle_noop,
+	[0x1C >> 2] = kvm_s390_handle_wait,
+	[0x20 >> 2] = handle_validity,
+	[0x28 >> 2] = handle_stop,
+	[0x38 >> 2] = handle_partial_execution,
+};
+/* Dispatch a SIE intercept; unknown or unhandled codes yield -EOPNOTSUPP. */
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
+{
+	u8 code = vcpu->arch.sie_block->icptcode;
+	intercept_handler_t func = NULL;
+
+	/* valid codes are multiples of 4 that fit into the table */
+	if (!(code & 3) && (code >> 2) < ARRAY_SIZE(intercept_funcs))
+		func = intercept_funcs[code >> 2];
+	if (!func)
+		return -EOPNOTSUPP;
+	return func(vcpu);
+}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
new file mode 100644
index 000000000..b745a109b
--- /dev/null
+++ b/arch/s390/kvm/interrupt.c
@@ -0,0 +1,2268 @@
+/*
+ * handling kvm guest interrupts
+ *
+ * Copyright IBM Corp. 2008, 2015
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kvm_host.h>
+#include <linux/hrtimer.h>
+#include <linux/mmu_context.h>
+#include <linux/signal.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+#include <linux/vmalloc.h>
+#include <asm/asm-offsets.h>
+#include <asm/dis.h>
+#include <asm/uaccess.h>
+#include <asm/sclp.h>
+#include <asm/isc.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+#include "trace-s390.h"
+
+#define IOINT_SCHID_MASK 0x0000ffff
+#define IOINT_SSID_MASK 0x00030000
+#define IOINT_CSSID_MASK 0x03fc0000
+#define IOINT_AI_MASK 0x04000000
+#define PFAULT_INIT 0x0600	/* stored at __LC_EXT_CPU_ADDR for pfault init */
+#define PFAULT_DONE 0x0680
+#define VIRTIO_PARAM 0x0d00
+/* Nonzero if PSW_MASK_EXT is clear in the guest PSW mask. */
+int psw_extint_disabled(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT) == 0;
+}
+/* Nonzero if PSW_MASK_IO is clear in the guest PSW mask. */
+static int psw_ioint_disabled(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) == 0;
+}
+/* Nonzero if PSW_MASK_MCHECK is clear in the guest PSW mask. */
+static int psw_mchk_disabled(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK) == 0;
+}
+/* 1 if none of the PER, I/O and external mask bits is set in the guest PSW. */
+static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
+{
+	unsigned long enabled = PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT;
+
+	if (vcpu->arch.sie_block->gpsw.mask & enabled)
+		return 0;
+	return 1;
+}
+/* Clock comparator irqs need PSW ext, CR0 bit 0x800 and no single-stepping. */
+static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+	if (psw_extint_disabled(vcpu))
+		return 0;
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+		return 0;
+
+	/* No timer interrupts when single stepping */
+	return !(guestdbg_enabled(vcpu) && guestdbg_sstep_enabled(vcpu));
+}
+/* Pending once TOD clock + epoch exceeds ckc, provided ckc irqs are enabled. */
+static int ckc_irq_pending(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.sie_block->ckc >=
+	    get_tod_clock_fast() + vcpu->arch.sie_block->epoch)
+		return 0;
+	return ckc_interrupts_enabled(vcpu);
+}
+/* CPU timer irqs need external irqs enabled in the PSW and CR0 bit 0x400. */
+static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+	return psw_extint_disabled(vcpu) ? 0 :
+	       (vcpu->arch.sie_block->gcr[0] & 0x400ul) != 0;
+}
+/* Pending if cputm >> 63 is nonzero and cpu timer irqs are enabled. */
+static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.sie_block->cputm >> 63) ?
+	       cpu_timer_interrupts_enabled(vcpu) != 0 : 0;
+}
+/* True for irq types in the range IRQ_PEND_IO_ISC_0..IRQ_PEND_IO_ISC_7. */
+static inline int is_ioirq(unsigned long irq_type)
+{
+	return !(irq_type < IRQ_PEND_IO_ISC_0 ||
+		 irq_type > IRQ_PEND_IO_ISC_7);
+}
+/* Mask bit for @isc as tested against gcr[6]: 0x80000000 >> isc. */
+static uint64_t isc_to_isc_bits(int isc)
+{
+	return (0x80ULL >> isc) << 24;	/* 64-bit math: no sign extension */
+}
+/* Extract the three bits selected by mask 0x38000000 from @int_word. */
+static inline u8 int_word_to_isc(u32 int_word)
+{
+	return (int_word >> 27) & 0x7;
+}
+/* Pending-irq bitmask of the VM-wide float_int structure. */
+static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu)
+{
+	return vcpu->kvm->arch.float_int.pending_irqs;
+}
+/* Pending-irq bitmask of this vcpu's local_int structure. */
+static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.local_int.pending_irqs;
+}
+/* Drop the I/O irq bits whose ISC bit is not set in gcr[6] from the mask. */
+static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
+				   unsigned long active_mask)
+{
+	int i;
+
+	for (i = 0; i <= MAX_ISC; i++)
+		if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i)))
+			active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i));
+
+	return active_mask;
+}
+/* Pending local and floating irqs left after applying the PSW/CR masks. */
+static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
+{
+	unsigned long active_mask;
+
+	active_mask = pending_local_irqs(vcpu);
+	active_mask |= pending_floating_irqs(vcpu);
+
+	if (psw_extint_disabled(vcpu))
+		active_mask &= ~IRQ_PEND_EXT_MASK;
+	if (psw_ioint_disabled(vcpu))
+		active_mask &= ~IRQ_PEND_IO_MASK;
+	else
+		active_mask = disable_iscs(vcpu, active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
+		__clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul))
+		__clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+		__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul))
+		__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+		__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
+	if (psw_mchk_disabled(vcpu))
+		active_mask &= ~IRQ_PEND_MCHK_MASK;
+	if (!(vcpu->arch.sie_block->gcr[14] &
+	      vcpu->kvm->arch.float_int.mchk.cr14))
+		__clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
+
+	/*
+	 * STOP irqs will never be actively delivered. They are triggered via
+	 * intercept requests and cleared when the stop intercept is performed.
+	 */
+	__clear_bit(IRQ_PEND_SIGP_STOP, &active_mask);
+
+	return active_mask;
+}
+/* Set CPUSTAT_WAIT and this vcpu's bit in the floating-irq idle_mask. */
+static void __set_cpu_idle(struct kvm_vcpu *vcpu)
+{
+	atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+	set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+}
+/* Clear CPUSTAT_WAIT and this vcpu's bit in the floating-irq idle_mask. */
+static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
+{
+	atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+	clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+}
+/* Reset the intercept requests, keeping those that guest debugging needs. */
+static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
+{
+	atomic_clear_mask(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
+			  &vcpu->arch.sie_block->cpuflags);
+	vcpu->arch.sie_block->lctl = 0x0000;
+	vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
+
+	if (guestdbg_enabled(vcpu)) {
+		vcpu->arch.sie_block->lctl |= (LCTL_CR0 | LCTL_CR9 |
+					       LCTL_CR10 | LCTL_CR11);
+		vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT);
+	}
+}
+/* Atomically set the bits of @flag in the SIE block's cpuflags. */
+static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
+{
+	atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
+}
+/* Pending floating I/O irqs: CPUSTAT_IO_INT if masked, else LCTL_CR6. */
+static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
+{
+	if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK))
+		return;
+	if (!psw_ioint_disabled(vcpu))
+		vcpu->arch.sie_block->lctl |= LCTL_CR6;
+	else
+		__set_cpuflag(vcpu, CPUSTAT_IO_INT);
+}
+/* Pending local external irqs: CPUSTAT_EXT_INT if masked, else LCTL_CR0. */
+static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
+{
+	if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
+		return;
+	if (!psw_extint_disabled(vcpu))
+		vcpu->arch.sie_block->lctl |= LCTL_CR0;
+	else
+		__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+}
+/* Pending local machine checks: ICTL_LPSW if masked, else LCTL_CR14. */
+static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
+{
+	if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
+		return;
+	if (!psw_mchk_disabled(vcpu))
+		vcpu->arch.sie_block->lctl |= LCTL_CR14;
+	else
+		vcpu->arch.sie_block->ictl |= ICTL_LPSW;
+}
+/* Set CPUSTAT_STOP_INT while a stop irq is pending for this vcpu. */
+static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu)
+{
+	if (kvm_s390_is_stop_irq_pending(vcpu))
+		__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+}
+
+/* Set interception requests for pending I/O, ext, mchk and stop irqs */
+static void set_intercept_indicators(struct kvm_vcpu *vcpu)
+{
+	set_intercept_indicators_io(vcpu);
+	set_intercept_indicators_ext(vcpu);
+	set_intercept_indicators_mchk(vcpu);
+	set_intercept_indicators_stop(vcpu);
+}
+/* Instruction length matching the current intercept code, 0 for the rest. */
+static u16 get_ilc(struct kvm_vcpu *vcpu)
+{
+	switch (vcpu->arch.sie_block->icptcode) {
+	case ICPT_INST:
+	case ICPT_INSTPROGI:
+	case ICPT_OPEREXC:
+	case ICPT_PARTEXEC:
+	case ICPT_IOINST:
+		/* last instruction only stored for these icptcodes */
+		return insn_length(vcpu->arch.sie_block->ipa >> 8);
+	case ICPT_PROGI:
+		return vcpu->arch.sie_block->pgmilc;
+	default:
+		return 0;
+	}
+}
+/* Deliver a CPU timer ext irq: store code and old PSW, load the new PSW. */
+static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+					 0, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+			   (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+	return rc ? -EFAULT : 0;	/* any failed access is reported as -EFAULT */
+}
+/* Deliver a clock comparator ext irq: store code/old PSW, load new PSW. */
+static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+					 0, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
+			   (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+	return rc ? -EFAULT : 0;	/* any failed access is reported as -EFAULT */
+}
+
+/*
+ * Deliver a pending pfault-init external interrupt to @vcpu.
+ *
+ * The queued parameters are snapshotted and the pending state is reset
+ * under the local interrupt lock; the guest accesses happen afterwards
+ * without the lock held.
+ *
+ * Returns 0 on success, -EFAULT if any guest access failed.
+ */
+static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+	psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
+	struct kvm_s390_ext_info pfault;
+	int failed;
+
+	spin_lock(&local->lock);
+	pfault = local->irq.ext;
+	clear_bit(IRQ_PEND_PFAULT_INIT, &local->pending_irqs);
+	local->irq.ext.ext_params2 = 0;
+	spin_unlock(&local->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx",
+		   0, pfault.ext_params2);
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_INT_PFAULT_INIT,
+					 0, pfault.ext_params2);
+
+	failed  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+			       (u16 *) __LC_EXT_INT_CODE);
+	failed |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
+	/* the old PSW must be saved before gpsw is overwritten */
+	failed |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, gpsw, sizeof(psw_t));
+	failed |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, gpsw, sizeof(psw_t));
+	failed |= put_guest_lc(vcpu, pfault.ext_params2,
+			       (u64 *) __LC_EXT_PARAMS2);
+
+	if (failed)
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Deliver pending machine checks (local and floating) to @vcpu.
+ *
+ * Local exigent/repressible conditions and floating repressible
+ * conditions are collected into one mchk record while holding both the
+ * floating and the local interrupt lock. If anything was collected, the
+ * cpu status is stored and the machine check is presented to the guest.
+ *
+ * Returns 0 on success (or when nothing was pending), -EFAULT if any
+ * of the store/guest-access steps reported an error.
+ */
+static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_mchk_info mchk = {};
+	unsigned long adtl_status_addr;
+	int deliver = 0;
+	int rc = 0;
+
+	/* lock order: floating interrupt lock first, then the local one */
+	spin_lock(&fi->lock);
+	spin_lock(&li->lock);
+	if (test_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs) ||
+	    test_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs)) {
+		/*
+		 * If there was an exigent machine check pending, then any
+		 * repressible machine checks that might have been pending
+		 * are indicated along with it, so always clear bits for
+		 * repressible and exigent interrupts
+		 */
+		mchk = li->irq.mchk;
+		clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+		clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+		memset(&li->irq.mchk, 0, sizeof(mchk));
+		deliver = 1;
+	}
+	/*
+	 * We indicate floating repressible conditions along with
+	 * other pending conditions. Channel Report Pending and Channel
+	 * Subsystem damage are the only two and are indicated by
+	 * bits in mcic and masked in cr14.
+	 */
+	if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+		mchk.mcic |= fi->mchk.mcic;
+		mchk.cr14 |= fi->mchk.cr14;
+		memset(&fi->mchk, 0, sizeof(mchk));
+		deliver = 1;
+	}
+	spin_unlock(&li->lock);
+	spin_unlock(&fi->lock);
+
+	if (deliver) {
+		VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+			   mchk.mcic);
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+						 KVM_S390_MCHK,
+						 mchk.cr14, mchk.mcic);
+
+		rc  = kvm_s390_vcpu_store_status(vcpu,
+						 KVM_S390_STORE_STATUS_PREFIXED);
+		/*
+		 * NOTE(review): adtl_status_addr is passed on below even if
+		 * this read fails, in which case it may be uninitialized -
+		 * confirm that this is acceptable.
+		 */
+		rc |= read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR,
+				    &adtl_status_addr,
+				    sizeof(unsigned long));
+		rc |= kvm_s390_vcpu_store_adtl_status(vcpu,
+						      adtl_status_addr);
+		rc |= put_guest_lc(vcpu, mchk.mcic,
+				   (u64 __user *) __LC_MCCK_CODE);
+		rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
+				   (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+				     &mchk.fixed_logout,
+				     sizeof(mchk.fixed_logout));
+		/* save the old PSW before gpsw is replaced by the new PSW */
+		rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw,
+				     sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+				    &vcpu->arch.sie_block->gpsw,
+				    sizeof(psw_t));
+	}
+	return rc ? -EFAULT : 0;
+}
+
+/*
+ * Deliver a pending restart interrupt to @vcpu.
+ *
+ * Saves the current guest PSW as restart old PSW, loads the restart PSW
+ * and then clears the pending bit.
+ *
+ * Returns 0 on success, -EFAULT if a guest access failed.
+ */
+static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
+{
+	psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
+	int failed;
+
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+	vcpu->stat.deliver_restart_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
+
+	/* the old PSW must be saved before gpsw is overwritten */
+	failed  = write_guest_lc(vcpu,
+				 offsetof(struct _lowcore, restart_old_psw),
+				 gpsw, sizeof(psw_t));
+	failed |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
+				gpsw, sizeof(psw_t));
+
+	clear_bit(IRQ_PEND_RESTART, &vcpu->arch.local_int.pending_irqs);
+
+	if (failed)
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Apply a pending set-prefix request to @vcpu.
+ *
+ * The queued prefix is fetched and reset under the local interrupt lock
+ * and then installed via kvm_s390_set_prefix(). Always returns 0.
+ */
+static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+	struct kvm_s390_prefix_info request;
+
+	spin_lock(&local->lock);
+	request = local->irq.prefix;
+	local->irq.prefix.address = 0;
+	clear_bit(IRQ_PEND_SET_PREFIX, &local->pending_irqs);
+	spin_unlock(&local->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", request.address);
+	vcpu->stat.deliver_prefix_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_SIGP_SET_PREFIX,
+					 request.address, 0);
+
+	kvm_s390_set_prefix(vcpu, request.address);
+
+	return 0;
+}
+
+/*
+ * Deliver one pending emergency signal to @vcpu.
+ *
+ * The lowest set bit in sigp_emerg_pending selects the cpu address that
+ * is reported; IRQ_PEND_EXT_EMERGENCY is only cleared once the bitmap
+ * is empty.
+ *
+ * Returns 0 on success, -EFAULT if any guest access failed.
+ */
+static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+	psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
+	int src_cpu;
+	int failed;
+
+	spin_lock(&local->lock);
+	src_cpu = find_first_bit(local->sigp_emerg_pending, KVM_MAX_VCPUS);
+	clear_bit(src_cpu, local->sigp_emerg_pending);
+	if (bitmap_empty(local->sigp_emerg_pending, KVM_MAX_VCPUS))
+		clear_bit(IRQ_PEND_EXT_EMERGENCY, &local->pending_irqs);
+	spin_unlock(&local->lock);
+
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+	vcpu->stat.deliver_emergency_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+					 src_cpu, 0);
+
+	failed  = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG,
+			       (u16 *)__LC_EXT_INT_CODE);
+	failed |= put_guest_lc(vcpu, src_cpu, (u16 *)__LC_EXT_CPU_ADDR);
+	/* the old PSW must be saved before gpsw is overwritten */
+	failed |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, gpsw, sizeof(psw_t));
+	failed |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, gpsw, sizeof(psw_t));
+
+	if (failed)
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Deliver a pending external call to @vcpu.
+ *
+ * The queued call is fetched and reset under the local interrupt lock;
+ * its code is reported to the guest as the cpu address.
+ *
+ * Returns 0 on success, -EFAULT if any guest access failed.
+ */
+static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+	psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
+	struct kvm_s390_extcall_info call;
+	int failed;
+
+	spin_lock(&local->lock);
+	call = local->irq.extcall;
+	local->irq.extcall.code = 0;
+	clear_bit(IRQ_PEND_EXT_EXTERNAL, &local->pending_irqs);
+	spin_unlock(&local->lock);
+
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
+	vcpu->stat.deliver_external_call++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_INT_EXTERNAL_CALL,
+					 call.code, 0);
+
+	failed  = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL,
+			       (u16 *)__LC_EXT_INT_CODE);
+	failed |= put_guest_lc(vcpu, call.code, (u16 *)__LC_EXT_CPU_ADDR);
+	/* the old PSW must be saved before gpsw is overwritten */
+	failed |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, gpsw, sizeof(psw_t));
+	failed |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, gpsw, sizeof(psw_t));
+
+	if (failed)
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Deliver the pending program interrupt to @vcpu.
+ *
+ * The queued program interrupt info is fetched and reset under the
+ * local interrupt lock. Depending on the interruption code (with the
+ * PGM_PER bit masked out) the matching additional fields are stored,
+ * PER information is added if PGM_PER is set, and finally the ilc, the
+ * breaking event address, the interruption code and the old PSW are
+ * stored and the program new PSW is loaded.
+ *
+ * Returns 0 on success, -EFAULT if any guest access failed.
+ */
+static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_pgm_info pgm_info;
+	int rc = 0, nullifying = false;
+	u16 ilc = get_ilc(vcpu);
+
+	spin_lock(&li->lock);
+	pgm_info = li->irq.pgm;
+	clear_bit(IRQ_PEND_PROG, &li->pending_irqs);
+	memset(&li->irq.pgm, 0, sizeof(pgm_info));
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+		   pgm_info.code, ilc);
+	vcpu->stat.deliver_program_int++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+					 pgm_info.code, 0);
+
+	/* codes without a case label store no additional information */
+	switch (pgm_info.code & ~PGM_PER) {
+	case PGM_AFX_TRANSLATION:
+	case PGM_ASX_TRANSLATION:
+	case PGM_EX_TRANSLATION:
+	case PGM_LFX_TRANSLATION:
+	case PGM_LSTE_SEQUENCE:
+	case PGM_LSX_TRANSLATION:
+	case PGM_LX_TRANSLATION:
+	case PGM_PRIMARY_AUTHORITY:
+	case PGM_SECONDARY_AUTHORITY:
+		nullifying = true;
+		/* fall through */
+	case PGM_SPACE_SWITCH:
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
+				  (u64 *)__LC_TRANS_EXC_CODE);
+		break;
+	case PGM_ALEN_TRANSLATION:
+	case PGM_ALE_SEQUENCE:
+	case PGM_ASTE_INSTANCE:
+	case PGM_ASTE_SEQUENCE:
+	case PGM_ASTE_VALIDITY:
+	case PGM_EXTENDED_AUTHORITY:
+		rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
+				  (u8 *)__LC_EXC_ACCESS_ID);
+		nullifying = true;
+		break;
+	case PGM_ASCE_TYPE:
+	case PGM_PAGE_TRANSLATION:
+	case PGM_REGION_FIRST_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_THIRD_TRANS:
+	case PGM_SEGMENT_TRANSLATION:
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
+				  (u64 *)__LC_TRANS_EXC_CODE);
+		rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
+				   (u8 *)__LC_EXC_ACCESS_ID);
+		rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
+				   (u8 *)__LC_OP_ACCESS_ID);
+		nullifying = true;
+		break;
+	case PGM_MONITOR:
+		rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
+				  (u16 *)__LC_MON_CLASS_NR);
+		rc |= put_guest_lc(vcpu, pgm_info.mon_code,
+				   (u64 *)__LC_MON_CODE);
+		break;
+	case PGM_VECTOR_PROCESSING:
+	case PGM_DATA:
+		rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
+				  (u32 *)__LC_DATA_EXC_CODE);
+		break;
+	case PGM_PROTECTION:
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
+				  (u64 *)__LC_TRANS_EXC_CODE);
+		rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
+				   (u8 *)__LC_EXC_ACCESS_ID);
+		break;
+	case PGM_STACK_FULL:
+	case PGM_STACK_EMPTY:
+	case PGM_STACK_SPECIFICATION:
+	case PGM_STACK_TYPE:
+	case PGM_STACK_OPERATION:
+	case PGM_TRACE_TABEL:
+	case PGM_CRYPTO_OPERATION:
+		nullifying = true;
+		break;
+	}
+
+	/* PER information is stored in addition to the base code's data */
+	if (pgm_info.code & PGM_PER) {
+		rc |= put_guest_lc(vcpu, pgm_info.per_code,
+				   (u8 *) __LC_PER_CODE);
+		rc |= put_guest_lc(vcpu, pgm_info.per_atmid,
+				   (u8 *)__LC_PER_ATMID);
+		rc |= put_guest_lc(vcpu, pgm_info.per_address,
+				   (u64 *) __LC_PER_ADDRESS);
+		rc |= put_guest_lc(vcpu, pgm_info.per_access_id,
+				   (u8 *) __LC_PER_ACCESS_ID);
+	}
+
+	/*
+	 * For nullifying conditions raised on an instruction intercept the
+	 * PSW is rewound by ilc; this has to happen before the old PSW is
+	 * stored below.
+	 */
+	if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST)
+		kvm_s390_rewind_psw(vcpu, ilc);
+
+	rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
+	rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
+				 (u64 *) __LC_LAST_BREAK);
+	rc |= put_guest_lc(vcpu, pgm_info.code,
+			   (u16 *)__LC_PGM_INT_CODE);
+	/* save the old PSW before gpsw is replaced by the new PSW */
+	rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+/*
+ * Deliver the pending floating service signal to @vcpu.
+ *
+ * Nothing is delivered (and 0 is returned) if the service bit is no
+ * longer pending once the floating interrupt lock is held. Otherwise
+ * the queued parameters are taken over and reset under the lock.
+ *
+ * Returns 0 on success, -EFAULT if any guest access failed.
+ */
+static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
+	struct kvm_s390_ext_info signal;
+	int failed;
+
+	spin_lock(&fi->lock);
+	if (!test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) {
+		spin_unlock(&fi->lock);
+		return 0;
+	}
+	signal = fi->srv_signal;
+	memset(&fi->srv_signal, 0, sizeof(signal));
+	clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+		   signal.ext_params);
+	vcpu->stat.deliver_service_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
+					 signal.ext_params, 0);
+
+	failed  = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG,
+			       (u16 *)__LC_EXT_INT_CODE);
+	failed |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+	/* the old PSW must be saved before gpsw is overwritten */
+	failed |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, gpsw, sizeof(psw_t));
+	failed |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, gpsw, sizeof(psw_t));
+	failed |= put_guest_lc(vcpu, signal.ext_params,
+			       (u32 *)__LC_EXT_PARAMS);
+
+	if (failed)
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Deliver one queued floating pfault-done interrupt to @vcpu.
+ *
+ * The first entry of the pfault list is dequeued under the floating
+ * interrupt lock; the pending bit is cleared when the list is empty.
+ * The dequeued entry is freed after it has been presented.
+ *
+ * Returns 0 on success or if the list was empty, -EFAULT if any guest
+ * access failed.
+ */
+static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct list_head *queue = &fi->lists[FIRQ_LIST_PFAULT];
+	psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
+	struct kvm_s390_interrupt_info *done;
+	int failed;
+
+	spin_lock(&fi->lock);
+	done = list_first_entry_or_null(queue, struct kvm_s390_interrupt_info,
+					list);
+	if (done) {
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+						 KVM_S390_INT_PFAULT_DONE, 0,
+						 done->ext.ext_params2);
+		list_del(&done->list);
+		fi->counters[FIRQ_CNTR_PFAULT] -= 1;
+	}
+	if (list_empty(queue))
+		clear_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+
+	if (!done)
+		return 0;
+
+	failed  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+			       (u16 *)__LC_EXT_INT_CODE);
+	failed |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR);
+	/* the old PSW must be saved before gpsw is overwritten */
+	failed |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, gpsw, sizeof(psw_t));
+	failed |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, gpsw, sizeof(psw_t));
+	failed |= put_guest_lc(vcpu, done->ext.ext_params2,
+			       (u64 *)__LC_EXT_PARAMS2);
+	kfree(done);
+
+	if (failed)
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Deliver one queued floating virtio interrupt to @vcpu.
+ *
+ * The first entry of the virtio list is dequeued under the floating
+ * interrupt lock; the pending bit is cleared when the list is empty.
+ * The dequeued entry is freed after it has been presented.
+ *
+ * Returns 0 on success or if the list was empty, -EFAULT if any guest
+ * access failed.
+ */
+static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct list_head *queue = &fi->lists[FIRQ_LIST_VIRTIO];
+	psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
+	struct kvm_s390_interrupt_info *virt;
+	int failed;
+
+	spin_lock(&fi->lock);
+	virt = list_first_entry_or_null(queue, struct kvm_s390_interrupt_info,
+					list);
+	if (virt) {
+		VCPU_EVENT(vcpu, 4,
+			   "interrupt: virtio parm:%x,parm64:%llx",
+			   virt->ext.ext_params, virt->ext.ext_params2);
+		vcpu->stat.deliver_virtio_interrupt++;
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+						 virt->type,
+						 virt->ext.ext_params,
+						 virt->ext.ext_params2);
+		list_del(&virt->list);
+		fi->counters[FIRQ_CNTR_VIRTIO] -= 1;
+	}
+	if (list_empty(queue))
+		clear_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+
+	if (!virt)
+		return 0;
+
+	failed  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+			       (u16 *)__LC_EXT_INT_CODE);
+	failed |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR);
+	/* the old PSW must be saved before gpsw is overwritten */
+	failed |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, gpsw, sizeof(psw_t));
+	failed |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, gpsw, sizeof(psw_t));
+	failed |= put_guest_lc(vcpu, virt->ext.ext_params,
+			       (u32 *)__LC_EXT_PARAMS);
+	failed |= put_guest_lc(vcpu, virt->ext.ext_params2,
+			       (u64 *)__LC_EXT_PARAMS2);
+	kfree(virt);
+
+	if (failed)
+		return -EFAULT;
+	return 0;
+}
+
+/*
+ * Deliver one queued floating I/O interrupt to @vcpu.
+ *
+ * @irq_type selects the per-ISC list (index irq_type - IRQ_PEND_IO_ISC_0).
+ * The first entry is dequeued under the floating interrupt lock and the
+ * matching pending bit is cleared when that list is empty. The dequeued
+ * entry is freed after it has been presented.
+ *
+ * Returns 0 on success or if the list was empty, -EFAULT if any guest
+ * access failed.
+ */
+static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
+				     unsigned long irq_type)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct list_head *queue = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0];
+	psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
+	struct kvm_s390_interrupt_info *io;
+	int failed;
+
+	spin_lock(&fi->lock);
+	io = list_first_entry_or_null(queue, struct kvm_s390_interrupt_info,
+				      list);
+	if (io) {
+		VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", io->type);
+		vcpu->stat.deliver_io_int++;
+		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+				io->type,
+				((__u32)io->io.subchannel_id << 16) |
+				io->io.subchannel_nr,
+				((__u64)io->io.io_int_parm << 32) |
+				io->io.io_int_word);
+		list_del(&io->list);
+		fi->counters[FIRQ_CNTR_IO] -= 1;
+	}
+	if (list_empty(queue))
+		clear_bit(irq_type, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+
+	if (!io)
+		return 0;
+
+	failed  = put_guest_lc(vcpu, io->io.subchannel_id,
+			       (u16 *)__LC_SUBCHANNEL_ID);
+	failed |= put_guest_lc(vcpu, io->io.subchannel_nr,
+			       (u16 *)__LC_SUBCHANNEL_NR);
+	failed |= put_guest_lc(vcpu, io->io.io_int_parm,
+			       (u32 *)__LC_IO_INT_PARM);
+	failed |= put_guest_lc(vcpu, io->io.io_int_word,
+			       (u32 *)__LC_IO_INT_WORD);
+	/* the old PSW must be saved before gpsw is overwritten */
+	failed |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, gpsw, sizeof(psw_t));
+	failed |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, gpsw, sizeof(psw_t));
+	kfree(io);
+
+	if (failed)
+		return -EFAULT;
+	return 0;
+}
+
+/* Handler that delivers one pending interrupt of a given type to a vcpu. */
+typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
+
+/*
+ * Delivery handlers indexed by IRQ_PEND_* bit number. Types without an
+ * entry are NULL; I/O interrupts are not listed here because their
+ * handler, __deliver_io(), takes an additional irq_type argument.
+ */
+static const deliver_irq_t deliver_irq_funcs[] = {
+	[IRQ_PEND_MCHK_EX]        = __deliver_machine_check,
+	[IRQ_PEND_MCHK_REP]       = __deliver_machine_check,
+	[IRQ_PEND_PROG]           = __deliver_prog,
+	[IRQ_PEND_EXT_EMERGENCY]  = __deliver_emergency_signal,
+	[IRQ_PEND_EXT_EXTERNAL]   = __deliver_external_call,
+	[IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
+	[IRQ_PEND_EXT_CPU_TIMER]  = __deliver_cpu_timer,
+	[IRQ_PEND_RESTART]        = __deliver_restart,
+	[IRQ_PEND_SET_PREFIX]     = __deliver_set_prefix,
+	[IRQ_PEND_PFAULT_INIT]    = __deliver_pfault_init,
+	[IRQ_PEND_EXT_SERVICE]    = __deliver_service,
+	[IRQ_PEND_PFAULT_DONE]    = __deliver_pfault_done,
+	[IRQ_PEND_VIRTIO]         = __deliver_virtio,
+};
+
+/*
+ * Check whether an external call is pending (deliverable or not).
+ *
+ * Without sclp_has_sigpif() the IRQ_PEND_EXT_EXTERNAL bit decides; with
+ * it, both SIGP_CTRL_C in the vcpu's SCA sigp_ctrl byte and
+ * CPUSTAT_ECALL_PEND in the cpuflags have to be set.
+ */
+int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
+{
+	uint8_t ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+
+	if (!sclp_has_sigpif())
+		return test_bit(IRQ_PEND_EXT_EXTERNAL,
+				&vcpu->arch.local_int.pending_irqs);
+
+	if (!(ctrl & SIGP_CTRL_C))
+		return 0;
+
+	return !!(atomic_read(&vcpu->arch.sie_block->cpuflags) &
+		  CPUSTAT_ECALL_PEND);
+}
+
+/*
+ * Tell whether @vcpu has work pending in the form of an interrupt.
+ *
+ * Returns 1 if there is a deliverable interrupt, a pending timer, a
+ * pending and enabled external call or - unless @exclude_stop is set -
+ * a pending stop interrupt; 0 otherwise.
+ */
+int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
+{
+	if (deliverable_irqs(vcpu))
+		return 1;
+
+	if (kvm_cpu_has_pending_timer(vcpu))
+		return 1;
+
+	/* external call pending and deliverable */
+	if (kvm_s390_ext_call_pending(vcpu) &&
+	    !psw_extint_disabled(vcpu) &&
+	    (vcpu->arch.sie_block->gcr[0] & 0x2000ul))
+		return 1;
+
+	if (!exclude_stop && kvm_s390_is_stop_irq_pending(vcpu))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Returns 1 if a clock comparator or a cpu timer interrupt is pending
+ * for @vcpu, 0 otherwise.
+ */
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	if (ckc_irq_pending(vcpu))
+		return 1;
+
+	return cpu_timer_irq_pending(vcpu) ? 1 : 0;
+}
+
+/*
+ * Handle a guest entering wait state.
+ *
+ * Returns 0 immediately if a timer is pending, the vcpu is runnable or
+ * the clock comparator already lies in the past, and -EOPNOTSUPP for a
+ * disabled wait. Otherwise the vcpu is marked idle and blocked; if
+ * clock comparator interrupts are enabled, the ckc hrtimer is armed for
+ * the remaining time first. The srcu read lock is dropped while the
+ * vcpu is blocked and re-acquired afterwards.
+ */
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
+{
+	u64 guest_tod, sleep_ns;
+
+	vcpu->stat.exit_wait_state++;
+
+	/* fast path */
+	if (kvm_cpu_has_pending_timer(vcpu) || kvm_arch_vcpu_runnable(vcpu))
+		return 0;
+
+	if (psw_interrupts_disabled(vcpu)) {
+		VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
+		return -EOPNOTSUPP; /* disabled wait */
+	}
+
+	if (ckc_interrupts_enabled(vcpu)) {
+		guest_tod = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
+		sleep_ns = tod_to_ns(vcpu->arch.sie_block->ckc - guest_tod);
+
+		/* underflow */
+		if (vcpu->arch.sie_block->ckc < guest_tod)
+			return 0;
+
+		__set_cpu_idle(vcpu);
+		hrtimer_start(&vcpu->arch.ckc_timer, ktime_set(0, sleep_ns),
+			      HRTIMER_MODE_REL);
+		VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns",
+			   sleep_ns);
+	} else {
+		VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
+		__set_cpu_idle(vcpu);
+	}
+
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	kvm_vcpu_block(vcpu);
+	__unset_cpu_idle(vcpu);
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	/* cancelled on both paths, whether or not the timer was armed */
+	hrtimer_cancel(&vcpu->arch.ckc_timer);
+	return 0;
+}
+
+/*
+ * Wake up @vcpu if it is currently sleeping on its wait queue; does
+ * nothing when the wait queue has no waiter.
+ */
+void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
+{
+	if (!waitqueue_active(&vcpu->wq))
+		return;
+
+	/*
+	 * The vcpu gave up the cpu voluntarily, mark it as a good
+	 * yield-candidate.
+	 */
+	vcpu->preempted = true;
+	wake_up_interruptible(&vcpu->wq);
+	vcpu->stat.halt_wakeup++;
+}
+
+/*
+ * hrtimer callback for the clock comparator timer of a waiting vcpu.
+ *
+ * Re-arms the timer (HRTIMER_RESTART) when the guest clock comparator
+ * has not been reached yet and the timer could be forwarded; otherwise
+ * wakes up the vcpu and returns HRTIMER_NORESTART.
+ */
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
+{
+	struct kvm_vcpu *vcpu = container_of(timer, struct kvm_vcpu,
+					     arch.ckc_timer);
+	u64 guest_tod = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
+	u64 remaining_ns = tod_to_ns(vcpu->arch.sie_block->ckc - guest_tod);
+
+	/*
+	 * If the monotonic clock runs faster than the tod clock we might be
+	 * woken up too early and have to go back to sleep to avoid deadlocks.
+	 */
+	if (vcpu->arch.sie_block->ckc > guest_tod) {
+		if (hrtimer_forward_now(timer, ns_to_ktime(remaining_ns)))
+			return HRTIMER_RESTART;
+	}
+
+	kvm_s390_vcpu_wakeup(vcpu);
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * Drop all local pending interrupts of @vcpu.
+ *
+ * Resets the pending bitmap, the emergency signal bitmap and the stored
+ * interrupt payloads under the local interrupt lock, then clears
+ * CPUSTAT_ECALL_PEND and the vcpu's SCA sigp_ctrl byte.
+ */
+void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+
+	spin_lock(&local->lock);
+	local->pending_irqs = 0;
+	bitmap_zero(local->sigp_emerg_pending, KVM_MAX_VCPUS);
+	memset(&local->irq, 0, sizeof(local->irq));
+	spin_unlock(&local->lock);
+
+	/* clear pending external calls set by sigp interpretation facility */
+	atomic_clear_mask(CPUSTAT_ECALL_PEND, local->cpuflags);
+	vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0;
+}
+
+/*
+ * Deliver all currently deliverable interrupts to @vcpu.
+ *
+ * The clock comparator and cpu timer pending bits are recomputed first.
+ * Interrupts are then delivered one at a time, lowest pending bit
+ * first, until none is deliverable or a handler fails. The intercept
+ * indicators are reset on entry and set again before returning.
+ *
+ * Returns 0 on success or the first non-zero handler return value.
+ */
+int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+	unsigned long pending, type;
+	deliver_irq_t deliver;
+	int rc = 0;
+
+	__reset_intercept_indicators(vcpu);
+
+	/* pending ckc conditions might have been invalidated */
+	clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &local->pending_irqs);
+	if (ckc_irq_pending(vcpu))
+		set_bit(IRQ_PEND_EXT_CLOCK_COMP, &local->pending_irqs);
+
+	/* pending cpu timer conditions might have been invalidated */
+	clear_bit(IRQ_PEND_EXT_CPU_TIMER, &local->pending_irqs);
+	if (cpu_timer_irq_pending(vcpu))
+		set_bit(IRQ_PEND_EXT_CPU_TIMER, &local->pending_irqs);
+
+	for (;;) {
+		pending = deliverable_irqs(vcpu);
+		/* bits are in the order of interrupt priority */
+		type = find_first_bit(&pending, IRQ_PEND_COUNT);
+		if (type == IRQ_PEND_COUNT)
+			break;
+
+		if (is_ioirq(type)) {
+			rc = __deliver_io(vcpu, type);
+		} else {
+			deliver = deliver_irq_funcs[type];
+			/* no handler: warn once, drop the bit and move on */
+			if (WARN_ON_ONCE(deliver == NULL)) {
+				clear_bit(type, &local->pending_irqs);
+				continue;
+			}
+			rc = deliver(vcpu);
+		}
+
+		if (rc)
+			break;
+	}
+
+	set_intercept_indicators(vcpu);
+
+	return rc;
+}
+
+/*
+ * Queue the program interrupt described by @irq on @vcpu: the complete
+ * irq->u.pgm payload is copied and IRQ_PEND_PROG is set. Always
+ * returns 0.
+ */
+static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+
+	local->irq.pgm = irq->u.pgm;
+	set_bit(IRQ_PEND_PROG, &local->pending_irqs);
+
+	return 0;
+}
+
+/*
+ * Inject a program interrupt with interruption code @code into @vcpu
+ * (kernel-internal injection). All other program interrupt fields are
+ * queued as zero. Always returns 0.
+ */
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	/*
+	 * Zero the whole irq: __inject_prog() copies all of irq.u.pgm, so
+	 * leaving the remaining fields uninitialized would queue stack
+	 * garbage that delivery may later store into the guest lowcore.
+	 */
+	struct kvm_s390_irq irq = {};
+
+	VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code,
+				   0, 1);
+	spin_lock(&li->lock);
+	irq.u.pgm.code = code;
+	__inject_prog(vcpu, &irq);
+	BUG_ON(waitqueue_active(li->wq));
+	spin_unlock(&li->lock);
+	return 0;
+}
+
+/*
+ * Inject the fully specified program interrupt @pgm_info into @vcpu
+ * (kernel-internal injection). Returns the result of __inject_prog().
+ */
+int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+			     struct kvm_s390_pgm_info *pgm_info)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+	struct kvm_s390_irq pgm_irq;
+	int ret;
+
+	VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
+		   pgm_info->code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+				   pgm_info->code, 0, 1);
+
+	spin_lock(&local->lock);
+	/* only the pgm member is filled in; it is all __inject_prog() reads */
+	pgm_irq.u.pgm = *pgm_info;
+	ret = __inject_prog(vcpu, &pgm_irq);
+	BUG_ON(waitqueue_active(local->wq));
+	spin_unlock(&local->lock);
+
+	return ret;
+}
+
+/*
+ * Queue a pfault-init external interrupt on @vcpu: stores the external
+ * parameters from @irq, sets IRQ_PEND_PFAULT_INIT and flags
+ * CPUSTAT_EXT_INT. Always returns 0.
+ */
+static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx",
+		   irq->u.ext.ext_params, irq->u.ext.ext_params2);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
+				   irq->u.ext.ext_params,
+				   irq->u.ext.ext_params2, 2);
+
+	local->irq.ext = irq->u.ext;
+	set_bit(IRQ_PEND_PFAULT_INIT, &local->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, local->cpuflags);
+
+	return 0;
+}
+
+/*
+ * Inject an external call from cpu @src_id through the vcpu's SCA
+ * sigp_ctrl byte.
+ *
+ * The byte is switched atomically from "no call pending" to "call
+ * pending from @src_id". Returns -EBUSY if that exchange fails, else 0
+ * after CPUSTAT_ECALL_PEND has been set.
+ */
+static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id)
+{
+	uint8_t *ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+	unsigned char wanted, expected;
+
+	wanted = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK);
+	expected = *ctrl & ~SIGP_CTRL_C;
+
+	/* another external call is pending */
+	if (cmpxchg(ctrl, expected, wanted) != expected)
+		return -EBUSY;
+
+	atomic_set_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+	return 0;
+}
+
+/*
+ * Queue an external call on @vcpu; irq->u.extcall.code names the
+ * sending cpu.
+ *
+ * Returns -EINVAL if the sender is not an existing vcpu, -EBUSY if an
+ * external call is already pending, 0 on success. With
+ * sclp_has_sigpif() the injection is done by __inject_extcall_sigpif()
+ * instead of the local pending bit.
+ */
+static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+	uint16_t sender = irq->u.extcall.code;
+
+	VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+		   sender);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
+				   sender, 0, 2);
+
+	/* sending vcpu invalid */
+	if (sender >= KVM_MAX_VCPUS)
+		return -EINVAL;
+	if (kvm_get_vcpu(vcpu->kvm, sender) == NULL)
+		return -EINVAL;
+
+	if (sclp_has_sigpif())
+		return __inject_extcall_sigpif(vcpu, sender);
+
+	if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &local->pending_irqs))
+		return -EBUSY;
+
+	local->irq.extcall = irq->u.extcall;
+	atomic_set_mask(CPUSTAT_EXT_INT, local->cpuflags);
+	return 0;
+}
+
+/*
+ * Queue a set-prefix request on @vcpu.
+ *
+ * Returns -EBUSY unless the vcpu is stopped; otherwise stores the
+ * requested prefix, sets IRQ_PEND_SET_PREFIX and returns 0.
+ */
+static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+		   irq->u.prefix.address);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
+				   irq->u.prefix.address, 0, 2);
+
+	if (!is_vcpu_stopped(vcpu))
+		return -EBUSY;
+
+	local->irq.prefix = irq->u.prefix;
+	set_bit(IRQ_PEND_SET_PREFIX, &local->pending_irqs);
+
+	return 0;
+}
+
+#define KVM_S390_STOP_SUPP_FLAGS (KVM_S390_STOP_FLAG_STORE_STATUS)
+/*
+ * Queue a SIGP stop request on @vcpu.
+ *
+ * Returns -EINVAL for flags outside KVM_S390_STOP_SUPP_FLAGS and -EBUSY
+ * if a stop is already pending. For an already stopped vcpu nothing is
+ * queued; only the status is stored if STORE_STATUS was requested and
+ * the result of that store is returned.
+ */
+static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
+
+	if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS)
+		return -EINVAL;
+
+	if (is_vcpu_stopped(vcpu)) {
+		if (!(irq->u.stop.flags & KVM_S390_STOP_FLAG_STORE_STATUS))
+			return 0;
+		return kvm_s390_store_status_unloaded(vcpu,
+						KVM_S390_STORE_STATUS_NOADDR);
+	}
+
+	if (test_and_set_bit(IRQ_PEND_SIGP_STOP, &local->pending_irqs))
+		return -EBUSY;
+
+	local->irq.stop.flags = irq->u.stop.flags;
+	__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+
+	return 0;
+}
+
+/*
+ * Queue a restart interrupt on @vcpu by setting IRQ_PEND_RESTART.
+ * Always returns 0.
+ */
+static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
+				 struct kvm_s390_irq *irq)
+{
+	VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2);
+
+	set_bit(IRQ_PEND_RESTART, &vcpu->arch.local_int.pending_irqs);
+
+	return 0;
+}
+
+/*
+ * Queue an emergency signal on @vcpu; irq->u.emerg.code names the
+ * sending cpu and is used as bit index into sigp_emerg_pending.
+ *
+ * Returns -EINVAL if the sender is not an existing vcpu, 0 on success.
+ */
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+				   struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: emergency %u\n",
+		   irq->u.emerg.code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+				   irq->u.emerg.code, 0, 2);
+
+	/*
+	 * sending vcpu invalid - without this check an out-of-range code
+	 * would set a bit beyond the end of the sigp_emerg_pending bitmap
+	 */
+	if (irq->u.emerg.code >= KVM_MAX_VCPUS ||
+	    kvm_get_vcpu(vcpu->kvm, irq->u.emerg.code) == NULL)
+		return -EINVAL;
+
+	set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
+	set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	return 0;
+}
+
+/*
+ * Merge the machine check described by @irq into the one pending on
+ * @vcpu and mark it pending as exigent or repressible depending on the
+ * accumulated mcic. Always returns 0.
+ */
+static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+	struct kvm_s390_mchk_info *pending = &local->irq.mchk;
+
+	VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
+		   irq->u.mchk.mcic);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
+				   irq->u.mchk.mcic, 2);
+
+	/*
+	 * Because repressible machine checks can be indicated along with
+	 * exigent machine checks (PoP, Chapter 11, Interruption action)
+	 * we need to combine cr14, mcic and external damage code.
+	 * Failing storage address and the logout area should not be or'ed
+	 * together, we just indicate the last occurrence of the corresponding
+	 * machine check
+	 */
+	pending->cr14 |= irq->u.mchk.cr14;
+	pending->mcic |= irq->u.mchk.mcic;
+	pending->ext_damage_code |= irq->u.mchk.ext_damage_code;
+	pending->failing_storage_address = irq->u.mchk.failing_storage_address;
+	memcpy(&pending->fixed_logout, &irq->u.mchk.fixed_logout,
+	       sizeof(pending->fixed_logout));
+
+	/* exigent conditions take precedence over repressible ones */
+	if (pending->mcic & MCHK_EX_MASK) {
+		set_bit(IRQ_PEND_MCHK_EX, &local->pending_irqs);
+		return 0;
+	}
+	if (pending->mcic & MCHK_REP_MASK)
+		set_bit(IRQ_PEND_MCHK_REP,  &local->pending_irqs);
+
+	return 0;
+}
+
+/*
+ * Mark a clock comparator interrupt pending on @vcpu and flag
+ * CPUSTAT_EXT_INT. Always returns 0.
+ */
+static int __inject_ckc(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+				   0, 0, 2);
+
+	set_bit(IRQ_PEND_EXT_CLOCK_COMP, &local->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, local->cpuflags);
+
+	return 0;
+}
+
+/*
+ * Mark a cpu timer interrupt pending on @vcpu and flag
+ * CPUSTAT_EXT_INT. Always returns 0.
+ */
+static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+				   0, 0, 2);
+
+	set_bit(IRQ_PEND_EXT_CPU_TIMER, &local->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, local->cpuflags);
+
+	return 0;
+}
+
+/*
+ * Dequeue the first floating I/O interrupt of subclass @isc that
+ * matches @schid (any entry matches if @schid is 0).
+ *
+ * The upper 16 bits of @schid are compared with the subchannel id, the
+ * lower 16 bits with the subchannel number. The pending bit for @isc is
+ * cleared when its list becomes empty. Returns the dequeued entry or
+ * NULL if nothing matched.
+ */
+static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm,
+						  int isc, u32 schid)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	struct list_head *queue = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+	struct kvm_s390_interrupt_info *cur, *match = NULL;
+	u16 want_id = (schid & 0xffff0000U) >> 16;
+	u16 want_nr = schid & 0x0000ffffU;
+
+	spin_lock(&fi->lock);
+	list_for_each_entry(cur, queue, list) {
+		if (schid && (want_id != cur->io.subchannel_id ||
+			      want_nr != cur->io.subchannel_nr))
+			continue;
+		/* found an appropriate entry */
+		match = cur;
+		break;
+	}
+	if (match) {
+		list_del_init(&match->list);
+		fi->counters[FIRQ_CNTR_IO] -= 1;
+		if (list_empty(queue))
+			clear_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
+	}
+	spin_unlock(&fi->lock);
+
+	return match;
+}
+
+/*
+ * Dequeue and return an I/O interrupt matching any of the interruption
+ * subclasses as designated by the isc mask in cr6 and the schid (if != 0).
+ *
+ * Subclasses are tried in ascending order from 0 to MAX_ISC; the first
+ * match wins. Returns NULL if no enabled subclass has a matching entry.
+ */
+struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+						    u64 isc_mask, u32 schid)
+{
+	struct kvm_s390_interrupt_info *found;
+	int isc;
+
+	for (isc = 0; isc <= MAX_ISC; isc++) {
+		if (!(isc_mask & isc_to_isc_bits(isc)))
+			continue;
+		found = get_io_int(kvm, isc, schid);
+		if (found)
+			return found;
+	}
+	return NULL;
+}
+
+#define SCCB_MASK 0xFFFFFFF8
+#define SCCB_EVENT_PENDING 0x3
+
+/*
+ * Merge a floating service signal into the pending one.
+ *
+ * The event-pending bits of @inti are always or'ed in. The sccb address
+ * part is only taken over (and the interrupt marked pending) when no
+ * sccb address is stored yet. @inti is always freed; always returns 0.
+ */
+static int __inject_service(struct kvm *kvm,
+			     struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+	spin_lock(&fi->lock);
+	fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
+	/*
+	 * Early versions of the QEMU s390 bios will inject several
+	 * service interrupts after another without handling a
+	 * condition code indicating busy.
+	 * We will silently ignore those superfluous sccb values.
+	 * A future version of QEMU will take care of serialization
+	 * of servc requests
+	 */
+	if (!(fi->srv_signal.ext_params & SCCB_MASK)) {
+		fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_MASK;
+		set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+	}
+	spin_unlock(&fi->lock);
+
+	kfree(inti);
+	return 0;
+}
+
+/*
+ * Append @inti to the floating virtio interrupt list.
+ *
+ * Returns -EBUSY (leaving @inti untouched) if KVM_S390_MAX_VIRTIO_IRQS
+ * entries are already queued, 0 once the entry has been queued and
+ * IRQ_PEND_VIRTIO has been set.
+ */
+static int __inject_virtio(struct kvm *kvm,
+			    struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	int ret = -EBUSY;
+
+	spin_lock(&fi->lock);
+	if (fi->counters[FIRQ_CNTR_VIRTIO] < KVM_S390_MAX_VIRTIO_IRQS) {
+		fi->counters[FIRQ_CNTR_VIRTIO] += 1;
+		list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_VIRTIO]);
+		set_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+		ret = 0;
+	}
+	spin_unlock(&fi->lock);
+
+	return ret;
+}
+
+/*
+ * Append @inti to the floating pfault-done interrupt list.
+ *
+ * Returns -EBUSY (leaving @inti untouched) if ASYNC_PF_PER_VCPU *
+ * KVM_MAX_VCPUS entries are already queued, 0 once the entry has been
+ * queued and IRQ_PEND_PFAULT_DONE has been set.
+ */
+static int __inject_pfault_done(struct kvm *kvm,
+				 struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	int ret = -EBUSY;
+
+	spin_lock(&fi->lock);
+	if (fi->counters[FIRQ_CNTR_PFAULT] <
+		(ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) {
+		fi->counters[FIRQ_CNTR_PFAULT] += 1;
+		list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_PFAULT]);
+		set_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+		ret = 0;
+	}
+	spin_unlock(&fi->lock);
+
+	return ret;
+}
+
+#define CR_PENDING_SUBCLASS 28
+/*
+ * Merge a floating machine check into the pending one.
+ *
+ * Only the CR_PENDING_SUBCLASS bit of cr14 is taken over; mcic is
+ * or'ed in completely. Marks IRQ_PEND_MCHK_REP pending, frees @inti
+ * and always returns 0.
+ */
+static int __inject_float_mchk(struct kvm *kvm,
+				struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	struct kvm_s390_mchk_info *pending = &fi->mchk;
+
+	spin_lock(&fi->lock);
+	pending->cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS);
+	pending->mcic |= inti->mchk.mcic;
+	set_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+
+	kfree(inti);
+	return 0;
+}
+
+/*
+ * Queue an I/O interrupt on the floating list that belongs to its ISC.
+ *
+ * The ISC is derived from the interrupt's io_int_word and selects both the
+ * list (FIRQ_LIST_IO_ISC_0 + isc) and the pending bit
+ * (IRQ_PEND_IO_ISC_0 + isc).  Returns 0 once @inti is queued, or -EBUSY
+ * when KVM_S390_MAX_FLOAT_IRQS I/O interrupts are already pending, in
+ * which case @inti is neither queued nor freed.
+ */
+static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	int rc = -EBUSY;
+	int isc;
+
+	spin_lock(&fi->lock);
+	if (fi->counters[FIRQ_CNTR_IO] < KVM_S390_MAX_FLOAT_IRQS) {
+		fi->counters[FIRQ_CNTR_IO]++;
+
+		isc = int_word_to_isc(inti->io.io_int_word);
+		list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]);
+		set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
+		rc = 0;
+	}
+	spin_unlock(&fi->lock);
+	return rc;
+}
+
+/*
+ * Queue a floating interrupt on the VM and kick one VCPU to take it.
+ *
+ * The type-specific helper takes over @inti when it returns 0: it either
+ * links it into a floating list or merges its payload and frees it.  @inti
+ * must therefore not be touched once a helper succeeded.  When an error is
+ * returned the caller still owns @inti.
+ *
+ * After queuing, an idle VCPU is preferred as the target; without one the
+ * existing VCPUs are picked round robin.  If the VM has no VCPU at all the
+ * interrupt simply stays queued and nobody is kicked.
+ *
+ * Returns 0 on success, the helper's error code (e.g. -EBUSY when the queue
+ * for that type is full) or -EINVAL for an unsupported interrupt type.
+ */
+static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_local_interrupt *li;
+	struct kvm_s390_float_interrupt *fi;
+	struct kvm_vcpu *dst_vcpu = NULL;
+	int sigcpu;
+	/* snapshot the type: inti may already be freed after the injection */
+	u64 type = READ_ONCE(inti->type);
+	int nr_tries = 0;
+	int rc;
+
+	fi = &kvm->arch.float_int;
+
+	switch (type) {
+	case KVM_S390_MCHK:
+		rc = __inject_float_mchk(kvm, inti);
+		break;
+	case KVM_S390_INT_VIRTIO:
+		rc = __inject_virtio(kvm, inti);
+		break;
+	case KVM_S390_INT_SERVICE:
+		rc = __inject_service(kvm, inti);
+		break;
+	case KVM_S390_INT_PFAULT_DONE:
+		rc = __inject_pfault_done(kvm, inti);
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		rc = __inject_io(kvm, inti);
+		break;
+	default:
+		rc = -EINVAL;
+	}
+	if (rc)
+		return rc;
+
+	/* find an idle VCPU first, then fall back to round robin */
+	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
+	if (sigcpu == KVM_MAX_VCPUS) {
+		do {
+			/*
+			 * KVM_MAX_VCPUS probes visit every slot once.  If none
+			 * of them holds a VCPU there is nobody to kick; going
+			 * on would loop forever.  The interrupt is queued, so
+			 * this is still a success.
+			 */
+			if (nr_tries++ >= KVM_MAX_VCPUS)
+				return 0;
+			sigcpu = fi->next_rr_cpu++;
+			if (sigcpu == KVM_MAX_VCPUS)
+				sigcpu = fi->next_rr_cpu = 0;
+		} while (kvm_get_vcpu(kvm, sigcpu) == NULL);
+	}
+	dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
+	li = &dst_vcpu->arch.local_int;
+
+	/* request the interception that matches the interrupt class */
+	spin_lock(&li->lock);
+	switch (type) {
+	case KVM_S390_MCHK:
+		atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
+		break;
+	default:
+		atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+		break;
+	}
+	spin_unlock(&li->lock);
+	kvm_s390_vcpu_wakeup(dst_vcpu);
+	return 0;
+
+}
+
+/*
+ * Inject a floating interrupt described by the userspace-format structure
+ * @s390int into @kvm.
+ *
+ * A kvm_s390_interrupt_info is allocated and filled from @s390int according
+ * to the interrupt type, then handed to __inject_vm().  If that fails the
+ * allocation is released again here.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, -EINVAL for a type
+ * that is not a floating interrupt, or the error from __inject_vm().
+ */
+int kvm_s390_inject_vm(struct kvm *kvm,
+		       struct kvm_s390_interrupt *s390int)
+{
+	struct kvm_s390_interrupt_info *inti;
+	int ret;
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (inti == NULL)
+		return -ENOMEM;
+
+	inti->type = s390int->type;
+	switch (inti->type) {
+	case KVM_S390_INT_SERVICE:
+		VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
+		inti->ext.ext_params = s390int->parm;
+		break;
+	case KVM_S390_INT_VIRTIO:
+		VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx",
+			 s390int->parm, s390int->parm64);
+		inti->ext.ext_params = s390int->parm;
+		inti->ext.ext_params2 = s390int->parm64;
+		break;
+	case KVM_S390_INT_PFAULT_DONE:
+		inti->ext.ext_params2 = s390int->parm64;
+		break;
+	case KVM_S390_MCHK:
+		VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
+			 s390int->parm64);
+		inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
+		inti->mchk.mcic = s390int->parm64;
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		if (inti->type & IOINT_AI_MASK)
+			VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
+		else
+			VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
+				 s390int->type & IOINT_CSSID_MASK,
+				 s390int->type & IOINT_SSID_MASK,
+				 s390int->type & IOINT_SCHID_MASK);
+		/* parm carries subchannel id (high half) and nr (low half) */
+		inti->io.subchannel_id = s390int->parm >> 16;
+		inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
+		/* parm64 carries int parm (high word) and int word (low word) */
+		inti->io.io_int_parm = s390int->parm64 >> 32;
+		inti->io.io_int_word = s390int->parm64 & 0x00000000ffffffffull;
+		break;
+	default:
+		kfree(inti);
+		return -EINVAL;
+	}
+	trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
+				 2);
+
+	ret = __inject_vm(kvm, inti);
+	if (ret != 0)
+		kfree(inti);	/* not consumed on failure */
+	return ret;
+}
+
+/*
+ * Put an already allocated interrupt info back onto the VM's floating
+ * interrupt queues.  This is a plain wrapper around __inject_vm() and
+ * returns its result; on a non-zero return @inti has not been consumed.
+ */
+int kvm_s390_reinject_io_int(struct kvm *kvm,
+			      struct kvm_s390_interrupt_info *inti)
+{
+	return __inject_vm(kvm, inti);
+}
+
+/*
+ * Convert the legacy kvm_s390_interrupt format into a kvm_s390_irq.
+ *
+ * irq->type is always copied.  For program, external call and emergency
+ * interrupts the code has to fit into the lower 16 bits of parm, otherwise
+ * -EINVAL is returned.  Types without a case below only get their type
+ * copied.  Returns 0 in all other cases.
+ */
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+		       struct kvm_s390_irq *irq)
+{
+	const bool code_too_wide = (s390int->parm & 0xffff0000) != 0;
+
+	irq->type = s390int->type;
+	switch (irq->type) {
+	case KVM_S390_PROGRAM_INT:
+		if (code_too_wide)
+			return -EINVAL;
+		irq->u.pgm.code = s390int->parm;
+		break;
+	case KVM_S390_INT_EXTERNAL_CALL:
+		if (code_too_wide)
+			return -EINVAL;
+		irq->u.extcall.code = s390int->parm;
+		break;
+	case KVM_S390_INT_EMERGENCY:
+		if (code_too_wide)
+			return -EINVAL;
+		irq->u.emerg.code = s390int->parm;
+		break;
+	case KVM_S390_SIGP_SET_PREFIX:
+		irq->u.prefix.address = s390int->parm;
+		break;
+	case KVM_S390_SIGP_STOP:
+		irq->u.stop.flags = s390int->parm;
+		break;
+	case KVM_S390_MCHK:
+		irq->u.mchk.mcic = s390int->parm64;
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Report whether a SIGP STOP interrupt is pending for @vcpu.  The pending
+ * bit is tested without taking the local interrupt lock.
+ */
+int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu)
+{
+	return test_bit(IRQ_PEND_SIGP_STOP,
+			&vcpu->arch.local_int.pending_irqs);
+}
+
+/*
+ * Drop a pending SIGP STOP interrupt of @vcpu: the stop flags are reset
+ * and the pending bit is cleared, both under the local interrupt lock.
+ */
+void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+
+	spin_lock(&local->lock);
+	local->irq.stop.flags = 0;
+	clear_bit(IRQ_PEND_SIGP_STOP, &local->pending_irqs);
+	spin_unlock(&local->lock);
+}
+
+/*
+ * Dispatch a local (per-VCPU) interrupt to its type-specific inject helper.
+ *
+ * Both callers in this file invoke it with the VCPU's local interrupt lock
+ * held.  Returns the helper's result; floating interrupt types (virtio,
+ * service, I/O) and unknown types are refused with -EINVAL.
+ */
+static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	switch (irq->type) {
+	case KVM_S390_PROGRAM_INT:
+		VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
+			   irq->u.pgm.code);
+		return __inject_prog(vcpu, irq);
+	case KVM_S390_SIGP_SET_PREFIX:
+		return __inject_set_prefix(vcpu, irq);
+	case KVM_S390_SIGP_STOP:
+		return __inject_sigp_stop(vcpu, irq);
+	case KVM_S390_RESTART:
+		return __inject_sigp_restart(vcpu, irq);
+	case KVM_S390_INT_CLOCK_COMP:
+		return __inject_ckc(vcpu);
+	case KVM_S390_INT_CPU_TIMER:
+		return __inject_cpu_timer(vcpu);
+	case KVM_S390_INT_EXTERNAL_CALL:
+		return __inject_extcall(vcpu, irq);
+	case KVM_S390_INT_EMERGENCY:
+		return __inject_sigp_emergency(vcpu, irq);
+	case KVM_S390_MCHK:
+		return __inject_mchk(vcpu, irq);
+	case KVM_S390_INT_PFAULT_INIT:
+		return __inject_pfault_init(vcpu, irq);
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_SERVICE:
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Inject a local interrupt into @vcpu.
+ *
+ * The injection itself runs under the VCPU's local interrupt lock; the
+ * VCPU is woken up only after the lock was dropped and only if the
+ * injection succeeded.  Returns the result of do_inject_vcpu().
+ */
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *local = &vcpu->arch.local_int;
+	int ret;
+
+	spin_lock(&local->lock);
+	ret = do_inject_vcpu(vcpu, irq);
+	spin_unlock(&local->lock);
+
+	if (ret)
+		return ret;
+
+	kvm_s390_vcpu_wakeup(vcpu);
+	return 0;
+}
+
+/*
+ * Unlink and free every kvm_s390_interrupt_info queued on @_list, leaving
+ * the list empty.
+ */
+static inline void clear_irq_list(struct list_head *_list)
+{
+	struct kvm_s390_interrupt_info *entry;
+
+	while (!list_empty(_list)) {
+		entry = list_first_entry(_list,
+					 struct kvm_s390_interrupt_info, list);
+		list_del(&entry->list);
+		kfree(entry);
+	}
+}
+
+/*
+ * Fill @irq from a queued floating interrupt.
+ *
+ * The type is always copied.  The payload is copied for pfault init/done
+ * and virtio interrupts (ext) and for I/O interrupts (io); for every other
+ * type the payload of @irq is left as the caller provided it.
+ */
+static void inti_to_irq(struct kvm_s390_interrupt_info *inti,
+		       struct kvm_s390_irq *irq)
+{
+	irq->type = inti->type;
+
+	switch (inti->type) {
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		irq->u.io = inti->io;
+		break;
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
+		irq->u.ext = inti->ext;
+		break;
+	}
+}
+
+/*
+ * Discard every pending floating interrupt of @kvm.
+ *
+ * Under the floating interrupt lock this clears the pending bitmap, the
+ * merged service-signal and machine-check state, frees all queued
+ * interrupt infos and resets the per-type counters.
+ */
+void kvm_s390_clear_float_irqs(struct kvm *kvm)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	int i;
+
+	spin_lock(&fi->lock);
+	fi->pending_irqs = 0;
+	memset(&fi->srv_signal, 0, sizeof(fi->srv_signal));
+	memset(&fi->mchk, 0, sizeof(fi->mchk));
+	for (i = 0; i < FIRQ_LIST_COUNT; i++)
+		clear_irq_list(&fi->lists[i]);
+	for (i = 0; i < FIRQ_MAX_COUNT; i++)
+		fi->counters[i] = 0;
+	spin_unlock(&fi->lock);
+}
+
+/*
+ * Copy all pending floating interrupts of @kvm to the user buffer @usrbuf
+ * of @len bytes.
+ *
+ * The interrupts are first collected into a zeroed kernel buffer while the
+ * floating interrupt lock is held and copied to userspace only after the
+ * lock was dropped.  Queued interrupts come first, followed by the merged
+ * service signal and machine check state if those are pending.
+ *
+ * Returns the number of interrupts copied, or
+ *   -EINVAL  if @len is 0 or larger than KVM_S390_FLIC_MAX_BUFFER,
+ *   -ENOBUFS if the kernel buffer cannot be allocated,
+ *   -ENOMEM  if @len is too small for all pending interrupts (userspace
+ *            may retry with a bigger buffer),
+ *   -EFAULT  if copying to @usrbuf fails.
+ */
+static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	struct kvm_s390_interrupt_info *inti;
+	struct kvm_s390_irq *buf;
+	int max_irqs;
+	int ret = 0;
+	int n = 0;
+	int i;
+
+	if (len == 0 || len > KVM_S390_FLIC_MAX_BUFFER)
+		return -EINVAL;
+
+	/*
+	 * -ENOMEM already tells userspace to retry with a bigger buffer,
+	 * so an allocation failure has to be reported differently.
+	 */
+	buf = vzalloc(len);
+	if (!buf)
+		return -ENOBUFS;
+
+	max_irqs = len / sizeof(struct kvm_s390_irq);
+
+	spin_lock(&fi->lock);
+	for (i = 0; i < FIRQ_LIST_COUNT; i++) {
+		list_for_each_entry(inti, &fi->lists[i], list) {
+			if (n == max_irqs)
+				goto out_nospace;
+			inti_to_irq(inti, &buf[n]);
+			n++;
+		}
+	}
+	if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) {
+		if (n == max_irqs)
+			goto out_nospace;
+		buf[n].type = KVM_S390_INT_SERVICE;
+		buf[n].u.ext = fi->srv_signal;
+		n++;
+	}
+	if (test_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+		if (n == max_irqs)
+			goto out_nospace;
+		buf[n].type = KVM_S390_MCHK;
+		buf[n].u.mchk = fi->mchk;
+		n++;
+	}
+	goto out;
+
+out_nospace:
+	/* signal userspace to try again */
+	ret = -ENOMEM;
+out:
+	spin_unlock(&fi->lock);
+	if (ret == 0 && n > 0 &&
+	    copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n))
+		ret = -EFAULT;
+	vfree(buf);
+
+	return ret < 0 ? ret : n;
+}
+
+/*
+ * get_attr handler of the flic device.  Only KVM_DEV_FLIC_GET_ALL_IRQS is
+ * supported: attr->addr is the user buffer and attr->attr its length.
+ * Any other group yields -EINVAL.
+ */
+static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	if (attr->group != KVM_DEV_FLIC_GET_ALL_IRQS)
+		return -EINVAL;
+
+	return get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
+				     attr->attr);
+}
+
+/*
+ * Read one kvm_s390_irq from the user address @addr into @inti.
+ *
+ * The type is fetched first and decides which payload member is copied:
+ * ext for pfault init/done, virtio and service interrupts, io for I/O
+ * interrupts, mchk for machine checks.
+ *
+ * Returns 0 on success, -EFAULT if userspace cannot be read and -EINVAL
+ * for any other interrupt type.
+ */
+static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti,
+				     u64 addr)
+{
+	struct kvm_s390_irq __user *uirq = (struct kvm_s390_irq __user *) addr;
+	void __user *src;
+	void *dst = NULL;
+	u64 nbytes;
+
+	if (get_user(inti->type, (u64 __user *)addr))
+		return -EFAULT;
+
+	switch (inti->type) {
+	case KVM_S390_MCHK:
+		dst = (void *) &inti->mchk;
+		src = &uirq->u.mchk;
+		nbytes = sizeof(inti->mchk);
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		dst = (void *) &inti->io;
+		src = &uirq->u.io;
+		nbytes = sizeof(inti->io);
+		break;
+	case KVM_S390_INT_PFAULT_INIT:
+	case KVM_S390_INT_PFAULT_DONE:
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_SERVICE:
+		dst = (void *) &inti->ext;
+		src = &uirq->u.ext;
+		nbytes = sizeof(inti->ext);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return copy_from_user(dst, src, nbytes) ? -EFAULT : 0;
+}
+
+/*
+ * Inject the array of kvm_s390_irq structures found at attr->addr as
+ * floating interrupts; attr->attr is the size of that array in bytes.
+ *
+ * The length must be a multiple of sizeof(struct kvm_s390_irq) and must not
+ * exceed KVM_S390_FLIC_MAX_BUFFER, otherwise -EINVAL is returned.  The
+ * interrupts are injected one by one; the first failure aborts the loop and
+ * is returned, interrupts injected before it stay injected.  attr->addr is
+ * advanced past every entry that was processed.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error from
+ * copy_irq_from_user() / __inject_vm().
+ */
+static int enqueue_floating_irq(struct kvm_device *dev,
+				struct kvm_device_attr *attr)
+{
+	struct kvm_s390_interrupt_info *inti = NULL;
+	int r = 0;
+	/*
+	 * Validate the full 64-bit length.  Truncating it to an int would let
+	 * oversized values slip through the checks below as small or negative
+	 * numbers.
+	 */
+	u64 len = attr->attr;
+
+	if (len % sizeof(struct kvm_s390_irq) != 0)
+		return -EINVAL;
+	else if (len > KVM_S390_FLIC_MAX_BUFFER)
+		return -EINVAL;
+
+	while (len >= sizeof(struct kvm_s390_irq)) {
+		inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+		if (!inti)
+			return -ENOMEM;
+
+		r = copy_irq_from_user(inti, attr->addr);
+		if (r) {
+			kfree(inti);
+			return r;
+		}
+		/* on success __inject_vm() has taken over inti */
+		r = __inject_vm(dev->kvm, inti);
+		if (r) {
+			kfree(inti);
+			return r;
+		}
+		len -= sizeof(struct kvm_s390_irq);
+		attr->addr += sizeof(struct kvm_s390_irq);
+	}
+
+	return r;
+}
+
+/*
+ * Look up the I/O adapter registered under @id.  Returns NULL if @id is
+ * out of range; otherwise the (possibly NULL) slot content is returned.
+ */
+static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id)
+{
+	return (id < MAX_S390_IO_ADAPTERS) ? kvm->arch.adapters[id] : NULL;
+}
+
+/*
+ * Register a new I/O adapter described by the kvm_s390_io_adapter structure
+ * at the user address attr->addr.
+ *
+ * Returns 0 on success, -EFAULT if the description cannot be read, -EINVAL
+ * if the id is out of range or already registered, and -ENOMEM if the
+ * adapter cannot be allocated.  A new adapter starts out unmasked and
+ * without any mappings.
+ */
+static int register_io_adapter(struct kvm_device *dev,
+			       struct kvm_device_attr *attr)
+{
+	struct kvm_s390_io_adapter adapter_info;
+	struct s390_io_adapter *adapter;
+	struct kvm *kvm = dev->kvm;
+
+	if (copy_from_user(&adapter_info,
+			   (void __user *)attr->addr, sizeof(adapter_info)))
+		return -EFAULT;
+
+	if (adapter_info.id >= MAX_S390_IO_ADAPTERS)
+		return -EINVAL;
+	if (kvm->arch.adapters[adapter_info.id] != NULL)
+		return -EINVAL;
+
+	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+	if (adapter == NULL)
+		return -ENOMEM;
+
+	/* bookkeeping for the indicator page mappings */
+	INIT_LIST_HEAD(&adapter->maps);
+	init_rwsem(&adapter->maps_lock);
+	atomic_set(&adapter->nr_maps, 0);
+
+	/* properties supplied by userspace */
+	adapter->id = adapter_info.id;
+	adapter->isc = adapter_info.isc;
+	adapter->maskable = adapter_info.maskable;
+	adapter->swap = adapter_info.swap;
+	adapter->masked = false;
+
+	kvm->arch.adapters[adapter->id] = adapter;
+
+	return 0;
+}
+
+/*
+ * Set the masked state of adapter @id to @masked.
+ *
+ * Returns the previous masked state (0 or 1), or -EINVAL if the adapter
+ * does not exist or is not maskable.
+ */
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
+{
+	struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+	int was_masked;
+
+	if (adapter == NULL)
+		return -EINVAL;
+	if (!adapter->maskable)
+		return -EINVAL;
+
+	was_masked = adapter->masked;
+	adapter->masked = masked;
+	return was_masked;
+}
+
+/*
+ * Pin the page behind guest address @addr and add it to the mappings of
+ * adapter @id.
+ *
+ * Returns 0 on success, -EINVAL if the adapter does not exist, @addr is 0
+ * or the adapter's mapping limit (MAX_S390_ADAPTER_MAPS) is reached,
+ * -ENOMEM if the bookkeeping structure cannot be allocated, -EFAULT if
+ * gmap_translate() fails for @addr, or the negative error from
+ * get_user_pages_fast().
+ */
+static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+	struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+	struct s390_map_info *map;
+	int ret;
+
+	if (!adapter || !addr)
+		return -EINVAL;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	INIT_LIST_HEAD(&map->list);
+	map->guest_addr = addr;
+	map->addr = gmap_translate(kvm->arch.gmap, addr);
+	if (map->addr == -EFAULT) {
+		ret = -EFAULT;
+		goto out;
+	}
+	/* pin exactly one page, writable */
+	ret = get_user_pages_fast(map->addr, 1, 1, &map->page);
+	if (ret < 0)
+		goto out;
+	BUG_ON(ret != 1);
+	down_write(&adapter->maps_lock);
+	if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
+		list_add_tail(&map->list, &adapter->maps);
+		ret = 0;
+	} else {
+		/*
+		 * Undo the increment: a refused mapping must not count,
+		 * otherwise nr_maps drifts upwards with every rejected
+		 * request and never matches the list length again.
+		 */
+		atomic_dec(&adapter->nr_maps);
+		put_page(map->page);
+		ret = -EINVAL;
+	}
+	up_write(&adapter->maps_lock);
+out:
+	/* map is only kept when it was linked into the adapter's list */
+	if (ret)
+		kfree(map);
+	return ret;
+}
+
+/*
+ * Remove the mapping for guest address @addr from adapter @id and release
+ * the pinned page.  Only the first matching mapping is removed.
+ *
+ * Returns 0 if a mapping was removed, -EINVAL if the adapter does not
+ * exist, @addr is 0 or no mapping for @addr is found.
+ */
+static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
+{
+	struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+	struct s390_map_info *cur, *next;
+	int rc = -EINVAL;
+
+	if (!adapter || !addr)
+		return -EINVAL;
+
+	down_write(&adapter->maps_lock);
+	list_for_each_entry_safe(cur, next, &adapter->maps, list) {
+		if (cur->guest_addr != addr)
+			continue;
+
+		atomic_dec(&adapter->nr_maps);
+		list_del(&cur->list);
+		put_page(cur->page);
+		kfree(cur);
+		rc = 0;
+		break;
+	}
+	up_write(&adapter->maps_lock);
+
+	return rc;
+}
+
+/*
+ * Free all registered I/O adapters of @kvm together with their mappings;
+ * the page of every mapping is released.  The slots in kvm->arch.adapters
+ * are not reset here.
+ */
+void kvm_s390_destroy_adapters(struct kvm *kvm)
+{
+	struct s390_map_info *map, *tmp;
+	struct s390_io_adapter *adapter;
+	int i;
+
+	for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
+		adapter = kvm->arch.adapters[i];
+		if (adapter == NULL)
+			continue;
+		list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
+			list_del(&map->list);
+			put_page(map->page);
+			kfree(map);
+		}
+		kfree(adapter);
+	}
+}
+
+/*
+ * Apply the kvm_s390_io_adapter_req found at the user address attr->addr:
+ * mask/unmask an adapter or map/unmap one of its indicator pages.
+ *
+ * Returns 0 on success, -EFAULT if the request cannot be read, -EINVAL if
+ * the adapter does not exist or the request type is unknown, or the error
+ * of the operation that was carried out.  For a mask request the previous
+ * mask state is not reported, any positive result is folded into 0.
+ */
+static int modify_io_adapter(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	struct kvm_s390_io_adapter_req req;
+	int ret;
+
+	if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
+		return -EFAULT;
+
+	if (get_io_adapter(dev->kvm, req.id) == NULL)
+		return -EINVAL;
+
+	switch (req.type) {
+	case KVM_S390_IO_ADAPTER_MASK:
+		ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask);
+		return (ret > 0) ? 0 : ret;
+	case KVM_S390_IO_ADAPTER_MAP:
+		return kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
+	case KVM_S390_IO_ADAPTER_UNMAP:
+		return kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * set_attr handler of the flic device.  Dispatches on attr->group:
+ * enqueue or clear floating interrupts, enable or disable pfault handling,
+ * register or modify an I/O adapter.  Unknown groups yield -EINVAL; groups
+ * that cannot fail yield 0.
+ */
+static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct kvm *kvm = dev->kvm;
+	struct kvm_vcpu *vcpu;
+	unsigned int i;
+
+	switch (attr->group) {
+	case KVM_DEV_FLIC_ENQUEUE:
+		return enqueue_floating_irq(dev, attr);
+	case KVM_DEV_FLIC_CLEAR_IRQS:
+		kvm_s390_clear_float_irqs(kvm);
+		return 0;
+	case KVM_DEV_FLIC_APF_ENABLE:
+		kvm->arch.gmap->pfault_enabled = 1;
+		return 0;
+	case KVM_DEV_FLIC_APF_DISABLE_WAIT:
+		kvm->arch.gmap->pfault_enabled = 0;
+		/*
+		 * Make sure no async faults are in transition when
+		 * clearing the queues. So we don't need to worry
+		 * about late coming workers.
+		 */
+		synchronize_srcu(&kvm->srcu);
+		kvm_for_each_vcpu(i, vcpu, kvm)
+			kvm_clear_async_pf_completion_queue(vcpu);
+		return 0;
+	case KVM_DEV_FLIC_ADAPTER_REGISTER:
+		return register_io_adapter(dev, attr);
+	case KVM_DEV_FLIC_ADAPTER_MODIFY:
+		return modify_io_adapter(dev, attr);
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * create handler of the flic device: remember @dev as the VM's flic.
+ * Returns -EINVAL if @dev is NULL or the VM already has a flic, else 0.
+ * @type is not used.
+ */
+static int flic_create(struct kvm_device *dev, u32 type)
+{
+	if (!dev || dev->kvm->arch.flic)
+		return -EINVAL;
+
+	dev->kvm->arch.flic = dev;
+	return 0;
+}
+
+/*
+ * destroy handler of the flic device: detach it from its VM and free it.
+ */
+static void flic_destroy(struct kvm_device *dev)
+{
+	struct kvm *kvm = dev->kvm;
+
+	kvm->arch.flic = NULL;
+	kfree(dev);
+}
+
+/*
+ * s390 floating irq controller (flic)
+ *
+ * Device operations table named "kvm-flic"; it wires the attribute
+ * getter/setter and the create/destroy handlers defined above.
+ */
+struct kvm_device_ops kvm_flic_ops = {
+	.name = "kvm-flic",
+	.get_attr = flic_get_attr,
+	.set_attr = flic_set_attr,
+	.create = flic_create,
+	.destroy = flic_destroy,
+};
+
+/*
+ * Compute the bit number, relative to the start of the page, of indicator
+ * bit @bit_nr located at address @addr: the byte offset of @addr within
+ * its page times 8, plus @bit_nr.  With @swap set the result is XOR-ed
+ * with BITS_PER_LONG - 1, which mirrors the position of the bit within
+ * its long.
+ */
+static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
+{
+	unsigned long bit = bit_nr + (addr % PAGE_SIZE) * 8;
+
+	if (swap)
+		bit ^= BITS_PER_LONG - 1;
+	return bit;
+}
+
+/*
+ * Find the mapping of @adapter whose guest address equals @addr.  Returns
+ * the first match, or NULL if @adapter is NULL or nothing matches.  The
+ * list is walked without taking a lock here.
+ */
+static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
+					  u64 addr)
+{
+	struct s390_map_info *cur;
+
+	if (adapter == NULL)
+		return NULL;
+
+	list_for_each_entry(cur, &adapter->maps, list)
+		if (cur->guest_addr == addr)
+			return cur;
+
+	return NULL;
+}
+
+/*
+ * Set the indicator bit and the summary indicator bit described by
+ * @adapter_int in the pages mapped for @adapter.
+ *
+ * Returns -1 if no mapping exists for the indicator or for the summary
+ * address, 1 if the summary bit was clear before and 0 if it was already
+ * set.  Note that the indicator bit has already been set and its page
+ * marked dirty when the summary mapping turns out to be missing.
+ *
+ * The only caller in this file holds adapter->maps_lock for reading.
+ */
+static int adapter_indicators_set(struct kvm *kvm,
+				  struct s390_io_adapter *adapter,
+				  struct kvm_s390_adapter_int *adapter_int)
+{
+	unsigned long bit;
+	int summary_set, idx;
+	struct s390_map_info *info;
+	void *map;
+
+	/* indicator bit first */
+	info = get_map_info(adapter, adapter_int->ind_addr);
+	if (!info)
+		return -1;
+	map = page_address(info->page);
+	bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
+	set_bit(bit, map);
+	/* the dirty marking below runs inside an SRCU read-side section */
+	idx = srcu_read_lock(&kvm->srcu);
+	mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+	set_page_dirty_lock(info->page);
+	/* then the summary indicator */
+	info = get_map_info(adapter, adapter_int->summary_addr);
+	if (!info) {
+		srcu_read_unlock(&kvm->srcu, idx);
+		return -1;
+	}
+	map = page_address(info->page);
+	bit = get_ind_bit(info->addr, adapter_int->summary_offset,
+			  adapter->swap);
+	/* remember the old value: it decides whether to inject or coalesce */
+	summary_set = test_and_set_bit(bit, map);
+	mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
+	set_page_dirty_lock(info->page);
+	srcu_read_unlock(&kvm->srcu, idx);
+	return summary_set ? 0 : 1;
+}
+
+/*
+ * Routing callback for adapter interrupts: set the indicators and, unless
+ * the interrupt is coalesced or the adapter is masked, inject an adapter
+ * I/O interrupt for the adapter's ISC.
+ *
+ * < 0 - not injected due to error
+ * = 0 - coalesced, summary indicator already active
+ * > 0 - injected interrupt
+ */
+static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
+			   struct kvm *kvm, int irq_source_id, int level,
+			   bool line_status)
+{
+	struct s390_io_adapter *adapter;
+	int rc;
+
+	/* We're only interested in the 0->1 transition. */
+	if (level == 0)
+		return 0;
+
+	adapter = get_io_adapter(kvm, e->adapter.adapter_id);
+	if (adapter == NULL)
+		return -1;
+
+	/* the mappings must not change while the indicators are set */
+	down_read(&adapter->maps_lock);
+	rc = adapter_indicators_set(kvm, adapter, &e->adapter);
+	up_read(&adapter->maps_lock);
+
+	if ((rc > 0) && !adapter->masked) {
+		struct kvm_s390_interrupt s390int = {
+			.type = KVM_S390_INT_IO(1, 0, 0, 0),
+			.parm = 0,
+			.parm64 = (adapter->isc << 27) | 0x80000000,
+		};
+
+		rc = kvm_s390_inject_vm(kvm, &s390int);
+		/* a successful injection is reported as "injected" (> 0) */
+		if (rc == 0)
+			rc = 1;
+	}
+	return rc;
+}
+
+/*
+ * Translate the userspace routing entry @ue into the kernel entry @e.
+ * Only KVM_IRQ_ROUTING_S390_ADAPTER entries are supported; they are bound
+ * to set_adapter_int().  Returns 0 on success and -EINVAL for any other
+ * entry type, in which case @e is left untouched.
+ */
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
+{
+	if (ue->type != KVM_IRQ_ROUTING_S390_ADAPTER)
+		return -EINVAL;
+
+	e->set = set_adapter_int;
+	e->adapter.summary_addr = ue->u.adapter.summary_addr;
+	e->adapter.ind_addr = ue->u.adapter.ind_addr;
+	e->adapter.summary_offset = ue->u.adapter.summary_offset;
+	e->adapter.ind_offset = ue->u.adapter.ind_offset;
+	e->adapter.adapter_id = ue->u.adapter.adapter_id;
+
+	return 0;
+}
+
+/*
+ * MSI routing stub: always fails with -EINVAL, none of the arguments is
+ * looked at.
+ */
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
+		int irq_source_id, int level, bool line_status)
+{
+	return -EINVAL;
+}
+
+/*
+ * Replace the local interrupt state of @vcpu with the array of
+ * kvm_s390_irq structures found in the user buffer @irqstate of @len bytes.
+ *
+ * The buffer is copied into the kernel first, then all interrupts are
+ * injected under the local interrupt lock.  Injection stops at the first
+ * failure; interrupts injected before it stay pending.
+ *
+ * Returns 0 on success, -ENOMEM if the kernel buffer cannot be allocated,
+ * -EFAULT if @irqstate cannot be read, -EBUSY if interrupts are already
+ * pending, or the error from do_inject_vcpu().
+ *
+ * NOTE(review): @len is not validated here; presumably the caller checks
+ * its range and alignment - confirm.
+ */
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *irqstate, int len)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_irq *buf;
+	int r = 0;
+	int n;
+
+	buf = vmalloc(len);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user((void *) buf, irqstate, len)) {
+		r = -EFAULT;
+		goto out_free;
+	}
+
+	spin_lock(&li->lock);
+	if (li->pending_irqs) {
+		/*
+		 * Don't allow setting the interrupt state
+		 * when there are already interrupts pending
+		 */
+		r = -EBUSY;
+	} else {
+		for (n = 0; n < len / sizeof(*buf); n++) {
+			r = do_inject_vcpu(vcpu, &buf[n]);
+			if (r)
+				break;
+		}
+	}
+	spin_unlock(&li->lock);
+
+out_free:
+	vfree(buf);
+
+	return r;
+}
+
+/*
+ * Fill @irq with the pending local interrupt identified by the pending
+ * bit number @irq_type, taking the payload from @li where the interrupt
+ * has one.  For bit numbers without a case below @irq is left untouched.
+ */
+static void store_local_irq(struct kvm_s390_local_interrupt *li,
+			    struct kvm_s390_irq *irq,
+			    unsigned long irq_type)
+{
+	switch (irq_type) {
+	/* interrupts that carry a payload */
+	case IRQ_PEND_MCHK_EX:
+	case IRQ_PEND_MCHK_REP:
+		irq->type = KVM_S390_MCHK;
+		irq->u.mchk = li->irq.mchk;
+		break;
+	case IRQ_PEND_PROG:
+		irq->type = KVM_S390_PROGRAM_INT;
+		irq->u.pgm = li->irq.pgm;
+		break;
+	case IRQ_PEND_PFAULT_INIT:
+		irq->type = KVM_S390_INT_PFAULT_INIT;
+		irq->u.ext = li->irq.ext;
+		break;
+	case IRQ_PEND_EXT_EXTERNAL:
+		irq->type = KVM_S390_INT_EXTERNAL_CALL;
+		irq->u.extcall = li->irq.extcall;
+		break;
+	case IRQ_PEND_SIGP_STOP:
+		irq->type = KVM_S390_SIGP_STOP;
+		irq->u.stop = li->irq.stop;
+		break;
+	case IRQ_PEND_SET_PREFIX:
+		irq->type = KVM_S390_SIGP_SET_PREFIX;
+		irq->u.prefix = li->irq.prefix;
+		break;
+	/* interrupts that consist of their type only */
+	case IRQ_PEND_EXT_CLOCK_COMP:
+		irq->type = KVM_S390_INT_CLOCK_COMP;
+		break;
+	case IRQ_PEND_EXT_CPU_TIMER:
+		irq->type = KVM_S390_INT_CPU_TIMER;
+		break;
+	case IRQ_PEND_RESTART:
+		irq->type = KVM_S390_RESTART;
+		break;
+	}
+}
+
+/*
+ * Store the pending local interrupts of @vcpu as an array of kvm_s390_irq
+ * structures in the user buffer @buf of @len bytes.
+ *
+ * The pending bitmap and the emergency signal bitmap are snapshotted under
+ * the local interrupt lock; everything else happens after the lock was
+ * dropped.  One entry is written per pending bit, one per pending
+ * emergency signal source, and one for an external call that is flagged in
+ * the SCA entry of this VCPU.
+ *
+ * Returns the number of bytes written, -ENOBUFS if @len is too small, or
+ * -EFAULT if @buf cannot be written.
+ */
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
+{
+	uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+	unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	unsigned long pending_irqs;
+	struct kvm_s390_irq irq;
+	unsigned long irq_type;
+	int cpuaddr;
+	int n = 0;
+
+	spin_lock(&li->lock);
+	pending_irqs = li->pending_irqs;
+	memcpy(&sigp_emerg_pending, &li->sigp_emerg_pending,
+	       sizeof(sigp_emerg_pending));
+	spin_unlock(&li->lock);
+
+	for_each_set_bit(irq_type, &pending_irqs, IRQ_PEND_COUNT) {
+		/* zero the whole structure so no stale stack data is copied out */
+		memset(&irq, 0, sizeof(irq));
+		/* emergency signals are expanded per source below */
+		if (irq_type == IRQ_PEND_EXT_EMERGENCY)
+			continue;
+		/*
+		 * NOTE(review): sizeof() makes this an unsigned comparison, so
+		 * a negative len would never trip the check - presumably the
+		 * caller rules that out; confirm.
+		 */
+		if (n + sizeof(irq) > len)
+			return -ENOBUFS;
+		/*
+		 * NOTE(review): the payload is read from the live local_int
+		 * after the lock was dropped, not from the snapshot.
+		 */
+		store_local_irq(&vcpu->arch.local_int, &irq, irq_type);
+		if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+			return -EFAULT;
+		n += sizeof(irq);
+	}
+
+	if (test_bit(IRQ_PEND_EXT_EMERGENCY, &pending_irqs)) {
+		/* one entry per bit set in the snapshot, code = bit number */
+		for_each_set_bit(cpuaddr, sigp_emerg_pending, KVM_MAX_VCPUS) {
+			memset(&irq, 0, sizeof(irq));
+			if (n + sizeof(irq) > len)
+				return -ENOBUFS;
+			irq.type = KVM_S390_INT_EMERGENCY;
+			irq.u.emerg.code = cpuaddr;
+			if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+				return -EFAULT;
+			n += sizeof(irq);
+		}
+	}
+
+	/* external call recorded in the SCA entry (sigp_ctrl read at entry) */
+	if ((sigp_ctrl & SIGP_CTRL_C) &&
+	    (atomic_read(&vcpu->arch.sie_block->cpuflags) &
+	     CPUSTAT_ECALL_PEND)) {
+		if (n + sizeof(irq) > len)
+			return -ENOBUFS;
+		memset(&irq, 0, sizeof(irq));
+		irq.type = KVM_S390_INT_EXTERNAL_CALL;
+		irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK;
+		if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+			return -EFAULT;
+		n += sizeof(irq);
+	}
+
+	return n;
+}
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
new file mode 100644
index 000000000..d98e41596
--- /dev/null
+++ b/arch/s390/kvm/irq.h
@@ -0,0 +1,22 @@
+/*
+ * s390 irqchip routines
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ */
+#ifndef __KVM_IRQ_H
+#define __KVM_IRQ_H
+
+#include <linux/kvm_host.h>
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+	return 1;	/* unconditionally reported, @kvm is not looked at */
+}
+
+#endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
new file mode 100644
index 000000000..8cd8e7b28
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.c
@@ -0,0 +1,2628 @@
+/*
+ * hosting zSeries kernel virtual machines
+ *
+ * Copyright IBM Corp. 2008, 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ *               Heiko Carstens <heiko.carstens@de.ibm.com>
+ *               Christian Ehrhardt <ehrhardt@de.ibm.com>
+ *               Jason J. Herne <jjherne@us.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/hrtimer.h>
+#include <linux/init.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/vmalloc.h>
+#include <asm/asm-offsets.h>
+#include <asm/lowcore.h>
+#include <asm/pgtable.h>
+#include <asm/nmi.h>
+#include <asm/switch_to.h>
+#include <asm/isc.h>
+#include <asm/sclp.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+#include "trace-s390.h"
+
+#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
+#define LOCAL_IRQS 32	/* NOTE(review): presumably max. local irqs per vcpu - confirm */
+#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
+			   (KVM_MAX_VCPUS + LOCAL_IRQS))	/* size in bytes */
+
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU /* two initializers */
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ "userspace_handled", VCPU_STAT(exit_userspace) },
+	{ "exit_null", VCPU_STAT(exit_null) },
+	{ "exit_validity", VCPU_STAT(exit_validity) },
+	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
+	{ "exit_external_request", VCPU_STAT(exit_external_request) },
+	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
+	{ "exit_instruction", VCPU_STAT(exit_instruction) },
+	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
+	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
+	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
+	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
+	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
+	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
+	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
+	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
+	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
+	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
+	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
+	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
+	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
+	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
+	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
+	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
+	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
+	{ "instruction_spx", VCPU_STAT(instruction_spx) },
+	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
+	{ "instruction_stap", VCPU_STAT(instruction_stap) },
+	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
+	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
+	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
+	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
+	{ "instruction_essa", VCPU_STAT(instruction_essa) },
+	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
+	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
+	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
+	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
+	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
+	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
+	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
+	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
+	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
+	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
+	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
+	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
+	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
+	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
+	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
+	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
+	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
+	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
+	{ "diagnose_10", VCPU_STAT(diagnose_10) },
+	{ "diagnose_44", VCPU_STAT(diagnose_44) },
+	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
+	{ NULL }	/* last entry: its name is NULL */
+};
+
+/* Upper facilities limit for kvm: ANDed into each VM's facility mask. */
+unsigned long kvm_s390_fac_list_mask[] = {
+	0xffe6fffbfcfdfc40UL,
+	0x005c800000000000UL,
+};
+
+unsigned long kvm_s390_fac_list_mask_size(void)
+{
+	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
+	return ARRAY_SIZE(kvm_s390_fac_list_mask);	/* element count, not bytes */
+}
+
+static struct gmap_notifier gmap_notifier;
+
+/* Section: not file related */
+int kvm_arch_hardware_enable(void)
+{
+	/* nothing to switch on: every s390 is virtualization enabled ;-) */
+	return 0;
+}
+
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
+
+int kvm_arch_hardware_setup(void)
+{
+	gmap_notifier.notifier_call = kvm_gmap_notifier;	/* our callback */
+	gmap_register_ipte_notifier(&gmap_notifier);
+	return 0;	/* no failure path */
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+	gmap_unregister_ipte_notifier(&gmap_notifier);	/* undoes hardware_setup */
+}
+
+int kvm_arch_init(void *opaque)
+{
+	/* Register floating interrupt controller interface; @opaque is unused. */
+	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
+}
+
+/* Section: device related */
+long kvm_arch_dev_ioctl(struct file *filp,
+			unsigned int ioctl, unsigned long arg)
+{
+	/* KVM_S390_ENABLE_SIE is the only request handled here; filp and
+	 * arg are unused and any other request yields -EINVAL. */
+	return ioctl == KVM_S390_ENABLE_SIE ? s390_enable_sie() : -EINVAL;
+}
+
+/*
+ * Report support for capability @ext.
+ *
+ * Returns 1 for capabilities that are simply present, a capability
+ * specific value for those that carry a limit or a MACHINE_HAS_* result,
+ * and 0 for everything else.  @kvm is not examined.
+ */
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+{
+	switch (ext) {
+	case KVM_CAP_S390_PSW:
+	case KVM_CAP_S390_GMAP:
+	case KVM_CAP_SYNC_MMU:
+#ifdef CONFIG_KVM_S390_UCONTROL
+	case KVM_CAP_S390_UCONTROL:
+#endif
+	case KVM_CAP_ASYNC_PF:
+	case KVM_CAP_SYNC_REGS:
+	case KVM_CAP_ONE_REG:
+	case KVM_CAP_ENABLE_CAP:
+	case KVM_CAP_S390_CSS_SUPPORT:
+	case KVM_CAP_IOEVENTFD:
+	case KVM_CAP_DEVICE_CTRL:
+	case KVM_CAP_ENABLE_CAP_VM:
+	case KVM_CAP_S390_IRQCHIP:
+	case KVM_CAP_VM_ATTRIBUTES:
+	case KVM_CAP_MP_STATE:
+	case KVM_CAP_S390_INJECT_IRQ:
+	case KVM_CAP_S390_USER_SIGP:
+	case KVM_CAP_S390_USER_STSI:
+	case KVM_CAP_S390_SKEYS:
+	case KVM_CAP_S390_IRQ_STATE:
+		return 1;
+	/* limits rather than booleans */
+	case KVM_CAP_S390_MEM_OP:
+		return MEM_OP_MAX_SIZE;
+	case KVM_CAP_NR_VCPUS:
+	case KVM_CAP_MAX_VCPUS:
+		return KVM_MAX_VCPUS;
+	case KVM_CAP_NR_MEMSLOTS:
+		return KVM_USER_MEM_SLOTS;
+	/* value of the corresponding MACHINE_HAS_* test */
+	case KVM_CAP_S390_COW:
+		return MACHINE_HAS_ESOP;
+	case KVM_CAP_S390_VECTOR_REGISTERS:
+		return MACHINE_HAS_VX;
+	default:
+		return 0;
+	}
+}
+
+/* Transfer the gmap's per-page dirty state of @memslot into KVM's dirty log. */
+static void kvm_s390_sync_dirty_log(struct kvm *kvm,
+					struct kvm_memory_slot *memslot)
+{
+	gfn_t cur_gfn, end_gfn;
+	unsigned long address;
+	struct gmap *gmap = kvm->arch.gmap;
+
+	down_read(&gmap->mm->mmap_sem);
+	/* The slot covers [base_gfn, base_gfn + npages): end_gfn is exclusive */
+	end_gfn = memslot->base_gfn + memslot->npages;
+	for (cur_gfn = memslot->base_gfn; cur_gfn < end_gfn; cur_gfn++) {
+		address = gfn_to_hva_memslot(memslot, cur_gfn);
+		if (gmap_test_and_clear_dirty(address, gmap))
+			mark_page_dirty(kvm, cur_gfn);
+	}
+	up_read(&gmap->mm->mmap_sem);
+}
+
+/* Section: vm related */
+/*
+ * Get (and clear) the dirty memory log for a memory slot, under slots_lock.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+			       struct kvm_dirty_log *log)
+{
+	int r;
+	unsigned long n;
+	struct kvm_memory_slot *memslot;
+	int is_dirty = 0;
+
+	mutex_lock(&kvm->slots_lock);
+
+	r = -EINVAL;	/* slot id out of range */
+	if (log->slot >= KVM_USER_MEM_SLOTS)
+		goto out;
+
+	memslot = id_to_memslot(kvm->memslots, log->slot);
+	r = -ENOENT;	/* slot has no dirty bitmap */
+	if (!memslot->dirty_bitmap)
+		goto out;
+
+	kvm_s390_sync_dirty_log(kvm, memslot);	/* fold in gmap dirty state first */
+	r = kvm_get_dirty_log(kvm, log, &is_dirty);
+	if (r)
+		goto out;
+
+	/* Clear the dirty log, but only if something was reported dirty */
+	if (is_dirty) {
+		n = kvm_dirty_bitmap_bytes(memslot);
+		memset(memslot->dirty_bitmap, 0, n);
+	}
+	r = 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return r;
+}
+
+/*
+ * Enable a VM-wide capability on behalf of userspace.
+ *
+ * Each recognised capability sets its flag or facility bit in @kvm.
+ * Returns 0 on success and -EINVAL for non-zero flags, an unknown
+ * capability, or vector registers requested without MACHINE_HAS_VX.
+ */
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	/* any flag bit is rejected */
+	if (cap->flags)
+		return -EINVAL;
+
+	switch (cap->cap) {
+	case KVM_CAP_S390_IRQCHIP:
+		kvm->arch.use_irqchip = 1;
+		return 0;
+	case KVM_CAP_S390_USER_SIGP:
+		kvm->arch.user_sigp = 1;
+		return 0;
+	case KVM_CAP_S390_VECTOR_REGISTERS:
+		if (!MACHINE_HAS_VX)
+			return -EINVAL;
+		/* facility 129 goes into both the mask and the list */
+		set_kvm_facility(kvm->arch.model.fac->mask, 129);
+		set_kvm_facility(kvm->arch.model.fac->list, 129);
+		return 0;
+	case KVM_CAP_S390_USER_STSI:
+		kvm->arch.user_stsi = 1;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Read a memory-control attribute; only KVM_S390_VM_MEM_LIMIT_SIZE exists.
+ * Returns 0, -ENXIO (unknown attribute), -EINVAL (ucontrol) or -EFAULT.
+ */
+static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	if (attr->attr != KVM_S390_VM_MEM_LIMIT_SIZE)
+		return -ENXIO;
+
+	/* ucontrol VMs are created with kvm->arch.gmap == NULL */
+	if (kvm_is_ucontrol(kvm))
+		return -EINVAL;
+	if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
+		return -EFAULT;
+	return 0;
+}
+
+/* Set a memory-control attribute: CMMA enable, CMMA reset or memory limit. */
+static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret;
+	unsigned int idx;
+	switch (attr->attr) {
+	case KVM_S390_VM_MEM_ENABLE_CMMA:
+		ret = -EBUSY;
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus) == 0) {
+			kvm->arch.use_cmma = 1;
+			ret = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		break;
+	case KVM_S390_VM_MEM_CLR_CMMA:
+		/* ucontrol VMs have kvm->arch.gmap == NULL; do not touch it */
+		if (kvm_is_ucontrol(kvm))
+			return -EINVAL;
+		mutex_lock(&kvm->lock);
+		idx = srcu_read_lock(&kvm->srcu);
+		s390_reset_cmma(kvm->arch.gmap->mm);
+		srcu_read_unlock(&kvm->srcu, idx);
+		mutex_unlock(&kvm->lock);
+		ret = 0;
+		break;
+	case KVM_S390_VM_MEM_LIMIT_SIZE: {
+		unsigned long new_limit;
+
+		if (kvm_is_ucontrol(kvm))
+			return -EINVAL;
+		if (get_user(new_limit, (u64 __user *)attr->addr))
+			return -EFAULT;
+		if (new_limit > kvm->arch.gmap->asce_end)
+			return -E2BIG;
+
+		ret = -EBUSY;
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus) == 0) {
+			/* gmap_alloc will round the limit up */
+			struct gmap *new = gmap_alloc(current->mm, new_limit);
+
+			if (new) {
+				gmap_free(kvm->arch.gmap);
+				new->private = kvm;
+				kvm->arch.gmap = new;
+			}
+			ret = new ? 0 : -ENOMEM;
+		}
+		mutex_unlock(&kvm->lock);
+		break;
+	}
+	default:
+		ret = -ENXIO;
+		break;
+	}
+	return ret;
+}
+
+static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
+
+static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	if (!test_kvm_facility(kvm, 76))
+		return -EINVAL;	/* facility 76 is a precondition */
+
+	mutex_lock(&kvm->lock);
+	switch (attr->attr) {
+	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
+		get_random_bytes(
+			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
+			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+		kvm->arch.crypto.aes_kw = 1;
+		break;
+	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
+		get_random_bytes(
+			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
+			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+		kvm->arch.crypto.dea_kw = 1;
+		break;
+	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
+		kvm->arch.crypto.aes_kw = 0;
+		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
+			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+		break;
+	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
+		kvm->arch.crypto.dea_kw = 0;
+		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
+			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+		break;
+	default:
+		mutex_unlock(&kvm->lock);	/* drop the lock on this early exit */
+		return -ENXIO;
+	}
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {	/* still under kvm->lock */
+		kvm_s390_vcpu_crypto_setup(vcpu);	/* refresh ecb3 and crycbd */
+		exit_sie(vcpu);
+	}
+	mutex_unlock(&kvm->lock);
+	return 0;
+}
+
+/*
+ * Handle a write of the TOD_HIGH attribute: one byte is read from user
+ * memory and must be zero (-EFAULT if unreadable, -EINVAL if non-zero).
+ */
+static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	u8 gtod_high;
+
+	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
+			   sizeof(gtod_high)))
+		return -EFAULT;
+	return gtod_high ? -EINVAL : 0;
+}
+
+static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_vcpu *cur_vcpu;
+	unsigned int vcpu_idx;
+	u64 host_tod, gtod;
+	int r;
+
+	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
+		return -EFAULT;
+
+	r = store_tod_clock(&host_tod);
+	if (r)
+		return r;
+
+	mutex_lock(&kvm->lock);
+	kvm->arch.epoch = gtod - host_tod;	/* requested value minus host TOD */
+	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) {
+		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;	/* per-vcpu copy */
+		exit_sie(cur_vcpu);
+	}
+	mutex_unlock(&kvm->lock);
+	return 0;
+}
+
+/*
+ * Dispatch a KVM_S390_VM_TOD write to the high/low setter.
+ *
+ * Returns -EINVAL if attr->flags is set and -ENXIO for an unknown
+ * attribute; otherwise the setter's result.
+ */
+static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	if (attr->flags)
+		return -EINVAL;
+
+	switch (attr->attr) {
+	case KVM_S390_VM_TOD_HIGH:
+		return kvm_s390_set_tod_high(kvm, attr);
+	case KVM_S390_VM_TOD_LOW:
+		return kvm_s390_set_tod_low(kvm, attr);
+	default:
+		return -ENXIO;
+	}
+}
+
+static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	u8 gtod_high = 0;	/* always reported as zero; @kvm is not consulted */
+
+	if (copy_to_user((void __user *)attr->addr, &gtod_high,
+					 sizeof(gtod_high)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	u64 host_tod, gtod;
+	int r;
+
+	r = store_tod_clock(&host_tod);
+	if (r)
+		return r;	/* pass the store_tod_clock() failure through */
+
+	gtod = host_tod + kvm->arch.epoch;	/* epoch is read without kvm->lock */
+	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Dispatch a KVM_S390_VM_TOD read to the high/low getter.
+ *
+ * Returns -EINVAL if attr->flags is set and -ENXIO for an unknown
+ * attribute; otherwise the getter's result.
+ */
+static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	if (attr->flags)
+		return -EINVAL;
+
+	switch (attr->attr) {
+	case KVM_S390_VM_TOD_HIGH:
+		return kvm_s390_get_tod_high(kvm, attr);
+	case KVM_S390_VM_TOD_LOW:
+		return kvm_s390_get_tod_low(kvm, attr);
+	default:
+		return -ENXIO;
+	}
+}
+
+/* Set cpuid, ibc and facility list from userspace; -EBUSY once vcpus exist. */
+static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_processor *proc;
+	int ret = -EBUSY;
+
+	mutex_lock(&kvm->lock);
+	if (atomic_read(&kvm->online_vcpus))
+		goto out;
+	ret = -ENOMEM;
+	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
+	if (!proc)
+		goto out;
+	if (copy_from_user(proc, (void __user *)attr->addr, sizeof(*proc))) {
+		ret = -EFAULT;
+	} else {
+		/* the model is only touched after a complete copy */
+		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
+		       sizeof(struct cpuid));
+		kvm->arch.model.ibc = proc->ibc;
+		memcpy(kvm->arch.model.fac->list, proc->fac_list,
+		       S390_ARCH_FAC_LIST_SIZE_BYTE);
+		ret = 0;
+	}
+	kfree(proc);
+out:
+	mutex_unlock(&kvm->lock);
+	return ret;
+}
+
+/*
+ * Write a CPU-model attribute. KVM_S390_VM_CPU_PROCESSOR is the only one
+ * that can be set; every other attribute yields -ENXIO.
+ */
+static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	if (attr->attr == KVM_S390_VM_CPU_PROCESSOR)
+		return kvm_s390_set_processor(kvm, attr);
+
+	return -ENXIO;
+}
+
+/* Copy the VM's cpuid, ibc and facility list to the user buffer at attr->addr. */
+static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_processor *proc;
+	int ret = 0;
+
+	/* staged in a zeroed kernel copy and written out in one go */
+	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
+	if (!proc)
+		return -ENOMEM;
+
+	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
+	proc->ibc = kvm->arch.model.ibc;
+	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
+	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
+		ret = -EFAULT;
+	kfree(proc);
+	return ret;
+}
+
+/* Copy cpu id, ibc, facility mask and lowcore facility list to attr->addr. */
+static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	struct kvm_s390_vm_cpu_machine *mach;
+	int ret = 0;
+
+	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
+	if (!mach)
+		return -ENOMEM;
+
+	/* cpu id from get_cpu_id(), ibc from sclp_get_ibc() */
+	get_cpu_id((struct cpuid *) &mach->cpuid);
+	mach->ibc = sclp_get_ibc();
+	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
+	       S390_ARCH_FAC_LIST_SIZE_BYTE);
+	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
+	       S390_ARCH_FAC_LIST_SIZE_BYTE);
+	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
+		ret = -EFAULT;
+	kfree(mach);
+	return ret;
+}
+
+/*
+ * Read a CPU-model attribute (processor or machine description).
+ * Unknown attributes yield -ENXIO.
+ */
+static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	switch (attr->attr) {
+	case KVM_S390_VM_CPU_PROCESSOR:
+		return kvm_s390_get_processor(kvm, attr);
+	case KVM_S390_VM_CPU_MACHINE:
+		return kvm_s390_get_machine(kvm, attr);
+	}
+	return -ENXIO;
+}
+
+/*
+ * Route a KVM_SET_DEVICE_ATTR request to the handler for attr->group.
+ *
+ * Handled groups:
+ *   KVM_S390_VM_MEM_CTRL, KVM_S390_VM_TOD,
+ *   KVM_S390_VM_CPU_MODEL, KVM_S390_VM_CRYPTO
+ *
+ * The handler's result is returned unchanged; other groups give -ENXIO.
+ */
+static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_S390_VM_MEM_CTRL:
+		return kvm_s390_set_mem_control(kvm, attr);
+	case KVM_S390_VM_TOD:
+		return kvm_s390_set_tod(kvm, attr);
+	case KVM_S390_VM_CPU_MODEL:
+		return kvm_s390_set_cpu_model(kvm, attr);
+	case KVM_S390_VM_CRYPTO:
+		return kvm_s390_vm_set_crypto(kvm, attr);
+	default:
+		return -ENXIO;
+	}
+}
+
+/*
+ * Route a KVM_GET_DEVICE_ATTR request to the handler for attr->group.
+ *
+ * Handled groups: KVM_S390_VM_MEM_CTRL, KVM_S390_VM_TOD and
+ * KVM_S390_VM_CPU_MODEL (the crypto group has no getter).
+ *
+ * The handler's result is returned unchanged; other groups give -ENXIO.
+ */
+static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_S390_VM_MEM_CTRL:
+		return kvm_s390_get_mem_control(kvm, attr);
+	case KVM_S390_VM_TOD:
+		return kvm_s390_get_tod(kvm, attr);
+	case KVM_S390_VM_CPU_MODEL:
+		return kvm_s390_get_cpu_model(kvm, attr);
+	default:
+		return -ENXIO;
+	}
+}
+
+/*
+ * Tell userspace whether a VM attribute exists (KVM_HAS_DEVICE_ATTR).
+ *
+ * Only attr->group and attr->attr are inspected; attr->addr is never
+ * dereferenced and no VM state is modified.
+ *
+ * An attribute is reported as present independent of whether a getter,
+ * a setter or both exist for it.
+ *
+ * Returns 0 for a known group/attribute pair and -ENXIO otherwise.
+ */
+static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	int ret = -ENXIO;
+
+	switch (attr->group) {
+	case KVM_S390_VM_MEM_CTRL:
+		/* CMMA enable/reset and the memory limit */
+		switch (attr->attr) {
+		case KVM_S390_VM_MEM_ENABLE_CMMA:
+		case KVM_S390_VM_MEM_CLR_CMMA:
+		case KVM_S390_VM_MEM_LIMIT_SIZE:
+			ret = 0;
+			break;
+		}
+		break;
+	case KVM_S390_VM_TOD:
+		/* TOD low and high parts */
+		switch (attr->attr) {
+		case KVM_S390_VM_TOD_LOW:
+		case KVM_S390_VM_TOD_HIGH:
+			ret = 0;
+			break;
+		}
+		break;
+	case KVM_S390_VM_CPU_MODEL:
+		/* processor and machine description */
+		switch (attr->attr) {
+		case KVM_S390_VM_CPU_PROCESSOR:
+		case KVM_S390_VM_CPU_MACHINE:
+			ret = 0;
+			break;
+		}
+		break;
+	case KVM_S390_VM_CRYPTO:
+		/* AES/DEA key wrapping on and off */
+		switch (attr->attr) {
+		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
+		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
+		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
+		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
+			ret = 0;
+			break;
+		}
+		break;
+	}
+
+	return ret;
+}
+
+static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+	uint8_t *keys;	/* kernel staging buffer, one byte per guest page */
+	uint64_t hva;
+	unsigned long curkey;
+	int i, r = 0;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	/* Is this guest using storage keys? */
+	if (!mm_use_skey(current->mm))
+		return KVM_S390_GET_SKEYS_NONE;
+
+	/* Enforce sane limit on memory allocation */
+	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+		return -EINVAL;
+
+	keys = kmalloc_array(args->count, sizeof(uint8_t),
+			     GFP_KERNEL | __GFP_NOWARN);
+	if (!keys)
+		keys = vmalloc(sizeof(uint8_t) * args->count);	/* fallback */
+	if (!keys)
+		return -ENOMEM;
+
+	for (i = 0; i < args->count; i++) {
+		hva = gfn_to_hva(kvm, args->start_gfn + i);
+		if (kvm_is_error_hva(hva)) {
+			r = -EFAULT;
+			goto out;
+		}
+
+		curkey = get_guest_storage_key(current->mm, hva);
+		if (IS_ERR_VALUE(curkey)) {	/* error code carried in the value */
+			r = curkey;
+			goto out;
+		}
+		keys[i] = curkey;
+	}
+
+	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
+			 sizeof(uint8_t) * args->count);
+	if (r)
+		r = -EFAULT;
+out:
+	kvfree(keys);	/* matches either allocator used above */
+	return r;
+}
+
+static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+	uint8_t *keys;	/* kernel copy of the user supplied keys */
+	uint64_t hva;
+	int i, r = 0;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	/* Enforce sane limit on memory allocation */
+	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+		return -EINVAL;
+
+	keys = kmalloc_array(args->count, sizeof(uint8_t),
+			     GFP_KERNEL | __GFP_NOWARN);
+	if (!keys)
+		keys = vmalloc(sizeof(uint8_t) * args->count);	/* fallback */
+	if (!keys)
+		return -ENOMEM;
+
+	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
+			   sizeof(uint8_t) * args->count);
+	if (r) {
+		r = -EFAULT;
+		goto out;
+	}
+
+	/* Enable storage key handling for the guest */
+	s390_enable_skey();
+
+	for (i = 0; i < args->count; i++) {
+		hva = gfn_to_hva(kvm, args->start_gfn + i);
+		if (kvm_is_error_hva(hva)) {
+			r = -EFAULT;
+			goto out;
+		}
+
+		/* Lowest order bit is reserved */
+		if (keys[i] & 0x01) {
+			r = -EINVAL;
+			goto out;
+		}
+
+		r = set_guest_storage_key(current->mm, hva,
+					  (unsigned long)keys[i], 0);
+		if (r)
+			goto out;	/* keys set in earlier iterations stay set */
+	}
+out:
+	kvfree(keys);
+	return r;
+}
+
+/* Handle s390 specific VM ioctls; unknown requests yield -ENOTTY. */
+long kvm_arch_vm_ioctl(struct file *filp,
+		       unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	struct kvm_device_attr attr;
+	int r;
+
+	switch (ioctl) {
+	case KVM_S390_INTERRUPT: {
+		struct kvm_s390_interrupt s390int;
+
+		r = -EFAULT;
+		if (copy_from_user(&s390int, argp, sizeof(s390int)))
+			break;
+		r = kvm_s390_inject_vm(kvm, &s390int);
+		break;
+	}
+	case KVM_ENABLE_CAP: {
+		struct kvm_enable_cap cap;
+		r = -EFAULT;
+		if (copy_from_user(&cap, argp, sizeof(cap)))
+			break;
+		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+		break;
+	}
+	case KVM_CREATE_IRQCHIP: {
+		struct kvm_irq_routing_entry routing;
+
+		r = -EINVAL;
+		if (kvm->arch.use_irqchip) {
+			/* Set up dummy routing; report it if that fails. */
+			memset(&routing, 0, sizeof(routing));
+			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
+		}
+		break;
+	}
+	case KVM_SET_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, argp, sizeof(attr)))
+			break;
+		r = kvm_s390_vm_set_attr(kvm, &attr);
+		break;
+	}
+	case KVM_GET_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, argp, sizeof(attr)))
+			break;
+		r = kvm_s390_vm_get_attr(kvm, &attr);
+		break;
+	}
+	case KVM_HAS_DEVICE_ATTR: {
+		r = -EFAULT;
+		if (copy_from_user(&attr, argp, sizeof(attr)))
+			break;
+		r = kvm_s390_vm_has_attr(kvm, &attr);
+		break;
+	}
+	case KVM_S390_GET_SKEYS: {
+		struct kvm_s390_skeys args;
+
+		r = -EFAULT;
+		if (copy_from_user(&args, argp,
+				   sizeof(struct kvm_s390_skeys)))
+			break;
+		r = kvm_s390_get_skeys(kvm, &args);
+		break;
+	}
+	case KVM_S390_SET_SKEYS: {
+		struct kvm_s390_skeys args;
+
+		r = -EFAULT;
+		if (copy_from_user(&args, argp,
+				   sizeof(struct kvm_s390_skeys)))
+			break;
+		r = kvm_s390_set_skeys(kvm, &args);
+		break;
+	}
+	default:
+		r = -ENOTTY;
+	}
+
+	return r;
+}
+
+static int kvm_s390_query_ap_config(u8 *config)
+{
+	u32 fcn_code = 0x04000000UL;	/* copied into register 0 below */
+	u32 cc = 0;
+
+	memset(config, 0, 128);	/* @config must provide at least 128 bytes */
+	asm volatile(
+		"lgr 0,%1\n"
+		"lgr 2,%2\n"			/* register 2 = @config */
+		".long 0xb2af0000\n"		/* PQAP(QCI) */
+		"0: ipm %0\n"
+		"srl %0,28\n"			/* shifted down by 28 bits */
+		"1:\n"
+		EX_TABLE(0b, 1b)
+		: "+r" (cc)
+		: "r" (fcn_code), "r" (config)
+		: "cc", "0", "2", "memory"
+	);
+
+	return cc;
+}
+
+/*
+ * Non-zero iff facilities 2+12 exist and PQAP(QCI) sets 0x40 in config[0].
+ */
+static int kvm_s390_apxa_installed(void)
+{
+	u8 config[128];
+	int cc;
+
+	if (!test_facility(2) || !test_facility(12))
+		return 0;
+	cc = kvm_s390_query_ap_config(config);
+	if (!cc)
+		return config[0] & 0x40;
+	pr_err("PQAP(QCI) failed with cc=%d\n", cc);
+	return 0;
+}
+
+/* crycbd = crycb address (cast to 32 bit) | format 2 if APXA, else format 1 */
+static void kvm_s390_set_crycb_format(struct kvm *kvm)
+{
+	__u32 fmt = kvm_s390_apxa_installed() ? CRYCB_FORMAT2 : CRYCB_FORMAT1;
+
+	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
+	kvm->arch.crypto.crycbd |= fmt;
+}
+
+static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
+{
+	get_cpu_id(cpu_id);
+	cpu_id->version = 0xff;	/* overrides the version get_cpu_id() stored */
+}
+
+static int kvm_s390_crypto_init(struct kvm *kvm)
+{
+	if (!test_kvm_facility(kvm, 76))
+		return 0;	/* without facility 76 no crycb is allocated */
+
+	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
+					 GFP_KERNEL | GFP_DMA);
+	if (!kvm->arch.crypto.crycb)
+		return -ENOMEM;
+
+	kvm_s390_set_crycb_format(kvm);	/* derives crycbd from the new crycb */
+
+	/* Enable AES/DEA protected key functions by default */
+	kvm->arch.crypto.aes_kw = 1;
+	kvm->arch.crypto.dea_kw = 1;
+	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
+			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
+			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+
+	return 0;
+}
+
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+{
+	int i, rc;
+	char debug_name[16];
+	static unsigned long sca_offset;	/* shared by all VMs, see kvm_lock below */
+
+	rc = -EINVAL;
+#ifdef CONFIG_KVM_S390_UCONTROL
+	if (type & ~KVM_VM_S390_UCONTROL)
+		goto out_err;
+	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
+		goto out_err;
+#else
+	if (type)
+		goto out_err;
+#endif
+
+	rc = s390_enable_sie();
+	if (rc)
+		goto out_err;
+
+	rc = -ENOMEM;	/* every failure from here on reports -ENOMEM */
+
+	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
+	if (!kvm->arch.sca)
+		goto out_err;
+	spin_lock(&kvm_lock);
+	sca_offset = (sca_offset + 16) & 0x7f0;	/* step by 16, wrap within 0x7f0 */
+	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
+	spin_unlock(&kvm_lock);
+
+	sprintf(debug_name, "kvm-%u", current->pid);
+
+	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
+	if (!kvm->arch.dbf)
+		goto out_err;
+
+	/*
+	 * The architectural maximum amount of facilities is 16 kbit. To store
+	 * this amount, 2 kbyte of memory is required. Thus we need a full
+	 * page to hold the guest facility list (arch.model.fac->list) and the
+	 * facility mask (arch.model.fac->mask). Its address size has to be
+	 * 31 bits and word aligned.
+	 */
+	kvm->arch.model.fac =
+		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	if (!kvm->arch.model.fac)
+		goto out_err;
+
+	/* Populate the facility mask initially. */
+	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
+	       S390_ARCH_FAC_LIST_SIZE_BYTE);
+	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
+		if (i < kvm_s390_fac_list_mask_size())
+			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
+		else
+			kvm->arch.model.fac->mask[i] = 0UL;	/* beyond the kvm mask */
+	}
+
+	/* Populate the facility list initially. */
+	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
+	       S390_ARCH_FAC_LIST_SIZE_BYTE);
+
+	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
+	kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
+
+	if (kvm_s390_crypto_init(kvm) < 0)
+		goto out_err;
+
+	spin_lock_init(&kvm->arch.float_int.lock);
+	for (i = 0; i < FIRQ_LIST_COUNT; i++)
+		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
+	init_waitqueue_head(&kvm->arch.ipte_wq);
+	mutex_init(&kvm->arch.ipte_mutex);
+
+	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
+	VM_EVENT(kvm, 3, "%s", "vm created");
+
+	if (type & KVM_VM_S390_UCONTROL) {
+		kvm->arch.gmap = NULL;	/* ucontrol: gmaps are allocated per vcpu */
+	} else {
+		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
+		if (!kvm->arch.gmap)
+			goto out_err;
+		kvm->arch.gmap->private = kvm;
+		kvm->arch.gmap->pfault_enabled = 0;
+	}
+
+	kvm->arch.css_support = 0;
+	kvm->arch.use_irqchip = 0;
+	kvm->arch.epoch = 0;
+
+	spin_lock_init(&kvm->arch.start_stop_lock);
+
+	return 0;
+out_err:
+	kfree(kvm->arch.crypto.crycb);	/* single exit label for all failures */
+	free_page((unsigned long)kvm->arch.model.fac);
+	debug_unregister(kvm->arch.dbf);
+	free_page((unsigned long)(kvm->arch.sca));
+	return rc;
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
+	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
+	kvm_s390_clear_local_irqs(vcpu);
+	kvm_clear_async_pf_completion_queue(vcpu);
+	if (!kvm_is_ucontrol(vcpu->kvm)) {
+		clear_bit(63 - vcpu->vcpu_id,
+			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
+		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
+		    (__u64) vcpu->arch.sie_block)
+			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;	/* only if still ours */
+	}
+	smp_mb();
+
+	if (kvm_is_ucontrol(vcpu->kvm))
+		gmap_free(vcpu->arch.gmap);	/* ucontrol vcpus own their gmap */
+
+	if (kvm_s390_cmma_enabled(vcpu->kvm))
+		kvm_s390_vcpu_unsetup_cmma(vcpu);
+	free_page((unsigned long)(vcpu->arch.sie_block));
+
+	kvm_vcpu_uninit(vcpu);
+	kmem_cache_free(kvm_vcpu_cache, vcpu);	/* @vcpu is gone after this */
+}
+
+static void kvm_free_vcpus(struct kvm *kvm)
+{
+	unsigned int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_arch_vcpu_destroy(vcpu);	/* frees the vcpu itself */
+
+	mutex_lock(&kvm->lock);
+	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
+		kvm->vcpus[i] = NULL;	/* drop the now stale pointers */
+
+	atomic_set(&kvm->online_vcpus, 0);
+	mutex_unlock(&kvm->lock);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+	kvm_free_vcpus(kvm);	/* vcpus go first, then the VM-wide resources */
+	free_page((unsigned long)kvm->arch.model.fac);
+	free_page((unsigned long)(kvm->arch.sca));
+	debug_unregister(kvm->arch.dbf);
+	kfree(kvm->arch.crypto.crycb);
+	if (!kvm_is_ucontrol(kvm))
+		gmap_free(kvm->arch.gmap);	/* ucontrol VMs have no VM-wide gmap */
+	kvm_s390_destroy_adapters(kvm);
+	kvm_s390_clear_float_irqs(kvm);
+}
+
+/* Section: vcpu related */
+static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);	/* per-vcpu gmap */
+	if (!vcpu->arch.gmap)
+		return -ENOMEM;
+	vcpu->arch.gmap->private = vcpu->kvm;	/* back pointer to the VM */
+
+	return 0;
+}
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+	kvm_clear_async_pf_completion_queue(vcpu);
+	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
+				    KVM_SYNC_GPRS |
+				    KVM_SYNC_ACRS |
+				    KVM_SYNC_CRS |
+				    KVM_SYNC_ARCH0 |
+				    KVM_SYNC_PFAULT;
+	if (test_kvm_facility(vcpu->kvm, 129))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;	/* only with facility 129 */
+
+	if (kvm_is_ucontrol(vcpu->kvm))
+		return __kvm_ucontrol_vcpu_init(vcpu);	/* may fail with -ENOMEM */
+
+	return 0;
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);	/* first: save host state */
+	if (test_kvm_facility(vcpu->kvm, 129))
+		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+	else
+		save_fp_regs(vcpu->arch.host_fpregs.fprs);
+	save_access_regs(vcpu->arch.host_acrs);
+	if (test_kvm_facility(vcpu->kvm, 129)) {	/* then: load guest state */
+		restore_fp_ctl(&vcpu->run->s.regs.fpc);
+		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+	} else {
+		restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	}
+	restore_access_regs(vcpu->run->s.regs.acrs);
+	gmap_enable(vcpu->arch.gmap);
+	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+	gmap_disable(vcpu->arch.gmap);
+	if (test_kvm_facility(vcpu->kvm, 129)) {	/* first: save guest state */
+		save_fp_ctl(&vcpu->run->s.regs.fpc);
+		save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+	} else {
+		save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+		save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	}
+	save_access_regs(vcpu->run->s.regs.acrs);
+	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);	/* then: reload host state */
+	if (test_kvm_facility(vcpu->kvm, 129))
+		restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+	else
+		restore_fp_regs(vcpu->arch.host_fpregs.fprs);
+	restore_access_regs(vcpu->arch.host_acrs);
+}
+
+static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
+{
+	/* this equals initial cpu reset in pop, but we don't switch to ESA */
+	vcpu->arch.sie_block->gpsw.mask = 0UL;
+	vcpu->arch.sie_block->gpsw.addr = 0UL;
+	kvm_s390_set_prefix(vcpu, 0);
+	vcpu->arch.sie_block->cputm     = 0UL;
+	vcpu->arch.sie_block->ckc       = 0UL;
+	vcpu->arch.sie_block->todpr     = 0;
+	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
+	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;	/* all other gcr entries stay 0 */
+	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
+	vcpu->arch.guest_fpregs.fpc = 0;
+	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+	vcpu->arch.sie_block->gbea = 1;
+	vcpu->arch.sie_block->pp = 0;
+	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+	kvm_clear_async_pf_completion_queue(vcpu);
+	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+		kvm_s390_vcpu_stop(vcpu);	/* skipped under user cpu state control */
+	kvm_s390_clear_local_irqs(vcpu);
+}
+
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	mutex_lock(&vcpu->kvm->lock);
+	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
+	mutex_unlock(&vcpu->kvm->lock);
+	if (!kvm_is_ucontrol(vcpu->kvm))
+		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
+}
+
+static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
+{
+	if (!test_kvm_facility(vcpu->kvm, 76))
+		return;
+
+	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
+
+	if (vcpu->kvm->arch.crypto.aes_kw)
+		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
+	if (vcpu->kvm->arch.crypto.dea_kw)
+		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
+
+	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
+}
+
+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
+{
+	free_page(vcpu->arch.sie_block->cbrlo);
+	vcpu->arch.sie_block->cbrlo = 0;
+}
+
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
+	if (!vcpu->arch.sie_block->cbrlo)
+		return -ENOMEM;
+
+	vcpu->arch.sie_block->ecb2 |= 0x80;
+	vcpu->arch.sie_block->ecb2 &= ~0x08;
+	return 0;
+}
+
+static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
+
+	vcpu->arch.cpu_id = model->cpu_id;
+	vcpu->arch.sie_block->ibc = model->ibc;
+	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
+}
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	int rc = 0;
+
+	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
+						    CPUSTAT_SM |
+						    CPUSTAT_STOPPED |
+						    CPUSTAT_GED);
+	kvm_s390_vcpu_setup_model(vcpu);
+
+	vcpu->arch.sie_block->ecb   = 6;
+	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
+		vcpu->arch.sie_block->ecb |= 0x10;
+
+	vcpu->arch.sie_block->ecb2  = 8;
+	vcpu->arch.sie_block->eca   = 0xC1002000U;
+	if (sclp_has_siif())
+		vcpu->arch.sie_block->eca |= 1;
+	if (sclp_has_sigpif())
+		vcpu->arch.sie_block->eca |= 0x10000000U;
+	if (test_kvm_facility(vcpu->kvm, 129)) {
+		vcpu->arch.sie_block->eca |= 0x00020000;
+		vcpu->arch.sie_block->ecd |= 0x20000000;
+	}
+	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+
+	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
+		rc = kvm_s390_vcpu_setup_cmma(vcpu);
+		if (rc)
+			return rc;
+	}
+	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
+
+	kvm_s390_vcpu_crypto_setup(vcpu);
+
+	return rc;
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
+				      unsigned int id)
+{
+	struct kvm_vcpu *vcpu;
+	struct sie_page *sie_page;
+	int rc = -EINVAL;
+
+	if (id >= KVM_MAX_VCPUS)
+		goto out;
+
+	rc = -ENOMEM;
+
+	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	if (!vcpu)
+		goto out;
+
+	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
+	if (!sie_page)
+		goto out_free_cpu;
+
+	vcpu->arch.sie_block = &sie_page->sie_block;
+	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+	vcpu->arch.host_vregs = &sie_page->vregs;
+
+	vcpu->arch.sie_block->icpua = id;
+	if (!kvm_is_ucontrol(kvm)) {
+		if (!kvm->arch.sca) {
+			WARN_ON_ONCE(1);
+			goto out_free_sie_block; /* don't leak sie_page */
+		}
+		if (!kvm->arch.sca->cpu[id].sda)
+			kvm->arch.sca->cpu[id].sda =
+				(__u64) vcpu->arch.sie_block;
+		vcpu->arch.sie_block->scaoh =
+			(__u32)(((__u64)kvm->arch.sca) >> 32);
+		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
+		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
+	}
+
+	spin_lock_init(&vcpu->arch.local_int.lock);
+	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
+	vcpu->arch.local_int.wq = &vcpu->wq;
+	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
+
+	rc = kvm_vcpu_init(vcpu, kvm, id);
+	if (rc)
+		goto out_free_sie_block;
+	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
+		 vcpu->arch.sie_block);
+	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
+
+	return vcpu;
+out_free_sie_block:
+	free_page((unsigned long)(vcpu->arch.sie_block));
+out_free_cpu:
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
+	return ERR_PTR(rc);
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+	return kvm_s390_vcpu_has_irq(vcpu, 0);
+}
+
+void s390_vcpu_block(struct kvm_vcpu *vcpu)
+{
+	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
+{
+	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+}
+
+/*
+ * Kick a guest cpu out of SIE and wait until SIE is not running.
+ * If the CPU is not running (e.g. waiting as idle) the function will
+ * return immediately. */
+void exit_sie(struct kvm_vcpu *vcpu)
+{
+	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
+	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
+		cpu_relax();
+}
+
+/* Kick a guest cpu out of SIE and prevent SIE-reentry */
+void exit_sie_sync(struct kvm_vcpu *vcpu)
+{
+	s390_vcpu_block(vcpu);
+	exit_sie(vcpu);
+}
+
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
+{
+	int i;
+	struct kvm *kvm = gmap->private;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		/* match against both prefix pages */
+		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
+			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
+			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+			exit_sie_sync(vcpu);
+		}
+	}
+}
+
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+	/* kvm common code refers to this, but never calls it */
+	BUG();
+	return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
+					   struct kvm_one_reg *reg)
+{
+	int r = -EINVAL;
+
+	switch (reg->id) {
+	case KVM_REG_S390_TODPR:
+		r = put_user(vcpu->arch.sie_block->todpr,
+			     (u32 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_EPOCHDIFF:
+		r = put_user(vcpu->arch.sie_block->epoch,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_CPU_TIMER:
+		r = put_user(vcpu->arch.sie_block->cputm,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_CLOCK_COMP:
+		r = put_user(vcpu->arch.sie_block->ckc,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PFTOKEN:
+		r = put_user(vcpu->arch.pfault_token,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PFCOMPARE:
+		r = put_user(vcpu->arch.pfault_compare,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PFSELECT:
+		r = put_user(vcpu->arch.pfault_select,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PP:
+		r = put_user(vcpu->arch.sie_block->pp,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_GBEA:
+		r = put_user(vcpu->arch.sie_block->gbea,
+			     (u64 __user *)reg->addr);
+		break;
+	default:
+		break;
+	}
+
+	return r;
+}
+
+static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
+					   struct kvm_one_reg *reg)
+{
+	int r = -EINVAL;
+
+	switch (reg->id) {
+	case KVM_REG_S390_TODPR:
+		r = get_user(vcpu->arch.sie_block->todpr,
+			     (u32 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_EPOCHDIFF:
+		r = get_user(vcpu->arch.sie_block->epoch,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_CPU_TIMER:
+		r = get_user(vcpu->arch.sie_block->cputm,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_CLOCK_COMP:
+		r = get_user(vcpu->arch.sie_block->ckc,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PFTOKEN:
+		r = get_user(vcpu->arch.pfault_token,
+			     (u64 __user *)reg->addr);
+		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+			kvm_clear_async_pf_completion_queue(vcpu);
+		break;
+	case KVM_REG_S390_PFCOMPARE:
+		r = get_user(vcpu->arch.pfault_compare,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PFSELECT:
+		r = get_user(vcpu->arch.pfault_select,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_PP:
+		r = get_user(vcpu->arch.sie_block->pp,
+			     (u64 __user *)reg->addr);
+		break;
+	case KVM_REG_S390_GBEA:
+		r = get_user(vcpu->arch.sie_block->gbea,
+			     (u64 __user *)reg->addr);
+		break;
+	default:
+		break;
+	}
+
+	return r;
+}
+
+static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
+{
+	kvm_s390_vcpu_initial_reset(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
+	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
+	restore_access_regs(vcpu->run->s.regs.acrs);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
+	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	if (test_fp_ctl(fpu->fpc))
+		return -EINVAL;
+	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
+	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
+	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+	return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
+{
+	int rc = 0;
+
+	if (!is_vcpu_stopped(vcpu))
+		rc = -EBUSY;
+	else {
+		vcpu->run->psw_mask = psw.mask;
+		vcpu->run->psw_addr = psw.addr;
+	}
+	return rc;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+				  struct kvm_translation *tr)
+{
+	return -EINVAL; /* not implemented yet */
+}
+
+#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
+			      KVM_GUESTDBG_USE_HW_BP | \
+			      KVM_GUESTDBG_ENABLE)
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					struct kvm_guest_debug *dbg)
+{
+	int rc = 0;
+
+	vcpu->guest_debug = 0;
+	kvm_s390_clear_bp_data(vcpu);
+
+	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
+		return -EINVAL;
+
+	if (dbg->control & KVM_GUESTDBG_ENABLE) {
+		vcpu->guest_debug = dbg->control;
+		/* enforce guest PER */
+		atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+
+		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
+			rc = kvm_s390_import_bp_data(vcpu, dbg);
+	} else {
+		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+		vcpu->arch.guestdbg.last_bp = 0;
+	}
+
+	if (rc) {
+		vcpu->guest_debug = 0;
+		kvm_s390_clear_bp_data(vcpu);
+		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+	}
+
+	return rc;
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	/* CHECK_STOP and LOAD are not supported yet */
+	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
+				       KVM_MP_STATE_OPERATING;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	int rc = 0;
+
+	/* user space knows about this interface - let it control the state */
+	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
+
+	switch (mp_state->mp_state) {
+	case KVM_MP_STATE_STOPPED:
+		kvm_s390_vcpu_stop(vcpu);
+		break;
+	case KVM_MP_STATE_OPERATING:
+		kvm_s390_vcpu_start(vcpu);
+		break;
+	case KVM_MP_STATE_LOAD:
+	case KVM_MP_STATE_CHECK_STOP:
+		/* fall through - CHECK_STOP and LOAD are not supported yet */
+	default:
+		rc = -ENXIO;
+	}
+
+	return rc;
+}
+
+bool kvm_s390_cmma_enabled(struct kvm *kvm)
+{
+	if (!MACHINE_IS_LPAR)
+		return false;
+	/* only enable for z10 and later */
+	if (!MACHINE_HAS_EDAT1)
+		return false;
+	if (!kvm->arch.use_cmma)
+		return false;
+	return true;
+}
+
+static bool ibs_enabled(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
+}
+
+static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
+{
+retry:
+	s390_vcpu_unblock(vcpu);
+	/*
+	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
+	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
+	 * This ensures that the ipte instruction for this request has
+	 * already finished. We might race against a second unmapper that
+	 * wants to set the blocking bit. Let's just retry the request loop.
+	 */
+	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
+		int rc;
+		rc = gmap_ipte_notify(vcpu->arch.gmap,
+				      kvm_s390_get_prefix(vcpu),
+				      PAGE_SIZE * 2);
+		if (rc)
+			return rc;
+		goto retry;
+	}
+
+	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
+		vcpu->arch.sie_block->ihcpu = 0xffff;
+		goto retry;
+	}
+
+	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
+		if (!ibs_enabled(vcpu)) {
+			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
+			atomic_set_mask(CPUSTAT_IBS,
+					&vcpu->arch.sie_block->cpuflags);
+		}
+		goto retry;
+	}
+
+	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
+		if (ibs_enabled(vcpu)) {
+			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
+			atomic_clear_mask(CPUSTAT_IBS,
+					  &vcpu->arch.sie_block->cpuflags);
+		}
+		goto retry;
+	}
+
+	/* nothing to do, just clear the request */
+	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+
+	return 0;
+}
+
+/**
+ * kvm_arch_fault_in_page - fault-in guest page if necessary
+ * @vcpu: The corresponding virtual cpu
+ * @gpa: Guest physical address
+ * @writable: Whether the page should be writable or not
+ *
+ * Make sure that a guest page has been faulted-in on the host.
+ *
+ * Return: Zero on success, negative error code otherwise.
+ */
+long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
+{
+	return gmap_fault(vcpu->arch.gmap, gpa,
+			  writable ? FAULT_FLAG_WRITE : 0);
+}
+
+static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
+				      unsigned long token)
+{
+	struct kvm_s390_interrupt inti = {};	/* no stale stack bytes */
+	struct kvm_s390_irq irq = {};		/* no stale stack bytes */
+
+	if (start_token) {
+		irq.u.ext.ext_params2 = token;
+		irq.type = KVM_S390_INT_PFAULT_INIT;
+		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
+	} else {
+		inti.type = KVM_S390_INT_PFAULT_DONE;
+		inti.parm64 = token;
+		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
+	}
+}
+
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+				     struct kvm_async_pf *work)
+{
+	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
+	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+				 struct kvm_async_pf *work)
+{
+	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
+	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
+}
+
+void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+			       struct kvm_async_pf *work)
+{
+	/* s390 will always inject the page directly */
+}
+
+bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * s390 will always inject the page directly,
+	 * but we still want check_async_completion to cleanup
+	 */
+	return true;
+}
+
+static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+{
+	hva_t hva;
+	struct kvm_arch_async_pf arch;
+	int rc;
+
+	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+		return 0;
+	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
+	    vcpu->arch.pfault_compare)
+		return 0;
+	if (psw_extint_disabled(vcpu))
+		return 0;
+	if (kvm_s390_vcpu_has_irq(vcpu, 0))
+		return 0;
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+		return 0;
+	if (!vcpu->arch.gmap->pfault_enabled)
+		return 0;
+
+	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
+	hva += current->thread.gmap_addr & ~PAGE_MASK;
+	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
+		return 0;
+
+	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+	return rc;
+}
+
+static int vcpu_pre_run(struct kvm_vcpu *vcpu)
+{
+	int rc, cpuflags;
+
+	/*
+	 * On s390 notifications for arriving pages will be delivered directly
+	 * to the guest but the housekeeping for completed pfaults is
+	 * handled outside the worker.
+	 */
+	kvm_check_async_pf_completion(vcpu);
+
+	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
+
+	if (need_resched())
+		schedule();
+
+	if (test_cpu_flag(CIF_MCCK_PENDING))
+		s390_handle_mcck();
+
+	if (!kvm_is_ucontrol(vcpu->kvm)) {
+		rc = kvm_s390_deliver_pending_interrupts(vcpu);
+		if (rc)
+			return rc;
+	}
+
+	rc = kvm_s390_handle_requests(vcpu);
+	if (rc)
+		return rc;
+
+	if (guestdbg_enabled(vcpu)) {
+		kvm_s390_backup_guest_per_regs(vcpu);
+		kvm_s390_patch_guest_per_regs(vcpu);
+	}
+
+	vcpu->arch.sie_block->icptcode = 0;
+	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
+	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
+	trace_kvm_s390_sie_enter(vcpu, cpuflags);
+
+	return 0;
+}
+
+static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
+{
+	psw_t *psw = &vcpu->arch.sie_block->gpsw;
+	u8 opcode;
+	int rc;
+
+	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+	trace_kvm_s390_sie_fault(vcpu);
+
+	/*
+	 * We want to inject an addressing exception, which is defined as a
+	 * suppressing or terminating exception. However, since we came here
+	 * by a DAT access exception, the PSW still points to the faulting
+	 * instruction since DAT exceptions are nullifying. So we've got
+	 * to look up the current opcode to get the length of the instruction
+	 * to be able to forward the PSW.
+	 */
+	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	psw->addr = __rewind_psw(*psw, -insn_length(opcode));
+
+	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+}
+
+static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
+{
+	int rc = -1;
+
+	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
+		   vcpu->arch.sie_block->icptcode);
+	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
+
+	if (guestdbg_enabled(vcpu))
+		kvm_s390_restore_guest_per_regs(vcpu);
+
+	if (exit_reason >= 0) {
+		rc = 0;
+	} else if (kvm_is_ucontrol(vcpu->kvm)) {
+		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+		vcpu->run->s390_ucontrol.trans_exc_code =
+						current->thread.gmap_addr;
+		vcpu->run->s390_ucontrol.pgm_code = 0x10;
+		rc = -EREMOTE;
+
+	} else if (current->thread.gmap_pfault) {
+		trace_kvm_s390_major_guest_pfault(vcpu);
+		current->thread.gmap_pfault = 0;
+		if (kvm_arch_setup_async_pf(vcpu)) {
+			rc = 0;
+		} else {
+			gpa_t gpa = current->thread.gmap_addr;
+			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
+		}
+	}
+
+	if (rc == -1)
+		rc = vcpu_post_run_fault_in_sie(vcpu);
+
+	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
+
+	if (rc == 0) {
+		if (kvm_is_ucontrol(vcpu->kvm))
+			/* Don't exit for host interrupts. */
+			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
+		else
+			rc = kvm_handle_sie_intercept(vcpu);
+	}
+
+	return rc;
+}
+
+static int __vcpu_run(struct kvm_vcpu *vcpu)
+{
+	int rc, exit_reason;
+
+	/*
+	 * We try to hold kvm->srcu during most of vcpu_run (except while
+	 * running the guest), so that memslots (and other stuff) are protected
+	 */
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	do {
+		rc = vcpu_pre_run(vcpu);
+		if (rc)
+			break;
+
+		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+		/*
+		 * As PF_VCPU will be used in the fault handler, there should
+		 * be no uaccess between guest_enter and guest_exit.
+		 */
+		preempt_disable();
+		kvm_guest_enter();
+		preempt_enable();
+		exit_reason = sie64a(vcpu->arch.sie_block,
+				     vcpu->run->s.regs.gprs);
+		kvm_guest_exit();
+		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+		rc = vcpu_post_run(vcpu, exit_reason);
+	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
+
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	return rc;
+}
+
+static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
+	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
+	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
+		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
+	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
+		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
+		/* some control register changes require a tlb flush */
+		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	}
+	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
+		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
+		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
+		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
+		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
+		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
+	}
+	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
+		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
+		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
+		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
+		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+			kvm_clear_async_pf_completion_queue(vcpu);
+	}
+	kvm_run->kvm_dirty_regs = 0;
+}
+
+static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
+	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
+	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
+	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
+	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
+	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
+	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
+	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
+	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
+	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
+	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
+	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	int rc;
+	sigset_t sigsaved;
+
+	if (guestdbg_exit_pending(vcpu)) {
+		kvm_s390_prepare_debug_exit(vcpu);
+		return 0;
+	}
+
+	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
+		kvm_s390_vcpu_start(vcpu);
+	} else if (is_vcpu_stopped(vcpu)) {
+		pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
+				   vcpu->vcpu_id);
+		return -EINVAL;
+	}
+	/* switch sigmask last: no early return may skip the restore below */
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+	sync_regs(vcpu, kvm_run);
+
+	might_fault();
+	rc = __vcpu_run(vcpu);
+
+	if (signal_pending(current) && !rc) {
+		kvm_run->exit_reason = KVM_EXIT_INTR;
+		rc = -EINTR;
+	}
+
+	if (guestdbg_exit_pending(vcpu) && !rc)  {
+		kvm_s390_prepare_debug_exit(vcpu);
+		rc = 0;
+	}
+
+	if (rc == -EOPNOTSUPP) {
+		/* intercept cannot be handled in-kernel, prepare kvm-run */
+		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
+		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
+		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
+		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
+		rc = 0;
+	}
+
+	if (rc == -EREMOTE) {
+		/* intercept was handled, but userspace support is needed
+		 * kvm_run has been prepared by the handler */
+		rc = 0;
+	}
+
+	store_regs(vcpu, kvm_run);
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	vcpu->stat.exit_userspace++;
+	return rc;
+}
+
+/*
+ * store status at address
+ * we have two special cases:
+ * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
+ * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
+ */
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
+{
+	unsigned char archmode = 1;
+	unsigned int px;
+	u64 clkcomp;
+	int rc;
+
+	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
+		if (write_guest_abs(vcpu, 163, &archmode, 1))
+			return -EFAULT;
+		gpa = SAVE_AREA_BASE;
+	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
+		if (write_guest_real(vcpu, 163, &archmode, 1))
+			return -EFAULT;
+		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
+	}
+	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
+			     vcpu->arch.guest_fpregs.fprs, 128);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
+			      vcpu->run->s.regs.gprs, 128);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
+			      &vcpu->arch.sie_block->gpsw, 16);
+	px = kvm_s390_get_prefix(vcpu);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
+			      &px, 4);
+	rc |= write_guest_abs(vcpu,
+			      gpa + offsetof(struct save_area, fp_ctrl_reg),
+			      &vcpu->arch.guest_fpregs.fpc, 4);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
+			      &vcpu->arch.sie_block->todpr, 4);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
+			      &vcpu->arch.sie_block->cputm, 8);
+	clkcomp = vcpu->arch.sie_block->ckc >> 8;
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
+			      &clkcomp, 8);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
+			      &vcpu->run->s.regs.acrs, 64);
+	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
+			      &vcpu->arch.sie_block->gcr, 128);
+	return rc ? -EFAULT : 0;
+}
+
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	/*
+	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
+	 * copying in vcpu load/put. Let's update our copies before we save
+	 * them into the save area.
+	 */
+	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	save_access_regs(vcpu->run->s.regs.acrs);
+
+	return kvm_s390_store_status_unloaded(vcpu, addr);
+}
+
+/*
+ * store additional status at address
+ */
+int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
+					unsigned long gpa)
+{
+	/* Only bits 0-53 are used for address formation */
+	if (!(gpa & ~0x3ff))
+		return 0;
+
+	return write_guest_abs(vcpu, gpa & ~0x3ff,
+			       (void *)&vcpu->run->s.regs.vrs, 512);
+}
+
+int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	if (!test_kvm_facility(vcpu->kvm, 129))
+		return 0;
+
+	/*
+	 * The guest VXRS are in the host VXRs due to the lazy
+	 * copying in vcpu load/put. Let's update our copies before we save
+	 * them into the save area.
+	 */
+	save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+
+	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
+}
+
+static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
+	kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
+	exit_sie_sync(vcpu);
+}
+
+static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
+{
+	unsigned int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		__disable_ibs_on_vcpu(vcpu);
+	}
+}
+
+static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
+	kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
+	exit_sie_sync(vcpu);
+}
+
+void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
+{
+	int i, online_vcpus, started_vcpus = 0;
+
+	if (!is_vcpu_stopped(vcpu))
+		return;
+
+	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
+	/* Only one cpu at a time may enter/leave the STOPPED state. */
+	spin_lock(&vcpu->kvm->arch.start_stop_lock);
+	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+	for (i = 0; i < online_vcpus; i++) {
+		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
+			started_vcpus++;
+	}
+
+	if (started_vcpus == 0) {
+		/* we're the only active VCPU -> speed it up */
+		__enable_ibs_on_vcpu(vcpu);
+	} else if (started_vcpus == 1) {
+		/*
+		 * As we are starting a second VCPU, we have to disable
+		 * the IBS facility on all VCPUs to remove potentially
+		 * outstanding ENABLE requests.
+		 */
+		__disable_ibs_on_all_vcpus(vcpu->kvm);
+	}
+
+	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	/*
+	 * Another VCPU might have used IBS while we were offline.
+	 * Let's play safe and flush the VCPU at startup.
+	 */
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+	return;
+}
+
+void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+{
+	int i, online_vcpus, started_vcpus = 0;
+	struct kvm_vcpu *started_vcpu = NULL;
+
+	if (is_vcpu_stopped(vcpu))
+		return;
+
+	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
+	/* Only one cpu at a time may enter/leave the STOPPED state. */
+	spin_lock(&vcpu->kvm->arch.start_stop_lock);
+	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
+	kvm_s390_clear_stop_irq(vcpu);
+
+	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	__disable_ibs_on_vcpu(vcpu);
+
+	for (i = 0; i < online_vcpus; i++) {
+		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
+			started_vcpus++;
+			started_vcpu = vcpu->kvm->vcpus[i];
+		}
+	}
+
+	if (started_vcpus == 1) {
+		/*
+		 * As we only have one VCPU left, we want to enable the
+		 * IBS facility for that VCPU to speed it up.
+		 */
+		__enable_ibs_on_vcpu(started_vcpu);
+	}
+
+	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+	return;
+}
+
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+				     struct kvm_enable_cap *cap)
+{
+	int r;
+
+	if (cap->flags)
+		return -EINVAL;
+
+	switch (cap->cap) {
+	case KVM_CAP_S390_CSS_SUPPORT:
+		if (!vcpu->kvm->arch.css_support) {
+			vcpu->kvm->arch.css_support = 1;
+			trace_kvm_s390_enable_css(vcpu->kvm);
+		}
+		r = 0;
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
+static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
+				  struct kvm_s390_mem_op *mop)
+{
+	void __user *uaddr = (void __user *)mop->buf;
+	void *tmpbuf = NULL;
+	int r, srcu_idx;
+	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
+				    | KVM_S390_MEMOP_F_CHECK_ONLY;
+
+	if ((mop->flags & ~supported_flags) || !mop->size)
+		return -EINVAL;	/* unknown flag or empty request */
+
+	if (mop->size > MEM_OP_MAX_SIZE)
+		return -E2BIG;
+
+	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
+		tmpbuf = vmalloc(mop->size);
+		if (!tmpbuf)
+			return -ENOMEM;
+	}
+
+	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	switch (mop->op) {
+	case KVM_S390_MEMOP_LOGICAL_READ:
+		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
+			break;
+		}
+		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+		if (r == 0) {
+			if (copy_to_user(uaddr, tmpbuf, mop->size))
+				r = -EFAULT;
+		}
+		break;
+	case KVM_S390_MEMOP_LOGICAL_WRITE:
+		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
+			break;
+		}
+		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+			r = -EFAULT;
+			break;
+		}
+		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+
+	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
+		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+
+	vfree(tmpbuf);
+	return r;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+			 unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	int idx;
+	long r;
+
+	switch (ioctl) {
+	case KVM_S390_IRQ: {
+		struct kvm_s390_irq s390irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
+			break;
+		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
+		break;
+	}
+	case KVM_S390_INTERRUPT: {
+		struct kvm_s390_interrupt s390int;
+		struct kvm_s390_irq s390irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&s390int, argp, sizeof(s390int)))
+			break;
+		if (s390int_to_s390irq(&s390int, &s390irq))
+			return -EINVAL;
+		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
+		break;
+	}
+	case KVM_S390_STORE_STATUS:
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		r = kvm_s390_vcpu_store_status(vcpu, arg);
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+		break;
+	case KVM_S390_SET_INITIAL_PSW: {
+		psw_t psw;
+
+		r = -EFAULT;
+		if (copy_from_user(&psw, argp, sizeof(psw)))
+			break;
+		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
+		break;
+	}
+	case KVM_S390_INITIAL_RESET:
+		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+		break;
+	case KVM_SET_ONE_REG:
+	case KVM_GET_ONE_REG: {
+		struct kvm_one_reg reg;
+		r = -EFAULT;
+		if (copy_from_user(&reg, argp, sizeof(reg)))
+			break;
+		if (ioctl == KVM_SET_ONE_REG)
+			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
+		else
+			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
+		break;
+	}
+#ifdef CONFIG_KVM_S390_UCONTROL
+	case KVM_S390_UCAS_MAP: {
+		struct kvm_s390_ucas_mapping ucasmap;
+
+		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
+			r = -EFAULT;
+			break;
+		}
+
+		if (!kvm_is_ucontrol(vcpu->kvm)) {
+			r = -EINVAL;
+			break;
+		}
+
+		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
+				     ucasmap.vcpu_addr, ucasmap.length);
+		break;
+	}
+	case KVM_S390_UCAS_UNMAP: {
+		struct kvm_s390_ucas_mapping ucasmap;
+
+		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
+			r = -EFAULT;
+			break;
+		}
+
+		if (!kvm_is_ucontrol(vcpu->kvm)) {
+			r = -EINVAL;
+			break;
+		}
+
+		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
+			ucasmap.length);
+		break;
+	}
+#endif
+	case KVM_S390_VCPU_FAULT: {
+		r = gmap_fault(vcpu->arch.gmap, arg, 0);
+		break;
+	}
+	case KVM_ENABLE_CAP:
+	{
+		struct kvm_enable_cap cap;
+		r = -EFAULT;
+		if (copy_from_user(&cap, argp, sizeof(cap)))
+			break;
+		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+		break;
+	}
+	case KVM_S390_MEM_OP: {
+		struct kvm_s390_mem_op mem_op;
+
+		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
+			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+		else
+			r = -EFAULT;
+		break;
+	}
+	case KVM_S390_SET_IRQ_STATE: {
+		struct kvm_s390_irq_state irq_state;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+			break;
+		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
+		    irq_state.len == 0 ||
+		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
+			r = -EINVAL;
+			break;
+		}
+		r = kvm_s390_set_irq_state(vcpu,
+					   (void __user *) irq_state.buf,
+					   irq_state.len);
+		break;
+	}
+	case KVM_S390_GET_IRQ_STATE: {
+		struct kvm_s390_irq_state irq_state;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+			break;
+		if (irq_state.len == 0) {
+			r = -EINVAL;
+			break;
+		}
+		r = kvm_s390_get_irq_state(vcpu,
+					   (__u8 __user *)  irq_state.buf,
+					   irq_state.len);
+		break;
+	}
+	default:
+		r = -ENOTTY;
+	}
+	return r;
+}
+
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+#ifdef CONFIG_KVM_S390_UCONTROL
+	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
+		 && (kvm_is_ucontrol(vcpu->kvm))) {
+		vmf->page = virt_to_page(vcpu->arch.sie_block);
+		get_page(vmf->page);
+		return 0;
+	}
+#endif
+	return VM_FAULT_SIGBUS;
+}
+
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
+{
+	return 0;
+}
+
+/* Section: memory related */
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				   struct kvm_memory_slot *memslot,
+				   struct kvm_userspace_memory_region *mem,
+				   enum kvm_mr_change change)
+{
+	/* A few sanity checks. Memory slots must start and end on a segment
+	   boundary (1MB). The memory in userland may be fragmented into
+	   several different vmas. It is okay to mmap() and munmap() memory
+	   in this slot at any time after this call. */
+
+	if (mem->userspace_addr & 0xffffful)
+		return -EINVAL;
+
+	if (mem->memory_size & 0xffffful)
+		return -EINVAL;
+
+	return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				const struct kvm_memory_slot *old,
+				enum kvm_mr_change change)
+{
+	int rc;
+
+	/* If the basics of the memslot do not change, we do not want
+	 * to update the gmap. Every update causes several unnecessary
+	 * segment translation exceptions. This is usually handled just
+	 * fine by the normal fault handler + gmap, but it will also
+	 * cause faults on the prefix page of running guest CPUs.
+	 */
+	if (old->userspace_addr == mem->userspace_addr &&
+	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
+	    old->npages * PAGE_SIZE == mem->memory_size)
+		return;
+
+	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
+		mem->guest_phys_addr, mem->memory_size);
+	if (rc)
+		printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
+	return;
+}
+
+static int __init kvm_s390_init(void)
+{
+	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+}
+
+static void __exit kvm_s390_exit(void)
+{
+	kvm_exit();
+}
+
+module_init(kvm_s390_init);
+module_exit(kvm_s390_exit);
+
+/*
+ * Enable autoloading of the kvm module.
+ * Note that we add the module alias here instead of virt/kvm/kvm_main.c
+ * since x86 takes a different approach.
+ */
+#include <linux/miscdevice.h>
+MODULE_ALIAS_MISCDEV(KVM_MINOR);
+MODULE_ALIAS("devname:kvm");
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
new file mode 100644
index 000000000..ca108b90a
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.h
@@ -0,0 +1,290 @@
+/*
+ * definition for kvm on s390
+ *
+ * Copyright IBM Corp. 2008, 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ *               Christian Ehrhardt <ehrhardt@de.ibm.com>
+ */
+
+#ifndef ARCH_S390_KVM_S390_H
+#define ARCH_S390_KVM_S390_H
+
+#include <linux/hrtimer.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <asm/facility.h>
+
+typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
+
+/* Transactional Memory Execution related macros (argument parenthesized) */
+#define IS_TE_ENABLED(vcpu)	((vcpu)->arch.sie_block->ecb & 0x10)
+#define TDB_FORMAT1		1
+#define IS_ITDB_VALID(vcpu)	(*(char *)(vcpu)->arch.sie_block->itdba == TDB_FORMAT1)
+
+#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event((d_kvm)->arch.dbf, d_loglevel, d_string "\n", \
+	  d_args); \
+} while (0)
+
+#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event((d_vcpu)->kvm->arch.dbf, d_loglevel, \
+	  "%02d[%016lx-%016lx]: " d_string "\n", (d_vcpu)->vcpu_id, \
+	  (d_vcpu)->arch.sie_block->gpsw.mask, \
+	  (d_vcpu)->arch.sie_block->gpsw.addr, d_args); \
+} while (0)
+
+static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
+}
+
+static inline int kvm_is_ucontrol(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_S390_UCONTROL
+	if (kvm->arch.gmap)
+		return 0;
+	return 1;
+#else
+	return 0;
+#endif
+}
+
+#define GUEST_PREFIX_SHIFT 13
+static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.sie_block->prefix << GUEST_PREFIX_SHIFT;
+}
+
+static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
+{
+	vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+}
+
+typedef u8 __bitwise ar_t;
+
+static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, ar_t *ar)
+{
+	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+
+	if (ar)
+		*ar = base2;
+
+	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+}
+
+static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
+					      u64 *address1, u64 *address2,
+					      ar_t *ar_b1, ar_t *ar_b2)
+{
+	u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
+	u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
+	u32 base2 = (vcpu->arch.sie_block->ipb & 0xf000) >> 12;
+	u32 disp2 = vcpu->arch.sie_block->ipb & 0x0fff;
+
+	*address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
+	*address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+
+	if (ar_b1)
+		*ar_b1 = base1;
+	if (ar_b2)
+		*ar_b2 = base2;
+}
+
+static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
+{
+	if (r1)
+		*r1 = (vcpu->arch.sie_block->ipb & 0x00f00000) >> 20;
+	if (r2)
+		*r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
+}
+
+static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, ar_t *ar)
+{
+	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+			((vcpu->arch.sie_block->ipb & 0xff00) << 4);
+	/* The displacement is a 20bit _SIGNED_ value */
+	if (disp2 & 0x80000)
+		disp2+=0xfff00000;
+
+	if (ar)
+		*ar = base2;
+
+	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2;
+}
+
+static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, ar_t *ar)
+{
+	u32 base2 = vcpu->arch.sie_block->ipb >> 28;
+	u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+
+	if (ar)
+		*ar = base2;
+
+	return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+}
+
+/* Set the condition code in the guest program status word */
+static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
+{
+	vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44);
+	vcpu->arch.sie_block->gpsw.mask |= cc << 44;
+}
+
+/* test availability of facility in a kvm instance */
+static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
+{
+	return __test_facility(nr, kvm->arch.model.fac->mask) &&
+		__test_facility(nr, kvm->arch.model.fac->list);
+}
+
+static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
+{
+	unsigned char *ptr;
+
+	if (nr >= MAX_FACILITY_BIT)
+		return -EINVAL;
+	ptr = (unsigned char *) fac_list + (nr >> 3);
+	*ptr |= (0x80UL >> (nr & 7));
+	return 0;
+}
+
+/* are cpu states controlled by user space */
+static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
+{
+	return kvm->arch.user_cpu_state_ctrl != 0;
+}
+
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
+enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
+int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_float_irqs(struct kvm *kvm);
+int __must_check kvm_s390_inject_vm(struct kvm *kvm,
+				    struct kvm_s390_interrupt *s390int);
+int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+				      struct kvm_s390_irq *irq);
+int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
+						    u64 isc_mask, u32 schid);
+int kvm_s390_reinject_io_int(struct kvm *kvm,
+			     struct kvm_s390_interrupt_info *inti);
+int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
+
+/* implemented in intercept.c */
+void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc);
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+
+/* implemented in priv.c */
+int is_valid_psw(psw_t *psw);
+int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
+
+/* implemented in sigp.c */
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
+
+/* implemented in kvm-s390.c */
+long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
+					unsigned long addr);
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
+void s390_vcpu_block(struct kvm_vcpu *vcpu);
+void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
+void exit_sie(struct kvm_vcpu *vcpu);
+void exit_sie_sync(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
+/* is cmma enabled */
+bool kvm_s390_cmma_enabled(struct kvm *kvm);
+unsigned long kvm_s390_fac_list_mask_size(void);
+extern unsigned long kvm_s390_fac_list_mask[];
+
+/* implemented in diag.c */
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
+/* implemented in interrupt.c */
+int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
+			     struct kvm_s390_pgm_info *pgm_info);
+
+/**
+ * kvm_s390_inject_prog_cond - conditionally inject a program check
+ * @vcpu: virtual cpu
+ * @rc: original return/error code
+ *
+ * This function is supposed to be used after regular guest access functions
+ * failed, to conditionally inject a program check to a vcpu. The typical
+ * pattern would look like
+ *
+ * rc = write_guest(vcpu, addr, ar, data, len);
+ * if (rc)
+ *	return kvm_s390_inject_prog_cond(vcpu, rc);
+ *
+ * A negative return code from guest access functions implies an internal error
+ * like e.g. out of memory. In these cases no program check should be injected
+ * to the guest.
+ * A positive value implies that an exception happened while accessing a guest's
+ * memory. In this case all data belonging to the corresponding program check
+ * has been stored in vcpu->arch.pgm and can be injected with
+ * kvm_s390_inject_prog_irq().
+ *
+ * Returns: - the original @rc value if @rc was negative (internal error)
+ *	    - zero if @rc was already zero
+ *	    - zero or error code from injecting if @rc was positive
+ *	      (program check injected to @vcpu)
+ */
+static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
+{
+	if (rc <= 0)
+		return rc;
+	return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+}
+
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+			struct kvm_s390_irq *s390irq);
+
+/* implemented in interrupt.c */
+int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop);
+int psw_extint_disabled(struct kvm_vcpu *vcpu);
+void kvm_s390_destroy_adapters(struct kvm *kvm);
+int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
+extern struct kvm_device_ops kvm_flic_ops;
+int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
+void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
+			   void __user *buf, int len);
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu,
+			   __u8 __user *buf, int len);
+
+/* implemented in guestdbg.c */
+void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
+void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu);
+void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu);
+int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
+			    struct kvm_guest_debug *dbg);
+void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
+void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
+void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
new file mode 100644
index 000000000..d22d8ee1f
--- /dev/null
+++ b/arch/s390/kvm/priv.c
@@ -0,0 +1,1060 @@
+/*
+ * handling privileged instructions
+ *
+ * Copyright IBM Corp. 2008, 2013
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/gfp.h>
+#include <linux/errno.h>
+#include <linux/compat.h>
+#include <asm/asm-offsets.h>
+#include <asm/facility.h>
+#include <asm/current.h>
+#include <asm/debug.h>
+#include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/io.h>
+#include <asm/ptrace.h>
+#include <asm/compat.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+#include "trace.h"
+
+/* Handle SCK (SET CLOCK) interception */
+static int handle_set_clock(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu *cpup;
+	s64 hostclk, val;
+	int i, rc;
+	ar_t ar;
+	u64 op2;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	op2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+	if (op2 & 7)	/* Operand must be on a doubleword boundary */
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	rc = read_guest(vcpu, op2, ar, &val, sizeof(val));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+
+	if (store_tod_clock(&hostclk)) {
+		kvm_s390_set_psw_cc(vcpu, 3);
+		return 0;
+	}
+	val = (val - hostclk) & ~0x3fUL;
+
+	mutex_lock(&vcpu->kvm->lock);
+	kvm_for_each_vcpu(i, cpup, vcpu->kvm)
+		cpup->arch.sie_block->epoch = val;
+	mutex_unlock(&vcpu->kvm->lock);
+
+	kvm_s390_set_psw_cc(vcpu, 0);
+	return 0;
+}
+
+static int handle_set_prefix(struct kvm_vcpu *vcpu)
+{
+	u64 operand2;
+	u32 address;
+	int rc;
+	ar_t ar;
+
+	vcpu->stat.instruction_spx++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+	/* must be word boundary */
+	if (operand2 & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	/* get the value */
+	rc = read_guest(vcpu, operand2, ar, &address, sizeof(address));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+
+	address &= 0x7fffe000u;
+
+	/*
+	 * Make sure the new value is valid memory. We only need to check the
+	 * first page, since address is 8k aligned and memory pieces are always
+	 * at least 1MB aligned and have at least a size of 1MB.
+	 */
+	if (kvm_is_error_gpa(vcpu->kvm, address))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+	kvm_s390_set_prefix(vcpu, address);
+
+	VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
+	trace_kvm_s390_handle_prefix(vcpu, 1, address);
+	return 0;
+}
+
+static int handle_store_prefix(struct kvm_vcpu *vcpu)
+{
+	u64 operand2;
+	u32 address;
+	int rc;
+	ar_t ar;
+
+	vcpu->stat.instruction_stpx++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+	/* must be word boundary */
+	if (operand2 & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	address = kvm_s390_get_prefix(vcpu);
+
+	/* store the current prefix at the operand address */
+	rc = write_guest(vcpu, operand2, ar, &address, sizeof(address));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+
+	VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
+	trace_kvm_s390_handle_prefix(vcpu, 0, address);
+	return 0;
+}
+
+static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
+{
+	u16 vcpu_id = vcpu->vcpu_id;
+	u64 ga;
+	int rc;
+	ar_t ar;
+
+	vcpu->stat.instruction_stap++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	ga = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+	if (ga & 1)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	rc = write_guest(vcpu, ga, ar, &vcpu_id, sizeof(vcpu_id));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+
+	VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga);
+	trace_kvm_s390_handle_stap(vcpu, ga);
+	return 0;
+}
+
+static int __skey_check_enable(struct kvm_vcpu *vcpu)
+{
+	int rc = 0;
+	if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)))
+		return rc;
+
+	rc = s390_enable_skey();
+	trace_kvm_s390_skey_related_inst(vcpu);
+	if (!rc)	/* keep intercepting key ops if enabling skeys failed */
+		vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+	return rc;
+}
+
+static int handle_skey(struct kvm_vcpu *vcpu)
+{
+	int rc = __skey_check_enable(vcpu);
+
+	if (rc)
+		return rc;
+	vcpu->stat.instruction_storage_key++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	kvm_s390_rewind_psw(vcpu, 4);
+	VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
+	return 0;
+}
+
+static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.instruction_ipte_interlock++;
+	if (psw_bits(vcpu->arch.sie_block->gpsw).p)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+	wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
+	kvm_s390_rewind_psw(vcpu, 4);
+	VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
+	return 0;
+}
+
+static int handle_test_block(struct kvm_vcpu *vcpu)
+{
+	gpa_t addr;
+	int reg2;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
+	addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	addr = kvm_s390_logical_to_effective(vcpu, addr);
+	if (kvm_s390_check_low_addr_prot_real(vcpu, addr))
+		return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+	addr = kvm_s390_real_to_abs(vcpu, addr);
+
+	if (kvm_is_error_gpa(vcpu->kvm, addr))
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	/*
+	 * We don't expect errors on modern systems, and do not care
+	 * about storage keys (yet), so let's just clear the page.
+	 */
+	if (kvm_clear_guest(vcpu->kvm, addr, PAGE_SIZE))
+		return -EFAULT;
+	kvm_s390_set_psw_cc(vcpu, 0);
+	vcpu->run->s.regs.gprs[0] = 0;
+	return 0;
+}
+
+static int handle_tpi(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_interrupt_info *inti;
+	unsigned long len;
+	u32 tpi_data[3];
+	int rc;
+	u64 addr;
+	ar_t ar;
+
+	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
+	if (addr & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
+	if (!inti) {
+		kvm_s390_set_psw_cc(vcpu, 0);
+		return 0;
+	}
+
+	tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
+	tpi_data[1] = inti->io.io_int_parm;
+	tpi_data[2] = inti->io.io_int_word;
+	if (addr) {
+		/*
+		 * Store the two-word I/O interruption code into the
+		 * provided area.
+		 */
+		len = sizeof(tpi_data) - 4;
+		rc = write_guest(vcpu, addr, ar, &tpi_data, len);
+		if (rc) {
+			rc = kvm_s390_inject_prog_cond(vcpu, rc);
+			goto reinject_interrupt;
+		}
+	} else {
+		/*
+		 * Store the three-word I/O interruption code into
+		 * the appropriate lowcore area.
+		 */
+		len = sizeof(tpi_data);
+		if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) {
+			/* failed writes to the low core are not recoverable */
+			rc = -EFAULT;
+			goto reinject_interrupt;
+		}
+	}
+
+	/* irq was successfully handed to the guest */
+	kfree(inti);
+	kvm_s390_set_psw_cc(vcpu, 1);
+	return 0;
+reinject_interrupt:
+	/*
+	 * If we encounter a problem storing the interruption code, the
+	 * instruction is suppressed from the guest's view: reinject the
+	 * interrupt.
+	 */
+	if (kvm_s390_reinject_io_int(vcpu->kvm, inti)) {
+		kfree(inti);
+		rc = -EFAULT;
+	}
+	/* don't set the cc, a pgm irq was injected or we drop to user space */
+	return rc ? -EFAULT : 0;
+}
+
+static int handle_tsch(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_interrupt_info *inti = NULL;
+	const u64 isc_mask = 0xffUL << 24; /* all iscs set */
+
+	/* a valid schid has at least one bit set */
+	if (vcpu->run->s.regs.gprs[1])
+		inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask,
+					   vcpu->run->s.regs.gprs[1]);
+
+	/*
+	 * Prepare exit to userspace.
+	 * We indicate whether we dequeued a pending I/O interrupt
+	 * so that userspace can re-inject it if the instruction gets
+	 * a program check. While this may re-order the pending I/O
+	 * interrupts, this is no problem since the priority is kept
+	 * intact.
+	 */
+	vcpu->run->exit_reason = KVM_EXIT_S390_TSCH;
+	vcpu->run->s390_tsch.dequeued = !!inti;
+	if (inti) {
+		vcpu->run->s390_tsch.subchannel_id = inti->io.subchannel_id;
+		vcpu->run->s390_tsch.subchannel_nr = inti->io.subchannel_nr;
+		vcpu->run->s390_tsch.io_int_parm = inti->io.io_int_parm;
+		vcpu->run->s390_tsch.io_int_word = inti->io.io_int_word;
+	}
+	vcpu->run->s390_tsch.ipb = vcpu->arch.sie_block->ipb;
+	kfree(inti);
+	return -EREMOTE;
+}
+
+static int handle_io_inst(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 4, "%s", "I/O instruction");
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	if (vcpu->kvm->arch.css_support) {
+		/*
+		 * Most I/O instructions will be handled by userspace.
+		 * Exceptions are tpi and the interrupt portion of tsch.
+		 */
+		if (vcpu->arch.sie_block->ipa == 0xb236)
+			return handle_tpi(vcpu);
+		if (vcpu->arch.sie_block->ipa == 0xb235)
+			return handle_tsch(vcpu);
+		/* Handle in userspace. */
+		return -EOPNOTSUPP;
+	} else {
+		/*
+		 * Set condition code 3 to stop the guest from issuing channel
+		 * I/O instructions.
+		 */
+		kvm_s390_set_psw_cc(vcpu, 3);
+		return 0;
+	}
+}
+
+static int handle_stfl(struct kvm_vcpu *vcpu)
+{
+	int rc;
+	unsigned int fac;
+
+	vcpu->stat.instruction_stfl++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	/*
+	 * We need to shift the lower 32 facility bits (bit 0-31) from a u64
+	 * into a u32 memory representation. They will remain bits 0-31.
+	 */
+	fac = *vcpu->kvm->arch.model.fac->list >> 32;
+	rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list),
+			    &fac, sizeof(fac));
+	if (rc)
+		return rc;
+	VCPU_EVENT(vcpu, 5, "store facility list value %x", fac);
+	trace_kvm_s390_handle_stfl(vcpu, fac);
+	return 0;
+}
+
+#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
+#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
+#define PSW_ADDR_24 0x0000000000ffffffUL
+#define PSW_ADDR_31 0x000000007fffffffUL
+
+int is_valid_psw(psw_t *psw)
+{
+	if (psw->mask & PSW_MASK_UNASSIGNED)
+		return 0;
+	if ((psw->mask & PSW_MASK_ADDR_MODE) == PSW_MASK_BA) {
+		if (psw->addr & ~PSW_ADDR_31)
+			return 0;
+	}
+	if (!(psw->mask & PSW_MASK_ADDR_MODE) && (psw->addr & ~PSW_ADDR_24))
+		return 0;
+	if ((psw->mask & PSW_MASK_ADDR_MODE) ==  PSW_MASK_EA)
+		return 0;
+	if (psw->addr & 1)
+		return 0;
+	return 1;
+}
+
+int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
+{
+	psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
+	psw_compat_t new_psw;
+	u64 addr;
+	int rc;
+	ar_t ar;
+
+	if (gpsw->mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
+	if (addr & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	if (!(new_psw.mask & PSW32_MASK_BASE))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32;
+	gpsw->mask |= new_psw.addr & PSW32_ADDR_AMODE;
+	gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
+	if (!is_valid_psw(gpsw))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	return 0;
+}
+
+static int handle_lpswe(struct kvm_vcpu *vcpu)
+{
+	psw_t new_psw;
+	u64 addr;
+	int rc;
+	ar_t ar;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	addr = kvm_s390_get_base_disp_s(vcpu, &ar);
+	if (addr & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	vcpu->arch.sie_block->gpsw = new_psw;
+	if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	return 0;
+}
+
+static int handle_stidp(struct kvm_vcpu *vcpu)
+{
+	u64 stidp_data = vcpu->arch.stidp_data;
+	u64 operand2;
+	int rc;
+	ar_t ar;
+
+	vcpu->stat.instruction_stidp++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+	if (operand2 & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	rc = write_guest(vcpu, operand2, ar, &stidp_data, sizeof(stidp_data));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+
+	VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
+	return 0;
+}
+
+/* Fill @mem for STSI 3.2.2: shift the entries obtained via stsi() down one
+ * slot (count is bumped while below 8) and describe this guest in vm[0]. */
+static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
+{
+	int cpus = atomic_read(&vcpu->kvm->online_vcpus);
+	int n;
+
+	/* deal with other level 3 hypervisors */
+	if (stsi(mem, 3, 2, 2))
+		mem->count = 0;
+	if (mem->count < 8)
+		mem->count++;
+	for (n = mem->count - 1; n > 0 ; n--)
+		memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
+
+	memset(&mem->vm[0], 0, sizeof(mem->vm[0]));
+	mem->vm[0].cpus_total = cpus;
+	mem->vm[0].cpus_configured = cpus;
+	mem->vm[0].cpus_standby = 0;
+	mem->vm[0].cpus_reserved = 0;
+	mem->vm[0].caf = 1000;
+	memcpy(mem->vm[0].name, "KVMguest", 8);
+	ASCEBC(mem->vm[0].name, 8);
+	memcpy(mem->vm[0].cpi, "KVM/Linux       ", 16);
+	ASCEBC(mem->vm[0].cpi, 16);
+}
+
+static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, ar_t ar,
+				 u8 fc, u8 sel1, u16 sel2)
+{
+	vcpu->run->exit_reason = KVM_EXIT_S390_STSI;
+	vcpu->run->s390_stsi.addr = addr;
+	vcpu->run->s390_stsi.ar = ar;
+	vcpu->run->s390_stsi.fc = fc;
+	vcpu->run->s390_stsi.sel1 = sel1;
+	vcpu->run->s390_stsi.sel2 = sel2;
+}
+
+static int handle_stsi(struct kvm_vcpu *vcpu)
+{
+	int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
+	int sel1 = vcpu->run->s.regs.gprs[0] & 0xff;
+	int sel2 = vcpu->run->s.regs.gprs[1] & 0xffff;
+	unsigned long mem = 0;		/* stays 0 until a page was allocated */
+	u64 operand2;
+	int rc = 0;
+	ar_t ar;
+
+	vcpu->stat.instruction_stsi++;
+	VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	if (fc > 3) {			/* function code not handled here */
+		kvm_s390_set_psw_cc(vcpu, 3);
+		return 0;
+	}
+
+	if (vcpu->run->s.regs.gprs[0] & 0x0fffff00
+	    || vcpu->run->s.regs.gprs[1] & 0xffff0000)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	if (fc == 0) {			/* nothing is stored, only gprs[0] is set */
+		vcpu->run->s.regs.gprs[0] = 3 << 28;
+		kvm_s390_set_psw_cc(vcpu, 0);
+		return 0;
+	}
+
+	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
+
+	if (operand2 & 0xfff)		/* operand must be 4k aligned */
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	switch (fc) {
+	case 1: /* same handling for 1 and 2 */
+	case 2:
+		mem = get_zeroed_page(GFP_KERNEL);
+		if (!mem)
+			goto out_no_data;
+		if (stsi((void *) mem, fc, sel1, sel2))
+			goto out_no_data;
+		break;
+	case 3:
+		if (sel1 != 2 || sel2 != 2)	/* only 3.2.2 is built here */
+			goto out_no_data;
+		mem = get_zeroed_page(GFP_KERNEL);
+		if (!mem)
+			goto out_no_data;
+		handle_stsi_3_2_2(vcpu, (void *) mem);
+		break;
+	}
+
+	rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
+	if (rc) {
+		rc = kvm_s390_inject_prog_cond(vcpu, rc);
+		goto out;
+	}
+	if (vcpu->kvm->arch.user_stsi) {
+		insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
+		rc = -EREMOTE;		/* exit data was prepared just above */
+	}
+	trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
+	free_page(mem);
+	kvm_s390_set_psw_cc(vcpu, 0);
+	vcpu->run->s.regs.gprs[0] = 0;
+	return rc;
+out_no_data:
+	kvm_s390_set_psw_cc(vcpu, 3);	/* cc 3: no data was stored */
+out:
+	free_page(mem);
+	return rc;
+}
+
+static const intercept_handler_t b2_handlers[256] = {	/* index: ipa & 0xff */
+	[0x02] = handle_stidp,
+	[0x04] = handle_set_clock,
+	[0x10] = handle_set_prefix,
+	[0x11] = handle_store_prefix,
+	[0x12] = handle_store_cpu_address,
+	[0x21] = handle_ipte_interlock,
+	[0x29] = handle_skey,
+	[0x2a] = handle_skey,
+	[0x2b] = handle_skey,
+	[0x2c] = handle_test_block,
+	[0x30] = handle_io_inst,
+	[0x31] = handle_io_inst,
+	[0x32] = handle_io_inst,
+	[0x33] = handle_io_inst,
+	[0x34] = handle_io_inst,
+	[0x35] = handle_io_inst,
+	[0x36] = handle_io_inst,
+	[0x37] = handle_io_inst,
+	[0x38] = handle_io_inst,
+	[0x39] = handle_io_inst,
+	[0x3a] = handle_io_inst,
+	[0x3b] = handle_io_inst,
+	[0x3c] = handle_io_inst,
+	[0x50] = handle_ipte_interlock,
+	[0x5f] = handle_io_inst,
+	[0x74] = handle_io_inst,
+	[0x76] = handle_io_inst,
+	[0x7d] = handle_stsi,
+	[0xb1] = handle_stfl,
+	[0xb2] = handle_lpswe,
+};
+
+int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
+{
+	intercept_handler_t handler;
+
+	/*
+	 * A lot of B2 instructions are privileged. Here we look up, by the
+	 * low byte of ipa, the ones that we can handle in the kernel.
+	 * Anything else goes to userspace (-EOPNOTSUPP).
+	 */
+	handler = b2_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
+	if (handler)
+		return handler(vcpu);
+
+	return -EOPNOTSUPP;
+}
+
+static int handle_epsw(struct kvm_vcpu *vcpu)
+{
+	int reg1, reg2;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	/* Upper 32 bits of the psw mask go into the low word of reg1. */
+	vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
+	vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
+	if (reg2) {	/* reg2 == 0: lower 32 mask bits are not extracted */
+		vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
+		vcpu->run->s.regs.gprs[reg2] |=
+			vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
+	}
+	return 0;
+}
+
+#define PFMF_RESERVED   0xfffc0101UL	/* any bit set: specification exc. */
+#define PFMF_SK         0x00020000UL	/* set the storage key */
+#define PFMF_CF         0x00010000UL	/* clear the frame */
+#define PFMF_UI         0x00008000UL
+#define PFMF_FSC        0x00007000UL	/* frame size code */
+#define PFMF_NQ         0x00000800UL	/* non-quiescing */
+#define PFMF_MR         0x00000400UL	/* MR/MC: conditional-SSKE controls */
+#define PFMF_MC         0x00000200UL
+#define PFMF_KEY        0x000000feUL	/* key value for PFMF_SK */
+
+static int handle_pfmf(struct kvm_vcpu *vcpu)
+{
+	int reg1, reg2;		/* reg1: controls and key, reg2: frame address */
+	unsigned long start, end;
+
+	vcpu->stat.instruction_pfmf++;
+
+	kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+	if (!MACHINE_HAS_PFMF)
+		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	/* Only provide non-quiescing support if the host supports it */
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	/* No support for conditional-SSKE */
+	if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC))
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+	start = kvm_s390_logical_to_effective(vcpu, start);
+
+	switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
+	case 0x00000000:	/* 4k frame; end = next 4k boundary */
+		end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
+		break;
+	case 0x00001000:	/* 1M frame; end = next 1M boundary */
+		end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+		break;
+	/* We don't support EDAT2
+	case 0x00002000:
+		end = (start + (1UL << 31)) & ~((1UL << 31) - 1);
+		break;*/
+	default:
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	}
+
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+		if (kvm_s390_check_low_addr_prot_real(vcpu, start))
+			return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+	}
+
+	while (start != end) {	/* "!=": end is 0 if the frame end wraps */
+		unsigned long useraddr, abs_addr;
+
+		/* Translate guest address to host address */
+		if ((vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) == 0)
+			abs_addr = kvm_s390_real_to_abs(vcpu, start);
+		else
+			abs_addr = start;
+		useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(abs_addr));
+		if (kvm_is_error_hva(useraddr))
+			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+		if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+			if (clear_user((void __user *)useraddr, PAGE_SIZE))
+				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		}
+
+		if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
+			int rc = __skey_check_enable(vcpu);
+
+			if (rc)
+				return rc;
+			if (set_guest_storage_key(current->mm, useraddr,
+					vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
+					vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
+				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		}
+
+		start += PAGE_SIZE;	/* page steps reach the aligned end exactly */
+	}
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC)	/* 4k case keeps reg2 */
+		vcpu->run->s.regs.gprs[reg2] = end;
+	return 0;
+}
+
+static int handle_essa(struct kvm_vcpu *vcpu)
+{
+	/* entry count: offset part of cbrlo / 8, expected to be 1FF */
+	int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
+	unsigned long *cbrlo, cbrle;
+	struct gmap *gmap;
+	int i;
+
+	VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries);
+	gmap = vcpu->arch.gmap;
+	vcpu->stat.instruction_essa++;
+	if (!kvm_s390_cmma_enabled(vcpu->kvm))
+		return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	/* Rewind PSW to repeat the ESSA instruction */
+	kvm_s390_rewind_psw(vcpu, 4);
+	vcpu->arch.sie_block->cbrlo &= PAGE_MASK;	/* reset nceo */
+	cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
+	down_read(&gmap->mm->mmap_sem);
+	for (i = 0; i < entries; ++i) {
+		cbrle = cbrlo[i];
+		if (unlikely(cbrle & ~PAGE_MASK || cbrle < 2 * PAGE_SIZE))
+			/* invalid entry: not page aligned or below 2 pages */
+			break;
+		/* try to free backing */
+		__gmap_zap(gmap, cbrle);
+	}
+	up_read(&gmap->mm->mmap_sem);
+	if (i < entries)	/* the loop stopped at an invalid entry */
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+	return 0;
+}
+
+static const intercept_handler_t b9_handlers[256] = {	/* index: ipa & 0xff */
+	[0x8a] = handle_ipte_interlock,
+	[0x8d] = handle_epsw,
+	[0x8e] = handle_ipte_interlock,
+	[0x8f] = handle_ipte_interlock,
+	[0xab] = handle_essa,
+	[0xaf] = handle_pfmf,
+};
+
+int kvm_s390_handle_b9(struct kvm_vcpu *vcpu)
+{
+	intercept_handler_t handler;
+
+	/* Dispatch on the low byte of ipa, just as for the B2 instructions. */
+	handler = b9_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
+	if (handler)
+		return handler(vcpu);
+
+	return -EOPNOTSUPP;
+}
+
+int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int reg, rc, nr_regs;
+	u32 ctl_array[16];	/* new low words, read from guest memory */
+	u64 ga;
+	ar_t ar;
+
+	vcpu->stat.instruction_lctl++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
+
+	if (ga & 3)		/* operand must be 4 byte aligned */
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
+
+	nr_regs = ((reg3 - reg1) & 0xf) + 1;	/* range may wrap from 15 to 0 */
+	rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	reg = reg1;
+	nr_regs = 0;		/* reused as index into ctl_array */
+	do {
+		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
+		vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	return 0;
+}
+
+int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int reg, rc, nr_regs;
+	u32 ctl_array[16];	/* low words of the selected gcr[] entries */
+	u64 ga;
+	ar_t ar;
+
+	vcpu->stat.instruction_stctl++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
+
+	if (ga & 3)		/* operand must be 4 byte aligned */
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
+
+	reg = reg1;
+	nr_regs = 0;
+	do {			/* reg1..reg3 inclusive, wrapping 15 -> 0 */
+		ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+	rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
+	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
+}
+
+static int handle_lctlg(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int reg, rc, nr_regs;
+	u64 ctl_array[16];	/* new full values, read from guest memory */
+	u64 ga;
+	ar_t ar;
+
+	vcpu->stat.instruction_lctlg++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
+
+	if (ga & 7)		/* operand must be 8 byte aligned */
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
+
+	nr_regs = ((reg3 - reg1) & 0xf) + 1;	/* range may wrap from 15 to 0 */
+	rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	reg = reg1;
+	nr_regs = 0;		/* reused as index into ctl_array */
+	do {
+		vcpu->arch.sie_block->gcr[reg] = ctl_array[nr_regs++];
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	return 0;
+}
+
+static int handle_stctg(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int reg, rc, nr_regs;
+	u64 ctl_array[16];	/* copies of the selected gcr[] entries */
+	u64 ga;
+	ar_t ar;
+
+	vcpu->stat.instruction_stctg++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
+
+	if (ga & 7)		/* operand must be 8 byte aligned */
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
+	trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
+
+	reg = reg1;
+	nr_regs = 0;
+	do {			/* reg1..reg3 inclusive, wrapping 15 -> 0 */
+		ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+	rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
+	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
+}
+
+static const intercept_handler_t eb_handlers[256] = {	/* index: ipb & 0xff */
+	[0x2f] = handle_lctlg,
+	[0x25] = handle_stctg,
+};
+
+int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
+{
+	intercept_handler_t handler;
+
+	handler = eb_handlers[vcpu->arch.sie_block->ipb & 0xff];
+	if (handler)
+		return handler(vcpu);
+	return -EOPNOTSUPP;	/* no in-kernel handler for this ipb byte */
+}
+
+static int handle_tprot(struct kvm_vcpu *vcpu)
+{
+	u64 address1, address2;
+	unsigned long hva, gpa;
+	int ret = 0, cc = 0;	/* cc becomes 1 when only reading is possible */
+	bool writable;
+	ar_t ar;
+
+	vcpu->stat.instruction_tprot++;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	kvm_s390_get_base_disp_sse(vcpu, &address1, &address2, &ar, NULL);
+
+	/* we only handle the Linux memory detection case:
+	 * access key == 0
+	 * everything else goes to userspace. */
+	if (address2 & 0xf0)
+		return -EOPNOTSUPP;
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
+		ipte_lock(vcpu);	/* dropped again at out_unlock */
+	ret = guest_translate_address(vcpu, address1, ar, &gpa, 1);
+	if (ret == PGM_PROTECTION) {
+		/* Write protected? Try again with read-only... */
+		cc = 1;
+		ret = guest_translate_address(vcpu, address1, ar, &gpa, 0);
+	}
+	if (ret) {
+		if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
+			ret = kvm_s390_inject_program_int(vcpu, ret);
+		} else if (ret > 0) {
+			/* Translation not available */
+			kvm_s390_set_psw_cc(vcpu, 3);
+			ret = 0;
+		}
+		goto out_unlock;	/* ret < 0 is returned unchanged */
+	}
+
+	hva = gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable);
+	if (kvm_is_error_hva(hva)) {
+		ret = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	} else {
+		if (!writable)
+			cc = 1;		/* Write not permitted ==> read-only */
+		kvm_s390_set_psw_cc(vcpu, cc);
+		/* Note: CC2 only occurs for storage keys (not supported yet) */
+	}
+out_unlock:
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
+		ipte_unlock(vcpu);
+	return ret;
+}
+
+int kvm_s390_handle_e5(struct kvm_vcpu *vcpu)
+{
+	/* For e5xx... instructions we only handle TPROT (ipa low byte 0x01) */
+	if ((vcpu->arch.sie_block->ipa & 0x00ff) == 0x01)
+		return handle_tprot(vcpu);
+	return -EOPNOTSUPP;
+}
+
+static int handle_sckpf(struct kvm_vcpu *vcpu)
+{
+	u32 value;
+
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000)
+		return kvm_s390_inject_program_int(vcpu,
+						   PGM_SPECIFICATION);
+
+	value = vcpu->run->s.regs.gprs[0] & 0x000000000000ffff;
+	vcpu->arch.sie_block->todpr = value;	/* low 16 bits of gprs[0] */
+
+	return 0;
+}
+
+static const intercept_handler_t x01_handlers[256] = {	/* index: ipa & 0xff */
+	[0x07] = handle_sckpf,
+};
+
+int kvm_s390_handle_01(struct kvm_vcpu *vcpu)
+{
+	intercept_handler_t handler;
+
+	handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
+	if (handler)
+		return handler(vcpu);
+	return -EOPNOTSUPP;	/* no in-kernel handler for this ipa byte */
+}
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
new file mode 100644
index 000000000..72e58bd2b
--- /dev/null
+++ b/arch/s390/kvm/sigp.c
@@ -0,0 +1,493 @@
+/*
+ * handling interprocessor communication
+ *
+ * Copyright IBM Corp. 2008, 2013
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ *               Christian Ehrhardt <ehrhardt@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/slab.h>
+#include <asm/sigp.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+#include "trace.h"
+
+static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
+			u64 *reg)
+{
+	struct kvm_s390_local_interrupt *li;
+	int cpuflags;
+	int rc;
+	int ext_call_pending;
+
+	li = &dst_vcpu->arch.local_int;
+
+	cpuflags = atomic_read(li->cpuflags);
+	ext_call_pending = kvm_s390_ext_call_pending(dst_vcpu);
+	if (!(cpuflags & CPUSTAT_STOPPED) && !ext_call_pending)
+		rc = SIGP_CC_ORDER_CODE_ACCEPTED;	/* no status to report */
+	else {
+		*reg &= 0xffffffff00000000UL;	/* status replaces the low word */
+		if (ext_call_pending)
+			*reg |= SIGP_STATUS_EXT_CALL_PENDING;
+		if (cpuflags & CPUSTAT_STOPPED)
+			*reg |= SIGP_STATUS_STOPPED;
+		rc = SIGP_CC_STATUS_STORED;
+	}
+
+	VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", dst_vcpu->vcpu_id,
+		   rc);
+	return rc;
+}
+
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+				    struct kvm_vcpu *dst_vcpu)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_INT_EMERGENCY,
+		.u.emerg.code = vcpu->vcpu_id,	/* id of the sending vcpu */
+	};
+	int rc = 0;
+
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+	if (!rc)
+		VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x",
+			   dst_vcpu->vcpu_id);
+
+	return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;	/* inject error or cc */
+}
+
+static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
+{
+	return __inject_sigp_emergency(vcpu, dst_vcpu);	/* no preconditions */
+}
+
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
+					struct kvm_vcpu *dst_vcpu,
+					u16 asn, u64 *reg)
+{
+	const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
+	u16 p_asn, s_asn;
+	psw_t *psw;
+	u32 flags;
+
+	flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
+	psw = &dst_vcpu->arch.sie_block->gpsw;
+	p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff;  /* Primary ASN */
+	s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff;  /* Secondary ASN */
+
+	/* Inject the emergency signal? Any one of these conditions suffices. */
+	if (!(flags & CPUSTAT_STOPPED)
+	    || (psw->mask & psw_int_mask) != psw_int_mask
+	    || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
+	    || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
+		return __inject_sigp_emergency(vcpu, dst_vcpu);
+	} else {
+		*reg &= 0xffffffff00000000UL;	/* status replaces the low word */
+		*reg |= SIGP_STATUS_INCORRECT_STATE;
+		return SIGP_CC_STATUS_STORED;
+	}
+}
+
+static int __sigp_external_call(struct kvm_vcpu *vcpu,
+				struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_INT_EXTERNAL_CALL,
+		.u.extcall.code = vcpu->vcpu_id,	/* id of the sending vcpu */
+	};
+	int rc;
+
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+	if (rc == -EBUSY) {	/* reported as "external call pending" status */
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_EXT_CALL_PENDING;
+		return SIGP_CC_STATUS_STORED;
+	} else if (rc == 0) {
+		VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x",
+			   dst_vcpu->vcpu_id);
+	}
+
+	return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
+}
+
+static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_SIGP_STOP,
+	};
+	int rc;
+
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+	if (rc == -EBUSY)
+		rc = SIGP_CC_BUSY;	/* -EBUSY is turned into cc busy */
+	else if (rc == 0)
+		VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x",
+			   dst_vcpu->vcpu_id);
+
+	return rc;
+}
+
+static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
+					struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_SIGP_STOP,
+		.u.stop.flags = KVM_S390_STOP_FLAG_STORE_STATUS,
+	};
+	int rc;			/* note: reg is not used by this function */
+
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+	if (rc == -EBUSY)
+		rc = SIGP_CC_BUSY;	/* -EBUSY is turned into cc busy */
+	else if (rc == 0)
+		VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x",
+			   dst_vcpu->vcpu_id);
+
+	return rc;
+}
+
+static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
+{
+	int rc;
+	unsigned int i;
+	struct kvm_vcpu *v;
+
+	switch (parameter & 0xff) {	/* only the low byte selects the mode */
+	case 0:
+		rc = SIGP_CC_NOT_OPERATIONAL;
+		break;
+	case 1:
+	case 2:
+		kvm_for_each_vcpu(i, v, vcpu->kvm) {	/* all vcpus of the VM */
+			v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+			kvm_clear_async_pf_completion_queue(v);
+		}
+
+		rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+		break;
+	default:
+		rc = -EOPNOTSUPP;	/* other modes are not handled here */
+	}
+	return rc;
+}
+
+static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
+			     u32 address, u64 *reg)
+{
+	struct kvm_s390_irq irq = {
+		.type = KVM_S390_SIGP_SET_PREFIX,
+		.u.prefix.address = address & 0x7fffe000u,	/* 8k aligned */
+	};
+	int rc;
+
+	/*
+	 * Make sure the new value is valid memory. We only need to check the
+	 * first page, since address is 8k aligned and memory pieces are always
+	 * at least 1MB aligned and have at least a size of 1MB.
+	 */
+	if (kvm_is_error_gpa(vcpu->kvm, irq.u.prefix.address)) {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INVALID_PARAMETER;
+		return SIGP_CC_STATUS_STORED;
+	}
+
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
+	if (rc == -EBUSY) {	/* reported as "incorrect state" status */
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INCORRECT_STATE;
+		return SIGP_CC_STATUS_STORED;
+	} else if (rc == 0) {
+		VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x",
+			   dst_vcpu->vcpu_id, irq.u.prefix.address);
+	}
+
+	return rc;
+}
+
+static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
+				       struct kvm_vcpu *dst_vcpu,
+				       u32 addr, u64 *reg)
+{
+	int flags;
+	int rc;
+
+	flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
+	if (!(flags & CPUSTAT_STOPPED)) {	/* target must be stopped */
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INCORRECT_STATE;
+		return SIGP_CC_STATUS_STORED;
+	}
+
+	addr &= 0x7ffffe00;	/* force 512 byte alignment, clear top bit */
+	rc = kvm_s390_store_status_unloaded(dst_vcpu, addr);
+	if (rc == -EFAULT) {	/* reported as "invalid parameter" status */
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INVALID_PARAMETER;
+		rc = SIGP_CC_STATUS_STORED;
+	}
+	return rc;
+}
+
+static int __sigp_sense_running(struct kvm_vcpu *vcpu,
+				struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+	struct kvm_s390_local_interrupt *li;
+	int rc;
+
+	li = &dst_vcpu->arch.local_int;
+	if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
+		/* running: nothing is stored into *reg */
+		rc = SIGP_CC_ORDER_CODE_ACCEPTED;
+	} else {
+		/* not running: report it in the low word of *reg */
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_NOT_RUNNING;
+		rc = SIGP_CC_STATUS_STORED;
+	}
+
+	VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x",
+		   dst_vcpu->vcpu_id, rc);
+
+	return rc;
+}
+
+static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu,
+				   struct kvm_vcpu *dst_vcpu, u8 order_code)
+{
+	struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
+	/* handle (RE)START in user space */
+	int rc = -EOPNOTSUPP;
+
+	/* make sure we don't race with STOP irq injection */
+	spin_lock(&li->lock);
+	if (kvm_s390_is_stop_irq_pending(dst_vcpu))
+		rc = SIGP_CC_BUSY;	/* a stop irq is still pending */
+	spin_unlock(&li->lock);
+
+	return rc;
+}
+
+static int __prepare_sigp_cpu_reset(struct kvm_vcpu *vcpu,
+				    struct kvm_vcpu *dst_vcpu, u8 order_code)
+{
+	/* handle (INITIAL) CPU RESET in user space; no argument is examined */
+	return -EOPNOTSUPP;
+}
+
+static int __prepare_sigp_unknown(struct kvm_vcpu *vcpu,
+				  struct kvm_vcpu *dst_vcpu)
+{
+	/* handle unknown orders in user space; no argument is examined */
+	return -EOPNOTSUPP;
+}
+
+static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
+			   u16 cpu_addr, u32 parameter, u64 *status_reg)
+{
+	int rc;
+	struct kvm_vcpu *dst_vcpu;
+
+	if (cpu_addr >= KVM_MAX_VCPUS)	/* checked before the vcpu lookup */
+		return SIGP_CC_NOT_OPERATIONAL;
+
+	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+	if (!dst_vcpu)			/* no vcpu at this address */
+		return SIGP_CC_NOT_OPERATIONAL;
+
+	switch (order_code) {	/* each case counts the order, then runs it */
+	case SIGP_SENSE:
+		vcpu->stat.instruction_sigp_sense++;
+		rc = __sigp_sense(vcpu, dst_vcpu, status_reg);
+		break;
+	case SIGP_EXTERNAL_CALL:
+		vcpu->stat.instruction_sigp_external_call++;
+		rc = __sigp_external_call(vcpu, dst_vcpu, status_reg);
+		break;
+	case SIGP_EMERGENCY_SIGNAL:
+		vcpu->stat.instruction_sigp_emergency++;
+		rc = __sigp_emergency(vcpu, dst_vcpu);
+		break;
+	case SIGP_STOP:
+		vcpu->stat.instruction_sigp_stop++;
+		rc = __sigp_stop(vcpu, dst_vcpu);
+		break;
+	case SIGP_STOP_AND_STORE_STATUS:
+		vcpu->stat.instruction_sigp_stop_store_status++;
+		rc = __sigp_stop_and_store_status(vcpu, dst_vcpu, status_reg);
+		break;
+	case SIGP_STORE_STATUS_AT_ADDRESS:
+		vcpu->stat.instruction_sigp_store_status++;
+		rc = __sigp_store_status_at_addr(vcpu, dst_vcpu, parameter,
+						 status_reg);
+		break;
+	case SIGP_SET_PREFIX:
+		vcpu->stat.instruction_sigp_prefix++;
+		rc = __sigp_set_prefix(vcpu, dst_vcpu, parameter, status_reg);
+		break;
+	case SIGP_COND_EMERGENCY_SIGNAL:
+		vcpu->stat.instruction_sigp_cond_emergency++;
+		rc = __sigp_conditional_emergency(vcpu, dst_vcpu, parameter,
+						  status_reg);
+		break;
+	case SIGP_SENSE_RUNNING:
+		vcpu->stat.instruction_sigp_sense_running++;
+		rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg);
+		break;
+	case SIGP_START:
+		vcpu->stat.instruction_sigp_start++;
+		rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+		break;
+	case SIGP_RESTART:
+		vcpu->stat.instruction_sigp_restart++;
+		rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+		break;
+	case SIGP_INITIAL_CPU_RESET:
+		vcpu->stat.instruction_sigp_init_cpu_reset++;
+		rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+		break;
+	case SIGP_CPU_RESET:
+		vcpu->stat.instruction_sigp_cpu_reset++;
+		rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+		break;
+	default:
+		vcpu->stat.instruction_sigp_unknown++;
+		rc = __prepare_sigp_unknown(vcpu, dst_vcpu);
+	}
+
+	if (rc == -EOPNOTSUPP)	/* only logged here; rc is returned as is */
+		VCPU_EVENT(vcpu, 4,
+			   "sigp order %u -> cpu %x: handled in user space",
+			   order_code, dst_vcpu->vcpu_id);
+
+	return rc;
+}
+
+static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
+{
+	if (!vcpu->kvm->arch.user_sigp)	/* user_sigp off: nothing is diverted */
+		return 0;
+
+	switch (order_code) {
+	case SIGP_SENSE:
+	case SIGP_EXTERNAL_CALL:
+	case SIGP_EMERGENCY_SIGNAL:
+	case SIGP_COND_EMERGENCY_SIGNAL:
+	case SIGP_SENSE_RUNNING:
+		return 0;	/* these orders are never diverted */
+	/* update counters as we're directly dropping to user space */
+	case SIGP_STOP:
+		vcpu->stat.instruction_sigp_stop++;
+		break;
+	case SIGP_STOP_AND_STORE_STATUS:
+		vcpu->stat.instruction_sigp_stop_store_status++;
+		break;
+	case SIGP_STORE_STATUS_AT_ADDRESS:
+		vcpu->stat.instruction_sigp_store_status++;
+		break;
+	case SIGP_STORE_ADDITIONAL_STATUS:
+		vcpu->stat.instruction_sigp_store_adtl_status++;
+		break;
+	case SIGP_SET_PREFIX:
+		vcpu->stat.instruction_sigp_prefix++;
+		break;
+	case SIGP_START:
+		vcpu->stat.instruction_sigp_start++;
+		break;
+	case SIGP_RESTART:
+		vcpu->stat.instruction_sigp_restart++;
+		break;
+	case SIGP_INITIAL_CPU_RESET:
+		vcpu->stat.instruction_sigp_init_cpu_reset++;
+		break;
+	case SIGP_CPU_RESET:
+		vcpu->stat.instruction_sigp_cpu_reset++;
+		break;
+	default:
+		vcpu->stat.instruction_sigp_unknown++;
+	}
+
+	VCPU_EVENT(vcpu, 4, "sigp order %u: completely handled in user space",
+		   order_code);
+
+	return 1;	/* non-zero: the order is left to user space */
+}
+
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+{
+	int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+	u32 parameter;
+	u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
+	u8 order_code;
+	int rc;
+
+	/* sigp issued in problem state gets a privileged operation exception */
+	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+	order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
+	if (handle_sigp_order_in_user_space(vcpu, order_code))
+		return -EOPNOTSUPP;
+
+	if (r1 % 2)	/* the parameter is in the odd register of the r1 pair */
+		parameter = vcpu->run->s.regs.gprs[r1];
+	else
+		parameter = vcpu->run->s.regs.gprs[r1 + 1];
+
+	trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
+	switch (order_code) {
+	case SIGP_SET_ARCHITECTURE:	/* handled without a destination vcpu */
+		vcpu->stat.instruction_sigp_arch++;
+		rc = __sigp_set_arch(vcpu, parameter);
+		break;
+	default:
+		rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
+				     parameter,
+				     &vcpu->run->s.regs.gprs[r1]);
+	}
+
+	if (rc < 0)	/* negative: error code, passed on unchanged */
+		return rc;
+
+	kvm_s390_set_psw_cc(vcpu, rc);	/* rc >= 0 is the condition code */
+	return 0;
+}
+
+/*
+ * Handle SIGP partial execution interception.
+ *
+ * This interception will occur at the source cpu when a source cpu sends an
+ * external call to a target cpu and the target cpu has the WAIT bit set in
+ * its cpuflags. Interception will occur after the interrupt indicator bits at
+ * the target cpu have been set. All error cases will lead to instruction
+ * interception, therefore nothing is to be checked or prepared.
+ */
+int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
+{
+	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+	u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
+	struct kvm_vcpu *dest_vcpu;
+	u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
+
+	trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+
+	if (order_code == SIGP_EXTERNAL_CALL) {
+		dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+		BUG_ON(dest_vcpu == NULL);	/* cpu_addr is not range checked */
+
+		kvm_s390_vcpu_wakeup(dest_vcpu);
+		kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED);
+		return 0;
+	}
+
+	return -EOPNOTSUPP;	/* any other order is not handled here */
+}
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
new file mode 100644
index 000000000..3208d33a4
--- /dev/null
+++ b/arch/s390/kvm/trace-s390.h
@@ -0,0 +1,282 @@
+#if !defined(_TRACE_KVMS390_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVMS390_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm-s390
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace-s390
+
+/*
+ * The TRACE_SYSTEM_VAR defaults to TRACE_SYSTEM, but must be a
+ * legitimate C variable. It is not exported to user space.
+ */
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR kvm_s390
+
+/*
+ * Trace point for the creation of the kvm instance; flags UCONTROL VMs.
+ */
+TRACE_EVENT(kvm_s390_create_vm,
+	    TP_PROTO(unsigned long type),
+	    TP_ARGS(type),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned long, type)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->type = type;
+		    ),
+
+	    TP_printk("create vm%s",
+		      __entry->type & KVM_VM_S390_UCONTROL ? " (UCONTROL)" : "")
+	);
+
+/*
+ * Trace points for creation and destruction of vcpus.
+ */
+TRACE_EVENT(kvm_s390_create_vcpu,
+	    TP_PROTO(unsigned int id, struct kvm_vcpu *vcpu,
+		     struct kvm_s390_sie_block *sie_block),
+	    TP_ARGS(id, vcpu, sie_block),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, id)
+		    __field(struct kvm_vcpu *, vcpu)
+		    __field(struct kvm_s390_sie_block *, sie_block)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    __entry->vcpu = vcpu;
+		    __entry->sie_block = sie_block;
+		    ),
+
+	    TP_printk("create cpu %d at %p, sie block at %p", __entry->id,
+		      __entry->vcpu, __entry->sie_block)
+	);
+
+TRACE_EVENT(kvm_s390_destroy_vcpu,
+	    TP_PROTO(unsigned int id),
+	    TP_ARGS(id),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, id)	/* the only recorded value */
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    ),
+
+	    TP_printk("destroy cpu %d", __entry->id)
+	);
+
+/*
+ * Trace point for start and stop of vcpus (state != 0 means starting).
+ */
+TRACE_EVENT(kvm_s390_vcpu_start_stop,
+	    TP_PROTO(unsigned int id, int state),
+	    TP_ARGS(id, state),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, id)
+		    __field(int, state)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    __entry->state = state;
+		    ),
+
+	    TP_printk("%s cpu %d", __entry->state ? "starting" : "stopping",
+		      __entry->id)
+	);
+
+/*
+ * Trace points for injection of interrupts, either per machine or
+ * per vcpu; kvm_s390_int_type names the types for __print_symbolic().
+ */
+
+#define kvm_s390_int_type						\
+	{KVM_S390_SIGP_STOP, "sigp stop"},				\
+	{KVM_S390_PROGRAM_INT, "program interrupt"},			\
+	{KVM_S390_SIGP_SET_PREFIX, "sigp set prefix"},			\
+	{KVM_S390_RESTART, "sigp restart"},				\
+	{KVM_S390_INT_VIRTIO, "virtio interrupt"},			\
+	{KVM_S390_INT_SERVICE, "sclp interrupt"},			\
+	{KVM_S390_INT_EMERGENCY, "sigp emergency"},			\
+	{KVM_S390_INT_EXTERNAL_CALL, "sigp ext call"}
+
+TRACE_EVENT(kvm_s390_inject_vm,
+	    TP_PROTO(__u64 type, __u32 parm, __u64 parm64, int who),
+	    TP_ARGS(type, parm, parm64, who),
+
+	    TP_STRUCT__entry(
+		    __field(__u32, inttype)
+		    __field(__u32, parm)
+		    __field(__u64, parm64)
+		    __field(int, who)	/* 1: from kernel, 2: from user */
+		    ),
+
+	    TP_fast_assign(
+		    __entry->inttype = type & 0x00000000ffffffff; /* low word */
+		    __entry->parm = parm;
+		    __entry->parm64 = parm64;
+		    __entry->who = who;
+		    ),
+
+	    TP_printk("inject%s: type:%x (%s) parm:%x parm64:%llx",
+		      (__entry->who == 1) ? " (from kernel)" :
+		      (__entry->who == 2) ? " (from user)" : "",
+		      __entry->inttype,
+		      __print_symbolic(__entry->inttype, kvm_s390_int_type),
+		      __entry->parm, __entry->parm64)
+	);
+
+TRACE_EVENT(kvm_s390_inject_vcpu,
+	    TP_PROTO(unsigned int id, __u64 type, __u32 parm, __u64 parm64, \
+		     int who),
+	    TP_ARGS(id, type, parm, parm64, who),
+
+	    TP_STRUCT__entry(
+		    __field(int, id)	/* printed as "(vcpu %d)" */
+		    __field(__u32, inttype)
+		    __field(__u32, parm)
+		    __field(__u64, parm64)
+		    __field(int, who)	/* 1: from kernel, 2: from user */
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    __entry->inttype = type & 0x00000000ffffffff; /* low word */
+		    __entry->parm = parm;
+		    __entry->parm64 = parm64;
+		    __entry->who = who;
+		    ),
+
+	    TP_printk("inject%s (vcpu %d): type:%x (%s) parm:%x parm64:%llx",
+		      (__entry->who == 1) ? " (from kernel)" :
+		      (__entry->who == 2) ? " (from user)" : "",
+		      __entry->id, __entry->inttype,
+		      __print_symbolic(__entry->inttype, kvm_s390_int_type),
+		      __entry->parm, __entry->parm64)
+	);
+
+/*
+ * Trace point for the actual delivery of interrupts to a vcpu.
+ */
+TRACE_EVENT(kvm_s390_deliver_interrupt,
+	    TP_PROTO(unsigned int id, __u64 type, __u64 data0, __u64 data1),
+	    TP_ARGS(id, type, data0, data1),
+
+	    TP_STRUCT__entry(
+		    __field(int, id)
+		    __field(__u32, inttype)
+		    __field(__u64, data0)
+		    __field(__u64, data1)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    __entry->inttype = type & 0x00000000ffffffff; /* low word */
+		    __entry->data0 = data0;
+		    __entry->data1 = data1;
+		    ),
+
+	    TP_printk("deliver interrupt (vcpu %d): type:%x (%s) "	\
+		      "data:%08llx %016llx",
+		      __entry->id, __entry->inttype,
+		      __print_symbolic(__entry->inttype, kvm_s390_int_type),
+		      __entry->data0, __entry->data1)
+	);
+
+/*
+ * Trace point for resets that may be requested from userspace.
+ */
+TRACE_EVENT(kvm_s390_request_resets,
+	    TP_PROTO(__u64 resets),
+	    TP_ARGS(resets),
+
+	    TP_STRUCT__entry(
+		    __field(__u64, resets)	/* raw value, printed in hex */
+		    ),
+
+	    TP_fast_assign(
+		    __entry->resets = resets;
+		    ),
+
+	    TP_printk("requesting userspace resets %llx",
+		      __entry->resets)
+	);
+
+/*
+ * Trace point for a vcpu's stop requests (stop irq value and stop flags).
+ */
+TRACE_EVENT(kvm_s390_stop_request,
+	    TP_PROTO(unsigned char stop_irq, unsigned char flags),
+	    TP_ARGS(stop_irq, flags),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned char, stop_irq)
+		    __field(unsigned char, flags)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->stop_irq = stop_irq;
+		    __entry->flags = flags;
+		    ),
+
+	    TP_printk("stop request, stop irq = %u, flags = %08x",
+		      __entry->stop_irq, __entry->flags)
+	);
+
+
+/*
+ * Trace point for enabling channel I/O instruction support (no trailing \n).
+ */
+TRACE_EVENT(kvm_s390_enable_css,
+	    TP_PROTO(void *kvm),
+	    TP_ARGS(kvm),
+
+	    TP_STRUCT__entry(
+		    __field(void *, kvm)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->kvm = kvm;
+		    ),
+
+	    TP_printk("enabling channel I/O support (kvm @ %p)",
+		      __entry->kvm)
+	);
+
+/*
+ * Trace point for enabling and disabling interlocking-and-broadcasting
+ * suppression (state != 0 means enabling).
+ */
+TRACE_EVENT(kvm_s390_enable_disable_ibs,
+	    TP_PROTO(unsigned int id, int state),
+	    TP_ARGS(id, state),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, id)
+		    __field(int, state)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    __entry->state = state;
+		    ),
+
+	    TP_printk("%s ibs on cpu %d",
+		      __entry->state ? "enabling" : "disabling", __entry->id)
+	);
+
+
+#endif /* _TRACE_KVMS390_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
new file mode 100644
index 000000000..916834d7a
--- /dev/null
+++ b/arch/s390/kvm/trace.h
@@ -0,0 +1,418 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+#include <asm/sie.h>
+#include <asm/debug.h>
+#include <asm/dis.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/*
+ * Helpers for vcpu-specific tracepoints containing the same information
+ * as s390dbf VCPU_EVENTs.
+ */
+#define VCPU_PROTO_COMMON struct kvm_vcpu *vcpu
+#define VCPU_ARGS_COMMON vcpu
+#define VCPU_FIELD_COMMON __field(int, id)			\
+	__field(unsigned long, pswmask)				\
+	__field(unsigned long, pswaddr)
+#define VCPU_ASSIGN_COMMON do {						\
+	__entry->id = vcpu->vcpu_id;					\
+	__entry->pswmask = vcpu->arch.sie_block->gpsw.mask;		\
+	__entry->pswaddr = vcpu->arch.sie_block->gpsw.addr;		\
+	} while (0);
+#define VCPU_TP_PRINTK(p_str, p_args...)				\
+	TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id,		\
+		  __entry->pswmask, __entry->pswaddr, p_args)
+
+TRACE_EVENT(kvm_s390_skey_related_inst,
+	    TP_PROTO(VCPU_PROTO_COMMON),
+	    TP_ARGS(VCPU_ARGS_COMMON),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    ),
+	    VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu")
+	);
+
+TRACE_EVENT(kvm_s390_major_guest_pfault,
+	    TP_PROTO(VCPU_PROTO_COMMON),
+	    TP_ARGS(VCPU_ARGS_COMMON),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    ),
+	    VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault")
+	);
+
+TRACE_EVENT(kvm_s390_pfault_init,
+	    TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+	    TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(long, pfault_token)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->pfault_token = pfault_token;
+		    ),
+	    VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token)
+	);
+
+TRACE_EVENT(kvm_s390_pfault_done,
+	    TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
+	    TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(long, pfault_token)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->pfault_token = pfault_token;
+		    ),
+	    VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token)
+	);
+
+/*
+ * Tracepoints for SIE entry and exit.
+ */
+TRACE_EVENT(kvm_s390_sie_enter,
+	    TP_PROTO(VCPU_PROTO_COMMON, int cpuflags),
+	    TP_ARGS(VCPU_ARGS_COMMON, cpuflags),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(int, cpuflags)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->cpuflags = cpuflags;
+		    ),
+
+	    VCPU_TP_PRINTK("entering sie flags %x", __entry->cpuflags)
+	);
+
+TRACE_EVENT(kvm_s390_sie_fault,
+	    TP_PROTO(VCPU_PROTO_COMMON),
+	    TP_ARGS(VCPU_ARGS_COMMON),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    ),
+
+	    VCPU_TP_PRINTK("%s", "fault in sie instruction")
+	);
+
+TRACE_EVENT(kvm_s390_sie_exit,
+	    TP_PROTO(VCPU_PROTO_COMMON, u8 icptcode),
+	    TP_ARGS(VCPU_ARGS_COMMON, icptcode),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(u8, icptcode)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->icptcode = icptcode;
+		    ),
+
+	    VCPU_TP_PRINTK("exit sie icptcode %d (%s)", __entry->icptcode,
+			   __print_symbolic(__entry->icptcode,
+					    sie_intercept_code))
+	);
+
+/*
+ * Trace point for intercepted instructions.
+ */
+TRACE_EVENT(kvm_s390_intercept_instruction,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb),
+	    TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u64, instruction)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->instruction = ((__u64)ipa << 48) |
+		    ((__u64)ipb << 16);
+		    ),
+
+	    VCPU_TP_PRINTK("intercepted instruction %016llx (%s)",
+			   __entry->instruction,
+			   __print_symbolic(icpt_insn_decoder(__entry->instruction),
+					    icpt_insn_codes))
+	);
+
+/*
+ * Trace point for intercepted program interruptions.
+ */
+TRACE_EVENT(kvm_s390_intercept_prog,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
+	    TP_ARGS(VCPU_ARGS_COMMON, code),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u16, code)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->code = code;
+		    ),
+
+	    VCPU_TP_PRINTK("intercepted program interruption %04x",
+			   __entry->code)
+	);
+
+/*
+ * Trace point for validity intercepts.
+ */
+TRACE_EVENT(kvm_s390_intercept_validity,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u16 viwhy),
+	    TP_ARGS(VCPU_ARGS_COMMON, viwhy),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u16, viwhy)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->viwhy = viwhy;
+		    ),
+
+	    VCPU_TP_PRINTK("got validity intercept %04x", __entry->viwhy)
+	);
+
+/*
+ * Trace points for instructions that are of special interest.
+ */
+
+TRACE_EVENT(kvm_s390_handle_sigp,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr, \
+		     __u32 parameter),
+	    TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr, parameter),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u8, order_code)
+		    __field(__u16, cpu_addr)
+		    __field(__u32, parameter)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->order_code = order_code;
+		    __entry->cpu_addr = cpu_addr;
+		    __entry->parameter = parameter;
+		    ),
+
+	    VCPU_TP_PRINTK("handle sigp order %02x (%s), cpu address %04x, " \
+			   "parameter %08x", __entry->order_code,
+			   __print_symbolic(__entry->order_code,
+					    sigp_order_codes),
+			   __entry->cpu_addr, __entry->parameter)
+	);
+
+TRACE_EVENT(kvm_s390_handle_sigp_pei,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u8 order_code, __u16 cpu_addr),
+	    TP_ARGS(VCPU_ARGS_COMMON, order_code, cpu_addr),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u8, order_code)
+		    __field(__u16, cpu_addr)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->order_code = order_code;
+		    __entry->cpu_addr = cpu_addr;
+		    ),
+
+	    VCPU_TP_PRINTK("handle sigp pei order %02x (%s), cpu address %04x",
+			   __entry->order_code,
+			   __print_symbolic(__entry->order_code,
+					    sigp_order_codes),
+			   __entry->cpu_addr)
+	);
+
+TRACE_EVENT(kvm_s390_handle_diag,
+	    TP_PROTO(VCPU_PROTO_COMMON, __u16 code),
+	    TP_ARGS(VCPU_ARGS_COMMON, code),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(__u16, code)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->code = code;
+		    ),
+
+	    VCPU_TP_PRINTK("handle diagnose call %04x (%s)", __entry->code,
+			   __print_symbolic(__entry->code, diagnose_codes))
+	);
+
+TRACE_EVENT(kvm_s390_handle_lctl,
+	    TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
+	    TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(int, g)
+		    __field(int, reg1)
+		    __field(int, reg3)
+		    __field(u64, addr)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->g = g;
+		    __entry->reg1 = reg1;
+		    __entry->reg3 = reg3;
+		    __entry->addr = addr;
+		    ),
+
+	    VCPU_TP_PRINTK("%s: loading cr %x-%x from %016llx",
+			   __entry->g ? "lctlg" : "lctl",
+			   __entry->reg1, __entry->reg3, __entry->addr)
+	);
+
+TRACE_EVENT(kvm_s390_handle_stctl,
+	    TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
+	    TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(int, g)
+		    __field(int, reg1)
+		    __field(int, reg3)
+		    __field(u64, addr)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->g = g;
+		    __entry->reg1 = reg1;
+		    __entry->reg3 = reg3;
+		    __entry->addr = addr;
+		    ),
+
+	    VCPU_TP_PRINTK("%s: storing cr %x-%x to %016llx",
+			   __entry->g ? "stctg" : "stctl",
+			   __entry->reg1, __entry->reg3, __entry->addr)
+	);
+
+TRACE_EVENT(kvm_s390_handle_prefix,
+	    TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address),
+	    TP_ARGS(VCPU_ARGS_COMMON, set, address),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(int, set)
+		    __field(u32, address)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->set = set;
+		    __entry->address = address;
+		    ),
+
+	    VCPU_TP_PRINTK("%s prefix to %08x",
+			   __entry->set ? "setting" : "storing",
+			   __entry->address)
+	);
+
+TRACE_EVENT(kvm_s390_handle_stap,
+	    TP_PROTO(VCPU_PROTO_COMMON, u64 address),
+	    TP_ARGS(VCPU_ARGS_COMMON, address),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(u64, address)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->address = address;
+		    ),
+
+	    VCPU_TP_PRINTK("storing cpu address to %016llx",
+			   __entry->address)
+	);
+
+TRACE_EVENT(kvm_s390_handle_stfl,
+	    TP_PROTO(VCPU_PROTO_COMMON, unsigned int facility_list),
+	    TP_ARGS(VCPU_ARGS_COMMON, facility_list),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(unsigned int, facility_list)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->facility_list = facility_list;
+		    ),
+
+	    VCPU_TP_PRINTK("store facility list value %08x",
+			   __entry->facility_list)
+	);
+
+TRACE_EVENT(kvm_s390_handle_stsi,
+	    TP_PROTO(VCPU_PROTO_COMMON, int fc, int sel1, int sel2, u64 addr),
+	    TP_ARGS(VCPU_ARGS_COMMON, fc, sel1, sel2, addr),
+
+	    TP_STRUCT__entry(
+		    VCPU_FIELD_COMMON
+		    __field(int, fc)
+		    __field(int, sel1)
+		    __field(int, sel2)
+		    __field(u64, addr)
+		    ),
+
+	    TP_fast_assign(
+		    VCPU_ASSIGN_COMMON
+		    __entry->fc = fc;
+		    __entry->sel1 = sel1;
+		    __entry->sel2 = sel2;
+		    __entry->addr = addr;
+		    ),
+
+	    VCPU_TP_PRINTK("STSI %d.%d.%d information stored to %016llx",
+			   __entry->fc, __entry->sel1, __entry->sel2,
+			   __entry->addr)
+	);
+
+#endif /* _TRACE_KVM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
new file mode 100644
index 000000000..0e8fefe5b
--- /dev/null
+++ b/arch/s390/lib/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for s390-specific library files.
+#
+
+lib-y += delay.o string.o uaccess.o find.o
+obj-y += mem.o
+lib-$(CONFIG_SMP) += spinlock.o
+lib-$(CONFIG_KPROBES) += probes.o
+lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
new file mode 100644
index 000000000..16dc42d83
--- /dev/null
+++ b/arch/s390/lib/delay.c
@@ -0,0 +1,129 @@
+/*
+ *    Precise Delay Loops for S390
+ *
+ *    Copyright IBM Corp. 1999, 2008
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *		 Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/module.h>
+#include <linux/irqflags.h>
+#include <linux/interrupt.h>
+#include <asm/vtimer.h>
+#include <asm/div64.h>
+
+void __delay(unsigned long loops)
+{
+        /*
+         * To end the bloody stupid and useless discussion about the
+         * BogoMips number I took the liberty to define the __delay
+         * function in a way that the resulting BogoMips number will
+         * yield the megahertz number of the cpu. The important function
+         * is udelay and that is done using the tod clock. -- martin.
+         */
+	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
+}
+
+static void __udelay_disabled(unsigned long long usecs)
+{
+	unsigned long cr0, cr6, new;
+	u64 clock_saved, end;
+
+	end = get_tod_clock() + (usecs << 12);
+	clock_saved = local_tick_disable();
+	__ctl_store(cr0, 0, 0);
+	__ctl_store(cr6, 6, 6);
+	new = (cr0 &  0xffff00e0) | 0x00000800;
+	__ctl_load(new , 0, 0);
+	new = 0;
+	__ctl_load(new, 6, 6);
+	lockdep_off();
+	do {
+		set_clock_comparator(end);
+		enabled_wait();
+	} while (get_tod_clock_fast() < end);
+	lockdep_on();
+	__ctl_load(cr0, 0, 0);
+	__ctl_load(cr6, 6, 6);
+	local_tick_enable(clock_saved);
+}
+
+static void __udelay_enabled(unsigned long long usecs)
+{
+	u64 clock_saved, end;
+
+	end = get_tod_clock_fast() + (usecs << 12);
+	do {
+		clock_saved = 0;
+		if (end < S390_lowcore.clock_comparator) {
+			clock_saved = local_tick_disable();
+			set_clock_comparator(end);
+		}
+		enabled_wait();
+		if (clock_saved)
+			local_tick_enable(clock_saved);
+	} while (get_tod_clock_fast() < end);
+}
+
+/*
+ * Waits for 'usecs' microseconds using the TOD clock comparator.
+ */
+void __udelay(unsigned long long usecs)
+{
+	unsigned long flags;
+
+	preempt_disable();
+	local_irq_save(flags);
+	if (in_irq()) {
+		__udelay_disabled(usecs);
+		goto out;
+	}
+	if (in_softirq()) {
+		if (raw_irqs_disabled_flags(flags))
+			__udelay_disabled(usecs);
+		else
+			__udelay_enabled(usecs);
+		goto out;
+	}
+	if (raw_irqs_disabled_flags(flags)) {
+		local_bh_disable();
+		__udelay_disabled(usecs);
+		_local_bh_enable();
+		goto out;
+	}
+	__udelay_enabled(usecs);
+out:
+	local_irq_restore(flags);
+	preempt_enable();
+}
+EXPORT_SYMBOL(__udelay);
+
+/*
+ * Simple udelay variant. To be used on startup and reboot
+ * when the interrupt handler isn't working.
+ */
+void udelay_simple(unsigned long long usecs)
+{
+	u64 end;
+
+	end = get_tod_clock_fast() + (usecs << 12);
+	while (get_tod_clock_fast() < end)
+		cpu_relax();
+}
+
+void __ndelay(unsigned long long nsecs)
+{
+	u64 end;
+
+	nsecs <<= 9;
+	do_div(nsecs, 125);
+	end = get_tod_clock_fast() + nsecs;
+	if (nsecs & ~0xfffUL)
+		__udelay(nsecs >> 12);
+	while (get_tod_clock_fast() < end)
+		barrier();
+}
+EXPORT_SYMBOL(__ndelay);
diff --git a/arch/s390/lib/find.c b/arch/s390/lib/find.c
new file mode 100644
index 000000000..922003c1b
--- /dev/null
+++ b/arch/s390/lib/find.c
@@ -0,0 +1,77 @@
+/*
+ * MSB0 numbered special bitops handling.
+ *
+ * On s390x the bits are numbered:
+ *   |0..............63|64............127|128...........191|192...........255|
+ * and on s390:
+ *   |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255|
+ *
+ * The reason for this bit numbering is the fact that the hardware sets bits
+ * in a bitmap starting at bit 0 (MSB) and we don't want to scan the bitmap
+ * from the 'wrong end'.
+ */
+
+#include <linux/compiler.h>
+#include <linux/bitops.h>
+#include <linux/export.h>
+
+unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size)
+{
+	const unsigned long *p = addr;
+	unsigned long result = 0;
+	unsigned long tmp;
+
+	while (size & ~(BITS_PER_LONG - 1)) {
+		if ((tmp = *(p++)))
+			goto found;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+	tmp = (*p) & (~0UL << (BITS_PER_LONG - size));
+	if (!tmp)		/* Are any bits set? */
+		return result + size;	/* Nope. */
+found:
+	return result + (__fls(tmp) ^ (BITS_PER_LONG - 1));
+}
+EXPORT_SYMBOL(find_first_bit_inv);
+
+unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size,
+				unsigned long offset)
+{
+	const unsigned long *p = addr + (offset / BITS_PER_LONG);
+	unsigned long result = offset & ~(BITS_PER_LONG - 1);
+	unsigned long tmp;
+
+	if (offset >= size)
+		return size;
+	size -= result;
+	offset %= BITS_PER_LONG;
+	if (offset) {
+		tmp = *(p++);
+		tmp &= (~0UL >> offset);
+		if (size < BITS_PER_LONG)
+			goto found_first;
+		if (tmp)
+			goto found_middle;
+		size -= BITS_PER_LONG;
+		result += BITS_PER_LONG;
+	}
+	while (size & ~(BITS_PER_LONG-1)) {
+		if ((tmp = *(p++)))
+			goto found_middle;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	if (!size)
+		return result;
+	tmp = *p;
+found_first:
+	tmp &= (~0UL << (BITS_PER_LONG - size));
+	if (!tmp)		/* Are any bits set? */
+		return result + size;	/* Nope. */
+found_middle:
+	return result + (__fls(tmp) ^ (BITS_PER_LONG - 1));
+}
+EXPORT_SYMBOL(find_next_bit_inv);
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
new file mode 100644
index 000000000..c6d553e85
--- /dev/null
+++ b/arch/s390/lib/mem.S
@@ -0,0 +1,88 @@
+/*
+ * String handling functions.
+ *
+ * Copyright IBM Corp. 2012
+ */
+
+#include <linux/linkage.h>
+
+/*
+ * memset implementation
+ *
+ * This code corresponds to the C construct below. We do distinguish
+ * between clearing (c == 0) and setting a memory array (c != 0) simply
+ * because nearly all memset invocations in the kernel clear memory and
+ * the xc instruction is preferred in such cases.
+ *
+ * void *memset(void *s, int c, size_t n)
+ * {
+ *	if (likely(c == 0))
+ *		return __builtin_memset(s, 0, n);
+ *	return __builtin_memset(s, c, n);
+ * }
+ */
+ENTRY(memset)
+	ltgr	%r4,%r4			# n == 0 ?
+	bzr	%r14			# yes: return, nothing to set
+	ltgr	%r3,%r3			# c == 0 ?
+	jnz	.Lmemset_fill		# no: fill with mvc instead of xc
+	aghi	%r4,-1			# n - 1, low byte is the ex length code
+	srlg	%r3,%r4,8		# (n - 1) / 256 full blocks for the loop
+	ltgr	%r3,%r3
+	lgr	%r1,%r2			# advance a copy, %r2 is left untouched
+	jz	.Lmemset_clear_rest
+.Lmemset_clear_loop:
+	xc	0(256,%r1),0(%r1)	# xor 256 bytes with themselves: zero
+	la	%r1,256(%r1)
+	brctg	%r3,.Lmemset_clear_loop
+.Lmemset_clear_rest:
+	larl	%r3,.Lmemset_xc
+	ex	%r4,0(%r3)		# clear the last ((n - 1) & 255) + 1 bytes
+	br	%r14
+.Lmemset_fill:
+	stc	%r3,0(%r2)		# store c into the first byte
+	cghi	%r4,1			# n == 1 ?
+	lgr	%r1,%r2
+	ber	%r14			# yes: done
+	aghi	%r4,-2			# one byte is set, ex length code is len - 1
+	srlg	%r3,%r4,8		# (n - 2) / 256 full blocks for the loop
+	ltgr	%r3,%r3
+	jz	.Lmemset_fill_rest
+.Lmemset_fill_loop:
+	mvc	1(256,%r1),0(%r1)	# overlapping move copies byte 0 forward
+	la	%r1,256(%r1)
+	brctg	%r3,.Lmemset_fill_loop
+.Lmemset_fill_rest:
+	larl	%r3,.Lmemset_mvc
+	ex	%r4,0(%r3)		# fill the last ((n - 2) & 255) + 1 bytes
+	br	%r14
+.Lmemset_xc:
+	xc	0(1,%r1),0(%r1)		# ex target, length comes from %r4
+.Lmemset_mvc:
+	mvc	1(1,%r1),0(%r1)		# ex target, length comes from %r4
+
+/*
+ * memcpy implementation
+ *
+ * void *memcpy(void *dest, const void *src, size_t n)
+ */
+ENTRY(memcpy)
+	ltgr	%r4,%r4			# n == 0 ?
+	bzr	%r14			# yes: return, nothing to copy
+	aghi	%r4,-1			# n - 1, low byte is the ex length code
+	srlg	%r5,%r4,8		# (n - 1) / 256 full blocks for the loop
+	ltgr	%r5,%r5
+	lgr	%r1,%r2			# advance a copy, %r2 is left untouched
+	jnz	.Lmemcpy_loop
+.Lmemcpy_rest:
+	larl	%r5,.Lmemcpy_mvc
+	ex	%r4,0(%r5)		# copy the last ((n - 1) & 255) + 1 bytes
+	br	%r14
+.Lmemcpy_loop:
+	mvc	0(256,%r1),0(%r3)	# copy one 256 byte block
+	la	%r1,256(%r1)
+	la	%r3,256(%r3)
+	brctg	%r5,.Lmemcpy_loop
+	j	.Lmemcpy_rest
+.Lmemcpy_mvc:
+	mvc	0(1,%r1),0(%r3)		# ex target, length comes from %r4
diff --git a/arch/s390/lib/probes.c b/arch/s390/lib/probes.c
new file mode 100644
index 000000000..ae90e1ae3
--- /dev/null
+++ b/arch/s390/lib/probes.c
@@ -0,0 +1,159 @@
+/*
+ *    Common helper functions for kprobes and uprobes
+ *
+ *    Copyright IBM Corp. 2014
+ */
+
+#include <asm/kprobes.h>
+#include <asm/dis.h>
+
+int probe_is_prohibited_opcode(u16 *insn)
+{
+	if (!is_known_insn((unsigned char *)insn))
+		return -EINVAL;
+	switch (insn[0] >> 8) {
+	case 0x0c:	/* bassm */
+	case 0x0b:	/* bsm	 */
+	case 0x83:	/* diag  */
+	case 0x44:	/* ex	 */
+	case 0xac:	/* stnsm */
+	case 0xad:	/* stosm */
+		return -EINVAL;
+	case 0xc6:
+		switch (insn[0] & 0x0f) {
+		case 0x00: /* exrl   */
+			return -EINVAL;
+		}
+	}
+	switch (insn[0]) {
+	case 0x0101:	/* pr	 */
+	case 0xb25a:	/* bsa	 */
+	case 0xb240:	/* bakr  */
+	case 0xb258:	/* bsg	 */
+	case 0xb218:	/* pc	 */
+	case 0xb228:	/* pt	 */
+	case 0xb98d:	/* epsw	 */
+	case 0xe560:	/* tbegin */
+	case 0xe561:	/* tbeginc */
+	case 0xb2f8:	/* tend	 */
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Return the FIXUP_* flags that the opcode found at insn selects. */
+int probe_get_fixup_type(u16 *insn)
+{
+	int fixup = FIXUP_PSW_NORMAL;	/* default fixup method */
+
+	switch (insn[0] >> 8) {
+	case 0x05:	/* balr	*/
+	case 0x0d:	/* basr */
+		fixup = FIXUP_RETURN_REGISTER;
+		/* if r2 = 0, no branch will be taken */
+		if ((insn[0] & 0x0f) == 0)
+			fixup |= FIXUP_BRANCH_NOT_TAKEN;
+		break;
+	case 0x06:	/* bctr	*/
+	case 0x07:	/* bcr	*/
+		fixup = FIXUP_BRANCH_NOT_TAKEN;
+		break;
+	case 0x45:	/* bal	*/
+	case 0x4d:	/* bas	*/
+		fixup = FIXUP_RETURN_REGISTER;
+		break;
+	case 0x47:	/* bc	*/
+	case 0x46:	/* bct	*/
+	case 0x86:	/* bxh	*/
+	case 0x87:	/* bxle	*/
+		fixup = FIXUP_BRANCH_NOT_TAKEN;
+		break;
+	case 0x82:	/* lpsw	*/
+		fixup = FIXUP_NOT_REQUIRED;
+		break;
+	case 0xb2:	/* lpswe */
+		if ((insn[0] & 0xff) == 0xb2)
+			fixup = FIXUP_NOT_REQUIRED;
+		break;
+	case 0xa7:	/* bras	*/
+		if ((insn[0] & 0x0f) == 0x05)
+			fixup |= FIXUP_RETURN_REGISTER;
+		break;
+	case 0xc0:
+		if ((insn[0] & 0x0f) == 0x05)	/* brasl */
+			fixup |= FIXUP_RETURN_REGISTER;
+		break;
+	case 0xeb:
+		switch (insn[2] & 0xff) {
+		case 0x44: /* bxhg  */
+		case 0x45: /* bxleg */
+			fixup = FIXUP_BRANCH_NOT_TAKEN;
+			break;
+		}
+		break;
+	case 0xe3:	/* bctg	*/
+		if ((insn[2] & 0xff) == 0x46)
+			fixup = FIXUP_BRANCH_NOT_TAKEN;
+		break;
+	case 0xec:
+		switch (insn[2] & 0xff) {
+		case 0xe4: /* cgrb  */
+		case 0xe5: /* clgrb */
+		case 0xf6: /* crb   */
+		case 0xf7: /* clrb  */
+		case 0xfc: /* cgib  */
+		case 0xfd: /* clgib */
+		case 0xfe: /* cib   */
+		case 0xff: /* clib  */
+			fixup = FIXUP_BRANCH_NOT_TAKEN;
+			break;
+		}
+		break;
+	}
+	return fixup;
+}
+
+int probe_is_insn_relative_long(u16 *insn)
+{
+	/* Check if we have a RIL-b or RIL-c format instruction which
+	 * we need to modify in order to avoid instruction emulation. */
+	switch (insn[0] >> 8) {
+	case 0xc0:
+		if ((insn[0] & 0x0f) == 0x00) /* larl */
+			return true;
+		break;
+	case 0xc4:
+		switch (insn[0] & 0x0f) {
+		case 0x02: /* llhrl  */
+		case 0x04: /* lghrl  */
+		case 0x05: /* lhrl   */
+		case 0x06: /* llghrl */
+		case 0x07: /* sthrl  */
+		case 0x08: /* lgrl   */
+		case 0x0b: /* stgrl  */
+		case 0x0c: /* lgfrl  */
+		case 0x0d: /* lrl    */
+		case 0x0e: /* llgfrl */
+		case 0x0f: /* strl   */
+			return true;
+		}
+		break;
+	case 0xc6:
+		switch (insn[0] & 0x0f) {
+		case 0x02: /* pfdrl  */
+		case 0x04: /* cghrl  */
+		case 0x05: /* chrl   */
+		case 0x06: /* clghrl */
+		case 0x07: /* clhrl  */
+		case 0x08: /* cgrl   */
+		case 0x0a: /* clgrl  */
+		case 0x0c: /* cgfrl  */
+		case 0x0d: /* crl    */
+		case 0x0e: /* clgfrl */
+		case 0x0f: /* clrl   */
+			return true;
+		}
+		break;
+	}
+	return false;
+}
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
new file mode 100644
index 000000000..d6c9991f7
--- /dev/null
+++ b/arch/s390/lib/spinlock.c
@@ -0,0 +1,272 @@
+/*
+ *    Out of line spinlock code.
+ *
+ *    Copyright IBM Corp. 2004, 2006
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <asm/io.h>
+
+int spin_retry = -1;
+
+static int __init spin_retry_init(void)
+{
+	if (spin_retry < 0)
+		spin_retry = MACHINE_HAS_CAD ? 10 : 1000;
+	return 0;
+}
+early_initcall(spin_retry_init);
+
+/*
+ * Parse the spin_retry= kernel parameter into spin_retry.
+ */
+static int __init spin_retry_setup(char *str)
+{
+	spin_retry = simple_strtoul(str, &str, 0);
+	return 1;
+}
+__setup("spin_retry=", spin_retry_setup);
+
+static inline void _raw_compare_and_delay(unsigned int *lock, unsigned int old)
+{
+	asm(".insn rsy,0xeb0000000022,%0,0,%1" : : "d" (old), "Q" (*lock));
+}
+
+void arch_spin_lock_wait(arch_spinlock_t *lp)
+{
+	unsigned int cpu = SPINLOCK_LOCKVAL;
+	unsigned int owner;
+	int count;
+
+	while (1) {
+		owner = ACCESS_ONCE(lp->lock);
+		/* Try to get the lock if it is free. */
+		if (!owner) {
+			if (_raw_compare_and_swap(&lp->lock, 0, cpu))
+				return;
+			continue;
+		}
+		/* Check if the lock owner is running. */
+		if (!smp_vcpu_scheduled(~owner)) {
+			smp_yield_cpu(~owner);
+			continue;
+		}
+		/* Loop for a while on the lock value. */
+		count = spin_retry;
+		do {
+			if (MACHINE_HAS_CAD)
+				_raw_compare_and_delay(&lp->lock, owner);
+			owner = ACCESS_ONCE(lp->lock);
+		} while (owner && count-- > 0);
+		if (!owner)
+			continue;
+		/*
+		 * For multiple layers of hypervisors, e.g. z/VM + LPAR
+		 * yield the CPU if the lock is still unavailable.
+		 */
+		if (!MACHINE_IS_LPAR)
+			smp_yield_cpu(~owner);
+	}
+}
+EXPORT_SYMBOL(arch_spin_lock_wait);
+
+/* Wait for lp with irqs as in flags; returns locked with irqs disabled. */
+void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
+{
+	unsigned int cpu = SPINLOCK_LOCKVAL;
+	unsigned int owner;
+	int count;
+
+	local_irq_restore(flags);
+	while (1) {
+		owner = ACCESS_ONCE(lp->lock);
+		/* Try to get the lock if it is free. */
+		if (!owner) {
+			local_irq_disable();
+			if (_raw_compare_and_swap(&lp->lock, 0, cpu))
+				return;
+			local_irq_restore(flags);
+			continue;	/* owner is 0, ~owner is no valid cpu */
+		}
+		/* Check if the lock owner is running. */
+		if (!smp_vcpu_scheduled(~owner)) {
+			smp_yield_cpu(~owner);
+			continue;
+		}
+		/* Loop for a while on the lock value. */
+		count = spin_retry;
+		do {
+			if (MACHINE_HAS_CAD)
+				_raw_compare_and_delay(&lp->lock, owner);
+			owner = ACCESS_ONCE(lp->lock);
+		} while (owner && count-- > 0);
+		if (!owner)
+			continue;
+		/* For multiple layers of hypervisors, e.g. z/VM + LPAR
+		 * yield the CPU if the lock is still unavailable. */
+		if (!MACHINE_IS_LPAR)
+			smp_yield_cpu(~owner);
+	}
+}
+EXPORT_SYMBOL(arch_spin_lock_wait_flags);
+
+/* Try up to spin_retry times to take lp; returns 1 on success, else 0. */
+int arch_spin_trylock_retry(arch_spinlock_t *lp)
+{
+	unsigned int holder, self = SPINLOCK_LOCKVAL;
+	int tries = spin_retry;
+
+	while (tries-- > 0) {
+		holder = ACCESS_ONCE(lp->lock);
+		if (holder) {
+			if (MACHINE_HAS_CAD)
+				_raw_compare_and_delay(&lp->lock, holder);
+		} else if (_raw_compare_and_swap(&lp->lock, 0, self)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(arch_spin_trylock_retry);
+
+void _raw_read_lock_wait(arch_rwlock_t *rw)
+{
+	unsigned int owner, old;
+	int count = spin_retry;
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+	__RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD);
+#endif
+	owner = 0;
+	while (1) {
+		if (count-- <= 0) {
+			if (owner && !smp_vcpu_scheduled(~owner))
+				smp_yield_cpu(~owner);
+			count = spin_retry;
+		}
+		old = ACCESS_ONCE(rw->lock);
+		owner = ACCESS_ONCE(rw->owner);
+		if ((int) old < 0) {
+			if (MACHINE_HAS_CAD)
+				_raw_compare_and_delay(&rw->lock, old);
+			continue;
+		}
+		if (_raw_compare_and_swap(&rw->lock, old, old + 1))
+			return;
+	}
+}
+EXPORT_SYMBOL(_raw_read_lock_wait);
+
+/* Retry a read lock up to spin_retry times; returns 1 if taken, else 0. */
+int _raw_read_trylock_retry(arch_rwlock_t *rw)
+{
+	unsigned int val;
+	int left;
+
+	for (left = spin_retry; left > 0; left--) {
+		val = ACCESS_ONCE(rw->lock);
+		if ((int) val >= 0) {
+			if (_raw_compare_and_swap(&rw->lock, val, val + 1))
+				return 1;
+		} else if (MACHINE_HAS_CAD) {
+			_raw_compare_and_delay(&rw->lock, val);
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(_raw_read_trylock_retry);
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev)
+{
+	unsigned int owner, old;
+	int count = spin_retry;
+
+	owner = 0;
+	while (1) {
+		if (count-- <= 0) {
+			if (owner && !smp_vcpu_scheduled(~owner))
+				smp_yield_cpu(~owner);
+			count = spin_retry;
+		}
+		old = ACCESS_ONCE(rw->lock);
+		owner = ACCESS_ONCE(rw->owner);
+		smp_rmb();
+		if ((int) old >= 0) {
+			prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
+			old = prev;
+		}
+		if ((old & 0x7fffffff) == 0 && (int) prev >= 0)
+			break;
+		if (MACHINE_HAS_CAD)
+			_raw_compare_and_delay(&rw->lock, old);
+	}
+}
+EXPORT_SYMBOL(_raw_write_lock_wait);
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+void _raw_write_lock_wait(arch_rwlock_t *rw)
+{
+	unsigned int owner, old, prev;
+	int count = spin_retry;
+
+	prev = 0x80000000;
+	owner = 0;
+	while (1) {
+		if (count-- <= 0) {
+			if (owner && !smp_vcpu_scheduled(~owner))
+				smp_yield_cpu(~owner);
+			count = spin_retry;
+		}
+		old = ACCESS_ONCE(rw->lock);
+		owner = ACCESS_ONCE(rw->owner);
+		if ((int) old >= 0 &&
+		    _raw_compare_and_swap(&rw->lock, old, old | 0x80000000))
+			prev = old;
+		else
+			smp_rmb();
+		if ((old & 0x7fffffff) == 0 && (int) prev >= 0)
+			break;
+		if (MACHINE_HAS_CAD)
+			_raw_compare_and_delay(&rw->lock, old);
+	}
+}
+EXPORT_SYMBOL(_raw_write_lock_wait);
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+/* Retry a write lock up to spin_retry times; returns 1 if taken, else 0. */
+int _raw_write_trylock_retry(arch_rwlock_t *rw)
+{
+	unsigned int val;
+	int left;
+
+	for (left = spin_retry; left > 0; left--) {
+		val = ACCESS_ONCE(rw->lock);
+		if (!val) {
+			if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
+				return 1;
+		} else if (MACHINE_HAS_CAD) {
+			_raw_compare_and_delay(&rw->lock, val);
+		}
+	}
+	return 0;
+}
+EXPORT_SYMBOL(_raw_write_trylock_retry);
+
+/*
+ * Yield to cpu ~cpu unless cpu is 0 or, on LPAR, that cpu is scheduled.
+ */
+void arch_lock_relax(unsigned int cpu)
+{
+	if (cpu && !(MACHINE_IS_LPAR && smp_vcpu_scheduled(~cpu)))
+		smp_yield_cpu(~cpu);
+}
+EXPORT_SYMBOL(arch_lock_relax);
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
new file mode 100644
index 000000000..b647d5ff0
--- /dev/null
+++ b/arch/s390/lib/string.c
@@ -0,0 +1,342 @@
+/*
+ *    Optimized string functions
+ *
+ *  S390 version
+ *    Copyright IBM Corp. 2004
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#define IN_ARCH_STRING_C 1
+
+#include <linux/types.h>
+#include <linux/module.h>
+
+/*
+ * Helper functions to find the end of a string
+ */
+static inline char *__strend(const char *s)
+{
+	register unsigned long r0 asm("0") = 0;	/* byte to find: NUL */
+
+	asm volatile ("0: srst  %0,%1\n"	/* search from s for r0's byte */
+		      "   jo    0b"		/* cc 3: partial search, resume */
+		      : "+d" (r0), "+a" (s) :  : "cc" );
+	return (char *) r0;	/* r0 is also srst's result operand: the match */
+}
+
+static inline char *__strnend(const char *s, size_t n)
+{
+	register unsigned long r0 asm("0") = 0;	/* byte to find: NUL */
+	const char *p = s + n;	/* search limit; kept as result if no NUL */
+
+	asm volatile ("0: srst  %0,%1\n"	/* p = address of NUL if found */
+		      "   jo    0b"		/* cc 3: partial search, resume */
+		      : "+d" (p), "+a" (s) : "d" (r0) : "cc" );
+	return (char *) p;
+}
+
+/**
+ * strlen - Find the length of a string
+ * @s: The string to be sized
+ *
+ * returns the number of bytes in @s in front of its terminating %NUL
+ */
+size_t strlen(const char *s)
+{
+	return (size_t) (__strend(s) - s);
+}
+EXPORT_SYMBOL(strlen);
+
+/**
+ * strnlen - Find the length of a length-limited string
+ * @s: The string to be sized
+ * @n: The maximum number of bytes to search
+ *
+ * returns the length of @s, or @n if no %NUL shows up in the first @n bytes
+ */
+size_t strnlen(const char *s, size_t n)
+{
+	return (size_t) (__strnend(s, n) - s);
+}
+EXPORT_SYMBOL(strnlen);
+
+/**
+ * strcpy - Copy a %NUL terminated string
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ *
+ * returns a pointer to @dest
+ */
+char *strcpy(char *dest, const char *src)
+{
+	register int r0 asm("0") = 0;	/* byte that ends the mvst copy: NUL */
+	char *ret = dest;	/* dest is an in/out asm operand, keep the start */
+
+	asm volatile ("0: mvst  %0,%1\n"
+		      "   jo    0b"	/* cc 3: partial copy, resume */
+		      : "+&a" (dest), "+&a" (src) : "d" (r0)
+		      : "cc", "memory" );
+	return ret;
+}
+EXPORT_SYMBOL(strcpy);
+
+/**
+ * strlcpy - Copy a %NUL terminated string into a sized buffer
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @size: size of destination buffer
+ *
+ * BSD-compatible: unless @size is 0 the result is %NUL-terminated and fits
+ * in @size bytes; no padding is added. returns the length of @src.
+ */
+size_t strlcpy(char *dest, const char *src, size_t size)
+{
+	size_t srclen = __strend(src) - src;
+	size_t ncopy = srclen;
+
+	if (!size)
+		return srclen;
+	if (ncopy >= size)
+		ncopy = size - 1;
+	dest[ncopy] = '\0';
+	memcpy(dest, src, ncopy);
+	return srclen;
+}
+EXPORT_SYMBOL(strlcpy);
+
+/**
+ * strncpy - Copy a length-limited, %NUL-terminated string
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @n: The maximum number of bytes to copy
+ *
+ * The result is not %NUL-terminated if the source exceeds @n bytes.
+ */
+char *strncpy(char *dest, const char *src, size_t n)
+{
+	size_t copied = __strnend(src, n) - src;
+
+	memset(dest + copied, 0, n - copied);	/* zero the unused tail */
+	memcpy(dest, src, copied);
+	return dest;
+}
+EXPORT_SYMBOL(strncpy);
+
+/**
+ * strcat - Append one %NUL-terminated string to another
+ * @dest: The string to be appended to
+ * @src: The string to append to it
+ *
+ * returns a pointer to @dest
+ */
+char *strcat(char *dest, const char *src)
+{
+	register int r0 asm("0") = 0;	/* NUL: search byte and copy terminator */
+	unsigned long dummy;	/* first srst operand, preset to 0 via "0" (0UL) */
+	char *ret = dest;
+
+	asm volatile ("0: srst  %0,%1\n"	/* %0 = address of dest's NUL */
+		      "   jo    0b\n"
+		      "1: mvst  %0,%2\n"	/* copy src there, NUL included */
+		      "   jo    1b"
+		      : "=&a" (dummy), "+a" (dest), "+a" (src)
+		      : "d" (r0), "0" (0UL) : "cc", "memory" );
+	return ret;
+}
+EXPORT_SYMBOL(strcat);
+
+/**
+ * strlcat - Append a length-limited, %NUL-terminated string to another
+ * @dest: The string to be appended to
+ * @src: The string to append to it
+ * @n: The size of the destination buffer.
+ * returns the length of @dest plus the length of @src (before truncation)
+ */
+size_t strlcat(char *dest, const char *src, size_t n)
+{
+	size_t used = __strend(dest) - dest;
+	size_t add = __strend(src) - src;
+	size_t total = used + add;
+	size_t room;
+
+	if (used >= n)
+		return total;	/* no room at all: dest is left untouched */
+	room = n - used;
+	if (add >= room)
+		add = room - 1;	/* keep one byte for the terminator */
+	dest[used + add] = '\0';
+	memcpy(dest + used, src, add);
+	return total;
+}
+EXPORT_SYMBOL(strlcat);
+
+/**
+ * strncat - Append a length-limited, %NUL-terminated string to another
+ * @dest: The string to be appended to
+ * @src: The string to append to it
+ * @n: The maximum numbers of bytes to copy
+ *
+ * returns a pointer to @dest
+ *
+ * Note that in contrast to strncpy, strncat ensures the result is
+ * terminated.
+ */
+char *strncat(char *dest, const char *src, size_t n)
+{
+	size_t take = __strnend(src, n) - src;	/* at most @n bytes of @src */
+	char *tail = __strend(dest);
+
+	tail[take] = '\0';	/* terminate first, then fill in the bytes */
+	memcpy(tail, src, take);
+	return dest;
+}
+EXPORT_SYMBOL(strncat);
+
+/**
+ * strcmp - Compare two strings
+ * @cs: One string
+ * @ct: Another string
+ *
+ * returns   0 if @cs and @ct are equal,
+ *         < 0 if @cs is less than @ct
+ *         > 0 if @cs is greater than @ct
+ */
+int strcmp(const char *cs, const char *ct)
+{
+	register int r0 asm("0") = 0;	/* string end byte for clst: NUL */
+	int ret = 0;
+
+	asm volatile ("0: clst %2,%3\n"
+		      "   jo   0b\n"	/* cc 3: partial compare, resume */
+		      "   je   1f\n"	/* equal: leave ret at 0 */
+		      "   ic   %0,0(%2)\n"	/* ret = byte at (updated) cs */
+		      "   ic   %1,0(%3)\n"	/* r0  = byte at (updated) ct */
+		      "   sr   %0,%1\n"	/* ret = difference of the two bytes */
+		      "1:"
+		      : "+d" (ret), "+d" (r0), "+a" (cs), "+a" (ct)
+		      : : "cc" );
+	return ret;
+}
+EXPORT_SYMBOL(strcmp);
+
+/**
+ * strrchr - Find the last occurrence of a character in a string
+ * @s: The string to be searched
+ * @c: The character to search for
+ * returns the last match in @s (its %NUL included), or %NULL if none
+ */
+char *strrchr(const char *s, int c)
+{
+	size_t len = __strend(s) - s;
+
+	do {	/* walk from the terminating NUL down to s[0], both inclusive */
+		if (s[len] == (char) c)
+			return (char *) s + len;
+	} while (len-- > 0);
+	return NULL;
+}
+EXPORT_SYMBOL(strrchr);
+
+/**
+ * strstr - Find the first substring in a %NUL terminated string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ */
+char * strstr(const char * s1,const char * s2)
+{
+	int l1, l2;
+
+	l2 = __strend(s2) - s2;
+	if (!l2)
+		return (char *) s1;	/* empty needle matches at the start */
+	l1 = __strend(s1) - s1;
+	while (l1-- >= l2) {	/* stop once fewer than l2 bytes remain */
+		register unsigned long r2 asm("2") = (unsigned long) s1;
+		register unsigned long r3 asm("3") = (unsigned long) l2;
+		register unsigned long r4 asm("4") = (unsigned long) s2;
+		register unsigned long r5 asm("5") = (unsigned long) l2;
+		int cc;
+
+		asm volatile ("0: clcle %1,%3,0\n"	/* compare l2 bytes at s1, s2 */
+			      "   jo    0b\n"
+			      "   ipm   %0\n"	/* fetch the condition code ... */
+			      "   srl   %0,28"	/* ... and shift it to the low bits */
+			      : "=&d" (cc), "+a" (r2), "+a" (r3),
+			        "+a" (r4), "+a" (r5) : : "cc" );
+		if (!cc)
+			return (char *) s1;	/* cc 0: all l2 bytes equal */
+		s1++;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(strstr);
+
+/**
+ * memchr - Find a character in an area of memory.
+ * @s: The memory area
+ * @c: The byte to search for
+ * @n: The size of the area.
+ *
+ * returns the address of the first occurrence of @c, or %NULL
+ * if @c is not found
+ */
+void *memchr(const void *s, int c, size_t n)
+{
+	register int r0 asm("0") = (char) c;	/* byte srst searches for */
+	const void *ret = s + n;	/* end of the area = search limit */
+
+	asm volatile ("0: srst  %0,%1\n"
+		      "   jo    0b\n"	/* cc 3: partial search, resume */
+		      "   jl	1f\n"	/* found: ret holds the match address */
+		      "   la    %0,0\n"	/* not found: ret = NULL */
+		      "1:"
+		      : "+a" (ret), "+&a" (s) : "d" (r0) : "cc" );
+	return (void *) ret;
+}
+EXPORT_SYMBOL(memchr);
+
+/**
+ * memcmp - Compare two areas of memory
+ * @cs: One area of memory
+ * @ct: Another area of memory
+ * @n: The size of the area.
+ */
+int memcmp(const void *cs, const void *ct, size_t n)
+{
+	register unsigned long r2 asm("2") = (unsigned long) cs;
+	register unsigned long r3 asm("3") = (unsigned long) n;
+	register unsigned long r4 asm("4") = (unsigned long) ct;
+	register unsigned long r5 asm("5") = (unsigned long) n;
+	int ret;
+
+	asm volatile ("0: clcle %1,%3,0\n"	/* r2/r3, r4/r5: address/length */
+		      "   jo    0b\n"
+		      "   ipm   %0\n"
+		      "   srl   %0,28"	/* ret = condition code, 0 if equal */
+		      : "=&d" (ret), "+a" (r2), "+a" (r3), "+a" (r4), "+a" (r5)
+		      : : "cc" );
+	if (ret)	/* r2/r4 were advanced to the first differing bytes */
+		ret = *(char *) r2 - *(char *) r4; /* NOTE(review): sign depends on char signedness */
+	return ret;
+}
+EXPORT_SYMBOL(memcmp);
+
+/**
+ * memscan - Find a character in an area of memory.
+ * @s: The memory area
+ * @c: The byte to search for
+ * @n: The size of the area.
+ *
+ * returns the address of the first occurrence of @c, or 1 byte past
+ * the area if @c is not found
+ */
+void *memscan(void *s, int c, size_t n)
+{
+	register int r0 asm("0") = (char) c;	/* byte srst searches for */
+	const void *ret = s + n;	/* stays s + n when @c is not found */
+
+	asm volatile ("0: srst  %0,%1\n"
+		      "   jo    0b\n"	/* cc 3: partial search, resume */
+		      : "+a" (ret), "+&a" (s) : "d" (r0) : "cc" );
+	return (void *) ret;
+}
+EXPORT_SYMBOL(memscan);
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
new file mode 100644
index 000000000..4614d415b
--- /dev/null
+++ b/arch/s390/lib/uaccess.c
@@ -0,0 +1,392 @@
+/*
+ *  Standard user space access functions based on mvcp/mvcs and doing
+ *  interesting things in the secondary space mode.
+ *
+ *    Copyright IBM Corp. 2006,2014
+ *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ *		 Gerald Schaefer (gerald.schaefer@de.ibm.com)
+ */
+
+#include <linux/jump_label.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <asm/mmu_context.h>
+#include <asm/facility.h>
+
+static struct static_key have_mvcos = STATIC_KEY_INIT_FALSE;
+
+static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
+						 unsigned long size)
+{
+	register unsigned long reg0 asm("0") = 0x81UL;	/* mvcos controls; presumably "source is user space" - confirm */
+	unsigned long tmp1, tmp2;
+
+	tmp1 = -4096UL;	/* negated step: one mvcos round handles up to 4096 bytes */
+	asm volatile(
+		"0: .insn ss,0xc80000000000,0(%0,%2),0(%1),0\n"
+		"9: jz    7f\n"	/* cc 0: everything copied */
+		"1: algr  %0,%3\n"	/* size -= 4096 */
+		"   slgr  %1,%3\n"	/* ptr  += 4096 */
+		"   slgr  %2,%3\n"	/* x    += 4096 */
+		"   j     0b\n"
+		"2: la    %4,4095(%1)\n"/* %4 = ptr + 4095 */
+		"   nr    %4,%3\n"	/* %4 = (ptr + 4095) & -4096 */
+		"   slgr  %4,%1\n"	/* %4 = bytes up to that boundary */
+		"   clgr  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   4f\n"
+		"3: .insn ss,0xc80000000000,0(%4,%2),0(%1),0\n"
+		"10:slgr  %0,%4\n"
+		"   algr  %2,%4\n"
+		"4: lghi  %4,-1\n"
+		"   algr  %4,%0\n"	/* copy remaining size, subtract 1 */
+		"   bras  %3,6f\n"	/* memset loop */
+		"   xc    0(1,%2),0(%2)\n"	/* template run by the ex below */
+		"5: xc    0(256,%2),0(%2)\n"
+		"   la    %2,256(%2)\n"
+		"6: aghi  %4,-256\n"
+		"   jnm   5b\n"
+		"   ex    %4,0(%3)\n"	/* clear the last partial chunk */
+		"   j     8f\n"
+		"7:slgr  %0,%0\n"	/* success: size = 0 */
+		"8:\n"
+		EX_TABLE(0b,2b) EX_TABLE(3b,4b) EX_TABLE(9b,2b) EX_TABLE(10b,4b)
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: "d" (reg0) : "cc", "memory");
+	return size;	/* bytes that were not copied, 0 on success */
+}
+
+static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
+						unsigned long size)
+{
+	unsigned long tmp1, tmp2;
+
+	load_kernel_asce();
+	tmp1 = -256UL;	/* negated step: one mvcp round handles up to 256 bytes */
+	asm volatile(
+		"   sacf  0\n"	/* primary-space mode for mvcp */
+		"0: mvcp  0(%0,%2),0(%1),%3\n"
+		"10:jz    8f\n"	/* cc 0: everything copied */
+		"1: algr  %0,%3\n"	/* size -= 256 */
+		"   la    %1,256(%1)\n"
+		"   la    %2,256(%2)\n"
+		"2: mvcp  0(%0,%2),0(%1),%3\n"
+		"11:jnz   1b\n"
+		"   j     8f\n"
+		"3: la    %4,255(%1)\n"	/* %4 = ptr + 255 */
+		"   lghi  %3,-4096\n"
+		"   nr    %4,%3\n"	/* %4 = (ptr + 255) & -4096 */
+		"   slgr  %4,%1\n"
+		"   clgr  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   5f\n"
+		"4: mvcp  0(%4,%2),0(%1),%3\n"
+		"12:slgr  %0,%4\n"
+		"   algr  %2,%4\n"
+		"5: lghi  %4,-1\n"
+		"   algr  %4,%0\n"	/* copy remaining size, subtract 1 */
+		"   bras  %3,7f\n"	/* memset loop */
+		"   xc    0(1,%2),0(%2)\n"	/* template run by the ex below */
+		"6: xc    0(256,%2),0(%2)\n"
+		"   la    %2,256(%2)\n"
+		"7: aghi  %4,-256\n"
+		"   jnm   6b\n"
+		"   ex    %4,0(%3)\n"	/* clear the last partial chunk */
+		"   j     9f\n"
+		"8:slgr  %0,%0\n"	/* success: size = 0 */
+		"9: sacf  768\n"	/* back to home-space mode */
+		EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,5b)
+		EX_TABLE(10b,3b) EX_TABLE(11b,3b) EX_TABLE(12b,5b)
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: : "cc", "memory");
+	return size;	/* bytes that were not copied, 0 on success */
+}
+
+unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
+{	/* mvcos variant once have_mvcos is enabled, mvcp variant otherwise */
+	if (!static_key_false(&have_mvcos))
+		return copy_from_user_mvcp(to, from, n);
+	return copy_from_user_mvcos(to, from, n);
+}
+EXPORT_SYMBOL(__copy_from_user);
+
+static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x,
+					       unsigned long size)
+{
+	register unsigned long reg0 asm("0") = 0x810000UL;	/* mvcos controls; presumably "destination is user space" - confirm */
+	unsigned long tmp1, tmp2;
+
+	tmp1 = -4096UL;	/* negated step: one mvcos round handles up to 4096 bytes */
+	asm volatile(
+		"0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
+		"6: jz    4f\n"	/* cc 0: everything copied */
+		"1: algr  %0,%3\n"	/* size -= 4096 */
+		"   slgr  %1,%3\n"	/* ptr  += 4096 */
+		"   slgr  %2,%3\n"	/* x    += 4096 */
+		"   j     0b\n"
+		"2: la    %4,4095(%1)\n"/* %4 = ptr + 4095 */
+		"   nr    %4,%3\n"	/* %4 = (ptr + 4095) & -4096 */
+		"   slgr  %4,%1\n"	/* %4 = bytes up to that boundary */
+		"   clgr  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   5f\n"
+		"3: .insn ss,0xc80000000000,0(%4,%1),0(%2),0\n"
+		"7: slgr  %0,%4\n"
+		"   j     5f\n"
+		"4: slgr  %0,%0\n"	/* success: size = 0 */
+		"5:\n"
+		EX_TABLE(0b,2b) EX_TABLE(3b,5b) EX_TABLE(6b,2b) EX_TABLE(7b,5b)
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: "d" (reg0) : "cc", "memory");
+	return size;	/* bytes that were not copied, 0 on success */
+}
+
+static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
+					      unsigned long size)
+{
+	unsigned long tmp1, tmp2;
+
+	load_kernel_asce();
+	tmp1 = -256UL;	/* negated step: one mvcs round handles up to 256 bytes */
+	asm volatile(
+		"   sacf  0\n"	/* primary-space mode for mvcs */
+		"0: mvcs  0(%0,%1),0(%2),%3\n"
+		"7: jz    5f\n"	/* cc 0: everything copied */
+		"1: algr  %0,%3\n"	/* size -= 256 */
+		"   la    %1,256(%1)\n"
+		"   la    %2,256(%2)\n"
+		"2: mvcs  0(%0,%1),0(%2),%3\n"
+		"8: jnz   1b\n"
+		"   j     5f\n"
+		"3: la    %4,255(%1)\n" /* %4 = ptr + 255 */
+		"   lghi  %3,-4096\n"
+		"   nr    %4,%3\n"	/* %4 = (ptr + 255) & -4096 */
+		"   slgr  %4,%1\n"
+		"   clgr  %0,%4\n"	/* copy crosses next page boundary? */
+		"   jnh   6f\n"
+		"4: mvcs  0(%4,%1),0(%2),%3\n"
+		"9: slgr  %0,%4\n"
+		"   j     6f\n"
+		"5: slgr  %0,%0\n"	/* success: size = 0 */
+		"6: sacf  768\n"	/* back to home-space mode */
+		EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b)
+		EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b)
+		: "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2)
+		: : "cc", "memory");
+	return size;	/* bytes that were not copied, 0 on success */
+}
+
+unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
+{	/* mvcos variant once have_mvcos is enabled, mvcs variant otherwise */
+	if (!static_key_false(&have_mvcos))
+		return copy_to_user_mvcs(to, from, n);
+	return copy_to_user_mvcos(to, from, n);
+}
+EXPORT_SYMBOL(__copy_to_user);
+
+static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from,
+					       unsigned long size)
+{
+	register unsigned long reg0 asm("0") = 0x810081UL;	/* mvcos controls; presumably both operands are user space - confirm */
+	unsigned long tmp1, tmp2;
+
+	tmp1 = -4096UL;	/* negated step: one mvcos round handles up to 4096 bytes */
+	/* FIXME: copy with reduced length. */
+	asm volatile(
+		"0: .insn ss,0xc80000000000,0(%0,%1),0(%2),0\n"
+		"   jz	  2f\n"	/* cc 0: everything copied */
+		"1: algr  %0,%3\n"	/* size -= 4096 */
+		"   slgr  %1,%3\n"	/* to   += 4096 */
+		"   slgr  %2,%3\n"	/* from += 4096 */
+		"   j	  0b\n"
+		"2:slgr  %0,%0\n"	/* success: size = 0 */
+		"3: \n"	/* fault exit: size keeps the current remainder */
+		EX_TABLE(0b,3b)
+		: "+a" (size), "+a" (to), "+a" (from), "+a" (tmp1), "=a" (tmp2)
+		: "d" (reg0) : "cc", "memory");
+	return size;
+}
+
+static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from,
+					     unsigned long size)
+{
+	unsigned long tmp1;
+
+	load_kernel_asce();
+	asm volatile(
+		"   sacf  256\n"	/* secondary-space mode for the mvc copies */
+		"   aghi  %0,-1\n"	/* work with size - 1 */
+		"   jo	  5f\n"
+		"   bras  %3,3f\n"	/* %3 = address of label 0, enter at 3 */
+		"0: aghi  %0,257\n"	/* fault fixup: undo the -1 and -256 */
+		"1: mvc	  0(1,%1),0(%2)\n"	/* byte-wise retry; also the ex template */
+		"   la	  %1,1(%1)\n"
+		"   la	  %2,1(%2)\n"
+		"   aghi  %0,-1\n"
+		"   jnz	  1b\n"
+		"   j	  5f\n"
+		"2: mvc	  0(256,%1),0(%2)\n"	/* bulk loop, 256 bytes a round */
+		"   la	  %1,256(%1)\n"
+		"   la	  %2,256(%2)\n"
+		"3: aghi  %0,-256\n"
+		"   jnm	  2b\n"
+		"4: ex	  %0,1b-0b(%3)\n"	/* copy the last partial chunk */
+		"5: slgr  %0,%0\n"	/* success: size = 0 */
+		"6: sacf  768\n"	/* back to home-space mode */
+		EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
+		: "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1)
+		: : "cc", "memory");
+	return size;	/* bytes that were not copied, 0 on success */
+}
+
+unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{	/* mvcos variant once have_mvcos is enabled, mvc variant otherwise */
+	if (!static_key_false(&have_mvcos))
+		return copy_in_user_mvc(to, from, n);
+	return copy_in_user_mvcos(to, from, n);
+}
+EXPORT_SYMBOL(__copy_in_user);
+
+static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size)
+{
+	register unsigned long reg0 asm("0") = 0x810000UL;	/* mvcos controls; presumably "destination is user space" - confirm */
+	unsigned long tmp1, tmp2;
+
+	tmp1 = -4096UL;	/* negated step: one mvcos round handles up to 4096 bytes */
+	asm volatile(
+		"0: .insn ss,0xc80000000000,0(%0,%1),0(%4),0\n"
+		"   jz	  4f\n"	/* cc 0: everything cleared */
+		"1: algr  %0,%2\n"	/* size -= 4096 */
+		"   slgr  %1,%2\n"	/* to   += 4096; source stays empty_zero_page */
+		"   j	  0b\n"
+		"2: la	  %3,4095(%1)\n"/* %3 = to + 4095 */
+		"   nr	  %3,%2\n"	/* %3 = (to + 4095) & -4096 */
+		"   slgr  %3,%1\n"	/* %3 = bytes up to that boundary */
+		"   clgr  %0,%3\n"	/* copy crosses next page boundary? */
+		"   jnh	  5f\n"
+		"3: .insn ss,0xc80000000000,0(%3,%1),0(%4),0\n"
+		"   slgr  %0,%3\n"
+		"   j	  5f\n"
+		"4:slgr  %0,%0\n"	/* success: size = 0 */
+		"5:\n"
+		EX_TABLE(0b,2b) EX_TABLE(3b,5b)
+		: "+a" (size), "+a" (to), "+a" (tmp1), "=a" (tmp2)
+		: "a" (empty_zero_page), "d" (reg0) : "cc", "memory");
+	return size;	/* bytes that were not cleared, 0 on success */
+}
+
+static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
+{
+	unsigned long tmp1, tmp2;
+
+	load_kernel_asce();
+	asm volatile(
+		"   sacf  256\n"	/* secondary-space mode for the xc clears */
+		"   aghi  %0,-1\n"	/* work with size - 1 */
+		"   jo    5f\n"
+		"   bras  %3,3f\n"	/* %3 = address of the xc template below */
+		"   xc    0(1,%1),0(%1)\n"	/* template run by both ex */
+		"0: aghi  %0,257\n"	/* fault fixup: undo the -1 and -256 */
+		"   la    %2,255(%1)\n" /* %2 = ptr + 255 */
+		"   srl   %2,12\n"
+		"   sll   %2,12\n"	/* %2 = (ptr + 255) & -4096 */
+		"   slgr  %2,%1\n"	/* %2 = bytes up to that boundary */
+		"   clgr  %0,%2\n"	/* clear crosses next page boundary? */
+		"   jnh   5f\n"
+		"   aghi  %2,-1\n"
+		"1: ex    %2,0(%3)\n"	/* clear up to the page boundary */
+		"   aghi  %2,1\n"
+		"   slgr  %0,%2\n"
+		"   j     5f\n"
+		"2: xc    0(256,%1),0(%1)\n"	/* bulk loop, 256 bytes a round */
+		"   la    %1,256(%1)\n"
+		"3: aghi  %0,-256\n"
+		"   jnm   2b\n"
+		"4: ex    %0,0(%3)\n"	/* clear the last partial chunk */
+		"5: slgr  %0,%0\n"
+		"6: sacf  768\n"	/* back to home-space mode */
+		EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b)
+		: "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2)
+		: : "cc", "memory");
+	return size;
+}
+
+unsigned long __clear_user(void __user *to, unsigned long size)
+{	/* mvcos variant once have_mvcos is enabled, xc variant otherwise */
+	if (!static_key_false(&have_mvcos))
+		return clear_user_xc(to, size);
+	return clear_user_mvcos(to, size);
+}
+EXPORT_SYMBOL(__clear_user);
+
+static inline unsigned long strnlen_user_srst(const char __user *src,
+					      unsigned long size)
+{
+	register unsigned long reg0 asm("0") = 0;	/* byte to find: NUL */
+	unsigned long tmp1, tmp2;
+
+	asm volatile(
+		"   la    %2,0(%1)\n"	/* %2 = src, start of the search */
+		"   la    %3,0(%0,%1)\n"	/* %3 = src + size, search limit */
+		"   slgr  %0,%0\n"	/* result 0 unless the search completes */
+		"   sacf  256\n"	/* secondary-space mode for srst */
+		"0: srst  %3,%2\n"
+		"   jo    0b\n"	/* cc 3: partial search, resume */
+		"   la    %0,1(%3)\n"	/* strnlen_user results includes \0 */
+		"   slgr  %0,%1\n"	/* size = %3 + 1 - src */
+		"1: sacf  768\n"	/* back to home-space mode */
+		EX_TABLE(0b,1b)
+		: "+a" (size), "+a" (src), "=a" (tmp1), "=a" (tmp2)
+		: "d" (reg0) : "cc", "memory");
+	return size;	/* 0 if the srst faulted */
+}
+
+unsigned long __strnlen_user(const char __user *src, unsigned long size)
+{	/* returns 0 for size 0, else what strnlen_user_srst() returns */
+	if (unlikely(!size))
+		return 0;
+	load_kernel_asce();
+	return strnlen_user_srst(src, size);
+}
+EXPORT_SYMBOL(__strnlen_user);
+
+long __strncpy_from_user(char *dst, const char __user *src, long size)
+{
+	size_t copied = 0, chunk, in_page, found;
+
+	if (unlikely(size <= 0))
+		return 0;
+	do {
+		/* fetch at most up to the end of the current page of @src */
+		in_page = (size_t)src & ~PAGE_MASK;
+		chunk = min(size - copied, PAGE_SIZE - in_page);
+		if (copy_from_user(dst, src, chunk))
+			return -EFAULT;
+		found = strnlen(dst, chunk);
+		copied += found;
+		src += found;
+		dst += found;
+	} while ((found == chunk) && (copied < size));
+	return copied;
+}
+EXPORT_SYMBOL(__strncpy_from_user);
+
+/*
+ * The "old" uaccess variant without mvcos can be enforced with the
+ * uaccess_primary kernel parameter. This is mainly for debugging purposes.
+ */
+static int uaccess_primary __initdata;
+
+static int __init parse_uaccess_pt(char *__unused)
+{	/* early_param hook: "uaccess_primary" was given, the value is ignored */
+	uaccess_primary = 1;
+	return 0;
+}
+early_param("uaccess_primary", parse_uaccess_pt);
+
+static int __init uaccess_init(void)
+{	/* enable the mvcos variants unless uaccess_primary forbids them */
+	if (!uaccess_primary && test_facility(27))	/* facility 27: presumably mvcos - confirm */
+		static_key_slow_inc(&have_mvcos);
+	return 0;
+}
+early_initcall(uaccess_init);
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
new file mode 100644
index 000000000..839592ca2
--- /dev/null
+++ b/arch/s390/mm/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for the linux s390-specific parts of the memory manager.
+#
+
+obj-y		:= init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o
+obj-y		+= page-states.o gup.o extable.o pageattr.o mem_detect.o
+
+obj-$(CONFIG_CMM)		+= cmm.o
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+obj-$(CONFIG_S390_PTDUMP)	+= dump_pagetables.o
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
new file mode 100644
index 000000000..79ddd580d
--- /dev/null
+++ b/arch/s390/mm/cmm.c
@@ -0,0 +1,495 @@
+/*
+ *  Collaborative memory management interface.
+ *
+ *    Copyright IBM Corp 2003, 2010
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/sysctl.h>
+#include <linux/ctype.h>
+#include <linux/swap.h>
+#include <linux/kthread.h>
+#include <linux/oom.h>
+#include <linux/suspend.h>
+#include <linux/uaccess.h>
+
+#include <asm/pgalloc.h>
+#include <asm/diag.h>
+
+#ifdef CONFIG_CMM_IUCV
+static char *cmm_default_sender = "VMRMSVM";
+#endif
+static char *sender;
+module_param(sender, charp, 0400);
+MODULE_PARM_DESC(sender,
+		 "Guest name that may send SMSG messages (default VMRMSVM)");
+
+#include "../../../drivers/s390/net/smsgiucv.h"
+
+#define CMM_NR_PAGES ((PAGE_SIZE / sizeof(unsigned long)) - 2) /* -2: room for next and index */
+
+struct cmm_page_array {
+	struct cmm_page_array *next;	/* older, completely filled array (or NULL) */
+	unsigned long index;		/* number of used slots in pages[] */
+	unsigned long pages[CMM_NR_PAGES];	/* addresses from __get_free_page() */
+};
+
+static long cmm_pages;			/* pages currently held on cmm_page_list */
+static long cmm_timed_pages;		/* pages currently held on cmm_timed_page_list */
+static volatile long cmm_pages_target;	/* value cmm_thread() drives cmm_pages to */
+static volatile long cmm_timed_pages_target;	/* same, for cmm_timed_pages */
+static long cmm_timeout_pages;		/* target reduction per timer expiry */
+static long cmm_timeout_seconds;	/* timer period; <= 0 keeps the timer off */
+static int cmm_suspended;		/* set by cmm_suspend(), cleared by cmm_resume() */
+
+static struct cmm_page_array *cmm_page_list;
+static struct cmm_page_array *cmm_timed_page_list;
+static DEFINE_SPINLOCK(cmm_lock);
+
+static struct task_struct *cmm_thread_ptr;
+static DECLARE_WAIT_QUEUE_HEAD(cmm_thread_wait);
+static DEFINE_TIMER(cmm_timer, NULL, 0, 0);
+
+static void cmm_timer_fn(unsigned long);
+static void cmm_set_timer(void);
+
+static long cmm_alloc_pages(long nr, long *counter,
+			    struct cmm_page_array **list)
+{	/* Add up to nr pages to *list; returns how many could not be added. */
+	struct cmm_page_array *pa, *npa;
+	unsigned long addr;
+
+	while (nr) {
+		addr = __get_free_page(GFP_NOIO);
+		if (!addr)
+			break;
+		spin_lock(&cmm_lock);
+		pa = *list;
+		if (!pa || pa->index >= CMM_NR_PAGES) {
+			/* Need a new page for the page list. */
+			spin_unlock(&cmm_lock);	/* allocate with the lock dropped */
+			npa = (struct cmm_page_array *)
+				__get_free_page(GFP_NOIO);
+			if (!npa) {
+				free_page(addr);
+				break;
+			}
+			spin_lock(&cmm_lock);
+			pa = *list;	/* re-read: the list may have changed meanwhile */
+			if (!pa || pa->index >= CMM_NR_PAGES) {
+				npa->next = pa;
+				npa->index = 0;
+				pa = npa;
+				*list = pa;
+			} else
+				free_page((unsigned long) npa);	/* head has room again */
+		}
+		diag10_range(addr >> PAGE_SHIFT, 1);	/* presumably hands the page to the hypervisor - confirm */
+		pa->pages[pa->index++] = addr;
+		(*counter)++;
+		spin_unlock(&cmm_lock);
+		nr--;
+	}
+	return nr;
+}
+
+static long cmm_free_pages(long nr, long *counter, struct cmm_page_array **list)
+{	/* Free up to nr pages from *list; returns how many were not freed. */
+	struct cmm_page_array *pa;
+	unsigned long addr;
+
+	spin_lock(&cmm_lock);
+	pa = *list;
+	while (nr) {
+		if (!pa || pa->index <= 0)
+			break;	/* list is empty */
+		addr = pa->pages[--pa->index];
+		if (pa->index == 0) {	/* array drained: free it, step to the next */
+			pa = pa->next;
+			free_page((unsigned long) *list);
+			*list = pa;
+		}
+		free_page(addr);
+		(*counter)--;
+		nr--;
+	}
+	spin_unlock(&cmm_lock);
+	return nr;
+}
+
+static int cmm_oom_notify(struct notifier_block *self,
+			  unsigned long dummy, void *parm)
+{
+	unsigned long *freed = parm;
+	long left;	/* part of the 256-page quota not released yet */
+
+	left = cmm_free_pages(256, &cmm_timed_pages, &cmm_timed_page_list);
+	if (left > 0)	/* timed pages did not suffice, take regular ones */
+		left = cmm_free_pages(left, &cmm_pages, &cmm_page_list);
+	cmm_pages_target = cmm_pages;
+	cmm_timed_pages_target = cmm_timed_pages;
+	*freed += 256 - left;
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cmm_oom_nb = {
+	.notifier_call = cmm_oom_notify,
+};
+
+static int cmm_thread(void *dummy)
+{	/* Worker: moves both page counts one page per round towards their targets. */
+	int rc;
+
+	while (1) {
+		rc = wait_event_interruptible(cmm_thread_wait,
+			(!cmm_suspended && (cmm_pages != cmm_pages_target ||
+			 cmm_timed_pages != cmm_timed_pages_target)) ||
+			 kthread_should_stop());
+		if (kthread_should_stop() || rc == -ERESTARTSYS) {
+			cmm_pages_target = cmm_pages;	/* freeze the targets on exit */
+			cmm_timed_pages_target = cmm_timed_pages;
+			break;
+		}
+		if (cmm_pages_target > cmm_pages) {
+			if (cmm_alloc_pages(1, &cmm_pages, &cmm_page_list))
+				cmm_pages_target = cmm_pages;	/* allocation failed: give up */
+		} else if (cmm_pages_target < cmm_pages) {
+			cmm_free_pages(1, &cmm_pages, &cmm_page_list);
+		}
+		if (cmm_timed_pages_target > cmm_timed_pages) {
+			if (cmm_alloc_pages(1, &cmm_timed_pages,
+					   &cmm_timed_page_list))
+				cmm_timed_pages_target = cmm_timed_pages;
+		} else if (cmm_timed_pages_target < cmm_timed_pages) {
+			cmm_free_pages(1, &cmm_timed_pages,
+				       &cmm_timed_page_list);
+		}
+		if (cmm_timed_pages > 0 && !timer_pending(&cmm_timer))
+			cmm_set_timer();
+	}
+	return 0;
+}
+
+static void cmm_kick_thread(void)
+{	/* wake cmm_thread() so it re-checks the page targets */
+	wake_up(&cmm_thread_wait);
+}
+
+static void cmm_set_timer(void)
+{	/* (Re)arm cmm_timer for cmm_timeout_seconds, or stop it if unused. */
+	if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) {
+		if (timer_pending(&cmm_timer))
+			del_timer(&cmm_timer);
+		return;
+	}
+	if (timer_pending(&cmm_timer)) {
+		if (mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds*HZ))
+			return;	/* a pending timer was re-armed */
+	}
+	cmm_timer.function = cmm_timer_fn;
+	cmm_timer.data = 0;
+	cmm_timer.expires = jiffies + cmm_timeout_seconds*HZ;
+	add_timer(&cmm_timer);
+}
+
+/*
+ * Timer callback: lower the timed-pages target by cmm_timeout_pages (not
+ * below zero), wake the worker thread and try to re-arm the timer.
+ */
+static void cmm_timer_fn(unsigned long ignored)
+{
+	long remaining = cmm_timed_pages_target - cmm_timeout_pages;
+
+	cmm_timed_pages_target = remaining < 0 ? 0 : remaining;
+	cmm_kick_thread();
+	cmm_set_timer();
+}
+
+static void cmm_set_pages(long nr)
+{	/* set the absolute target for cmm_pages and wake the worker */
+	cmm_pages_target = nr;
+	cmm_kick_thread();
+}
+
+static long cmm_get_pages(void)
+{	/* current count, not the target */
+	return cmm_pages;
+}
+
+static void cmm_add_timed_pages(long nr)
+{	/* raise the timed-pages target by nr (relative) and wake the worker */
+	cmm_timed_pages_target += nr;
+	cmm_kick_thread();
+}
+
+static long cmm_get_timed_pages(void)
+{	/* current count, not the target */
+	return cmm_timed_pages;
+}
+
+static void cmm_set_timeout(long nr, long seconds)
+{	/* every @seconds the timed-pages target is lowered by @nr pages */
+	cmm_timeout_pages = nr;
+	cmm_timeout_seconds = seconds;
+	cmm_set_timer();
+}
+
+static int cmm_skip_blanks(char *cp, char **endp)
+{	/* *endp = first non-blank at/after cp; returns 1 if blanks were skipped */
+	char *pos = cp;
+
+	while (*pos == ' ' || *pos == '\t')
+		pos++;
+	*endp = pos;
+	return pos != cp;
+}
+
+static struct ctl_table cmm_table[];
+
+static int cmm_pages_handler(struct ctl_table *ctl, int write,
+			     void __user *buffer, size_t *lenp, loff_t *ppos)
+{	/* sysctl handler: cmm_table[0] is cmm_pages, otherwise cmm_timed_pages */
+	char buf[16], *p;
+	unsigned int len, n;	/* n: bytes really copied into buf on write */
+	long nr;
+
+	if (!*lenp || (*ppos && !write)) {
+		*lenp = 0;
+		return 0;
+	}
+
+	if (write) {
+		len = *lenp;
+		n = len < sizeof(buf) ? len : sizeof(buf) - 1;
+		if (copy_from_user(buf, buffer, n))
+			return -EFAULT;
+		buf[n] = '\0';	/* end of the copied data, not of buf: no stale stack bytes get parsed */
+		cmm_skip_blanks(buf, &p);
+		nr = simple_strtoul(p, &p, 0);
+		if (ctl == &cmm_table[0])
+			cmm_set_pages(nr);
+		else
+			cmm_add_timed_pages(nr);
+	} else {
+		if (ctl == &cmm_table[0])
+			nr = cmm_get_pages();
+		else
+			nr = cmm_get_timed_pages();
+		len = sprintf(buf, "%ld\n", nr);
+		if (len > *lenp)
+			len = *lenp;
+		if (copy_to_user(buffer, buf, len))
+			return -EFAULT;
+	}
+	*lenp = len;	/* the whole write is reported as consumed */
+	*ppos += len;
+	return 0;	/* -EFAULT above if user memory could not be accessed */
+}
+
+static int cmm_timeout_handler(struct ctl_table *ctl, int write,
+			       void __user *buffer, size_t *lenp, loff_t *ppos)
+{	/* sysctl handler for cmm_timeout: "<pages> <seconds>" */
+	char buf[64], *p;
+	long nr, seconds;
+	unsigned int len, n;	/* n: bytes really copied into buf on write */
+
+	if (!*lenp || (*ppos && !write)) {
+		*lenp = 0;
+		return 0;
+	}
+
+	if (write) {
+		len = *lenp;
+		n = len < sizeof(buf) ? len : sizeof(buf) - 1;
+		if (copy_from_user(buf, buffer, n))
+			return -EFAULT;
+		buf[n] = '\0';	/* end of the copied data, not of buf: no stale stack bytes get parsed */
+		cmm_skip_blanks(buf, &p);
+		nr = simple_strtoul(p, &p, 0);
+		cmm_skip_blanks(p, &p);
+		seconds = simple_strtoul(p, &p, 0);
+		cmm_set_timeout(nr, seconds);
+	} else {
+		len = sprintf(buf, "%ld %ld\n",
+			      cmm_timeout_pages, cmm_timeout_seconds);
+		if (len > *lenp)
+			len = *lenp;
+		if (copy_to_user(buffer, buf, len))
+			return -EFAULT;
+	}
+	*lenp = len;	/* the whole write is reported as consumed */
+	*ppos += len;
+	return 0;	/* -EFAULT above if user memory could not be accessed */
+}
+
+static struct ctl_table cmm_table[] = {
+	{	/* must stay entry 0: cmm_pages_handler() tests &cmm_table[0] */
+		.procname	= "cmm_pages",
+		.mode		= 0644,
+		.proc_handler	= cmm_pages_handler,
+	},
+	{	/* any other entry using cmm_pages_handler() means timed pages */
+		.procname	= "cmm_timed_pages",
+		.mode		= 0644,
+		.proc_handler	= cmm_pages_handler,
+	},
+	{
+		.procname	= "cmm_timeout",
+		.mode		= 0644,
+		.proc_handler	= cmm_timeout_handler,
+	},
+	{ }
+};
+
+static struct ctl_table cmm_dir_table[] = {
+	{	/* directory entry "vm" holding the cmm_table entries */
+		.procname	= "vm",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= cmm_table,
+	},
+	{ }
+};
+
+#ifdef CONFIG_CMM_IUCV
+#define SMSG_PREFIX "CMM"
+static void cmm_smsg_target(const char *from, char *msg)
+{	/* SMSG callback: "CMM SHRINK <n>", "CMM RELEASE <n>" or "CMM REUSE <n> <s>" */
+	long nr, seconds;
+
+	if (strlen(sender) > 0 && strcmp(from, sender) != 0)
+		return;	/* a sender filter is set and does not match */
+	if (!cmm_skip_blanks(msg + strlen(SMSG_PREFIX), &msg))
+		return;	/* no blank after the prefix */
+	if (strncmp(msg, "SHRINK", 6) == 0) {
+		if (!cmm_skip_blanks(msg + 6, &msg))
+			return;
+		nr = simple_strtoul(msg, &msg, 0);
+		cmm_skip_blanks(msg, &msg);
+		if (*msg == '\0')	/* act only if nothing trails the number */
+			cmm_set_pages(nr);
+	} else if (strncmp(msg, "RELEASE", 7) == 0) {
+		if (!cmm_skip_blanks(msg + 7, &msg))
+			return;
+		nr = simple_strtoul(msg, &msg, 0);
+		cmm_skip_blanks(msg, &msg);
+		if (*msg == '\0')
+			cmm_add_timed_pages(nr);
+	} else if (strncmp(msg, "REUSE", 5) == 0) {
+		if (!cmm_skip_blanks(msg + 5, &msg))
+			return;
+		nr = simple_strtoul(msg, &msg, 0);
+		if (!cmm_skip_blanks(msg, &msg))
+			return;	/* the second number is mandatory */
+		seconds = simple_strtoul(msg, &msg, 0);
+		cmm_skip_blanks(msg, &msg);
+		if (*msg == '\0')
+			cmm_set_timeout(nr, seconds);
+	}
+}
+#endif
+
+static struct ctl_table_header *cmm_sysctl_header;
+
+static int cmm_suspend(void)
+{	/* stop the worker's adjustments and free every held page */
+	cmm_suspended = 1;
+	cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
+	cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
+	return 0;
+}
+
+static int cmm_resume(void)
+{	/* targets were left untouched by cmm_suspend(); the worker re-allocates */
+	cmm_suspended = 0;
+	cmm_kick_thread();
+	return 0;
+}
+
+static int cmm_power_event(struct notifier_block *this,
+			   unsigned long event, void *ptr)
+{	/* PM notifier: only the two hibernation events below are handled */
+	switch (event) {
+	case PM_POST_HIBERNATION:
+		return cmm_resume();
+	case PM_HIBERNATION_PREPARE:
+		return cmm_suspend();
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
+static struct notifier_block cmm_power_notifier = {
+	.notifier_call = cmm_power_event,
+};
+
+static int __init cmm_init(void)
+{	/* Register sysctl, SMSG, OOM and PM hooks, then start the worker thread. */
+	int rc = -ENOMEM;
+
+	cmm_sysctl_header = register_sysctl_table(cmm_dir_table);
+	if (!cmm_sysctl_header)
+		goto out_sysctl;
+#ifdef CONFIG_CMM_IUCV
+	/* convert sender to uppercase characters */
+	if (sender) {
+		int len = strlen(sender);
+		while (len--)
+			sender[len] = toupper(sender[len]);
+	} else {
+		sender = cmm_default_sender;
+	}
+
+	rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target);
+	if (rc < 0)
+		goto out_smsg;
+#endif
+	rc = register_oom_notifier(&cmm_oom_nb);
+	if (rc < 0)
+		goto out_oom_notify;
+	rc = register_pm_notifier(&cmm_power_notifier);
+	if (rc)
+		goto out_pm;
+	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
+	if (!IS_ERR(cmm_thread_ptr))
+		return 0;
+
+	rc = PTR_ERR(cmm_thread_ptr);	/* error path: undo in reverse order */
+	unregister_pm_notifier(&cmm_power_notifier);
+out_pm:
+	unregister_oom_notifier(&cmm_oom_nb);
+out_oom_notify:
+#ifdef CONFIG_CMM_IUCV
+	smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
+out_smsg:
+#endif
+	unregister_sysctl_table(cmm_sysctl_header);
+out_sysctl:
+	del_timer_sync(&cmm_timer);
+	return rc;
+}
+module_init(cmm_init);
+
+static void __exit cmm_exit(void)
+{	/* Remove all hooks, stop worker and timer, then free every held page. */
+	unregister_sysctl_table(cmm_sysctl_header);
+#ifdef CONFIG_CMM_IUCV
+	smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
+#endif
+	unregister_pm_notifier(&cmm_power_notifier);
+	unregister_oom_notifier(&cmm_oom_nb);
+	kthread_stop(cmm_thread_ptr);
+	del_timer_sync(&cmm_timer);
+	cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
+	cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
+}
+module_exit(cmm_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
new file mode 100644
index 000000000..8556d6be9
--- /dev/null
+++ b/arch/s390/mm/dump_pagetables.c
@@ -0,0 +1,225 @@
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <asm/sections.h>
+#include <asm/pgtable.h>
+
+/* Exclusive upper bound for the page table walk, set in pt_dump_init(). */
+static unsigned long max_addr;
+
+/* A named address at which the dump prints a "---[ name ]---" headline. */
+struct addr_marker {
+	unsigned long start_address;	/* first address of the named area */
+	const char *name;		/* text printed in the headline */
+};
+
+/* Indices into address_markers[]; the order here is the array order. */
+enum address_markers_idx {
+	IDENTITY_NR = 0,
+	KERNEL_START_NR,
+	KERNEL_END_NR,
+	VMEMMAP_NR,
+	VMALLOC_NR,
+	MODULES_NR,
+};
+
+/*
+ * Marker table used by note_page(). The entries initialized with 0 for
+ * vmemmap, vmalloc and modules get their real start address in
+ * pt_dump_init(). note_page() always compares against the *next* entry,
+ * so the table ends with a sentinel whose start address (-1) is the
+ * largest unsigned long value.
+ * NOTE(review): note_page() only ever moves forward, so the entries
+ * presumably have to be in ascending address order - confirm.
+ */
+static struct addr_marker address_markers[] = {
+	[IDENTITY_NR]	  = {0, "Identity Mapping"},
+	[KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"},
+	[KERNEL_END_NR]	  = {(unsigned long)&_end, "Kernel Image End"},
+	[VMEMMAP_NR]	  = {0, "vmemmap Area"},
+	[VMALLOC_NR]	  = {0, "vmalloc Area"},
+	[MODULES_NR]	  = {0, "Modules Area"},
+	{ -1, NULL }
+};
+
+/* Walker state describing the run of equal entries collected so far. */
+struct pg_state {
+	int level;			/* table level of the run, 0 = nothing seen yet */
+	unsigned int current_prot;	/* protection bits shared by the run */
+	unsigned long start_address;	/* first address of the run */
+	unsigned long current_address;	/* address currently being visited */
+	const struct addr_marker *marker; /* marker of the area being walked */
+};
+
+/*
+ * Finish an output line with the name of the page table level followed
+ * by the protection of the range: "I" for an invalid range, otherwise
+ * "RO" or "RW" depending on the protect bit.
+ */
+static void print_prot(struct seq_file *m, unsigned int pr, int level)
+{
+	static const char * const level_name[] =
+		{ "ASCE", "PGD", "PUD", "PMD", "PTE" };
+
+	seq_puts(m, level_name[level]);
+	seq_putc(m, ' ');
+	if (pr & _PAGE_INVALID)
+		seq_puts(m, "I\n");
+	else if (pr & _PAGE_PROTECT)
+		seq_puts(m, "RO \n");
+	else
+		seq_puts(m, "RW \n");
+}
+
+/*
+ * Account for the entry at st->current_address with protection @new_prot
+ * found at table level @level. Consecutive entries with the same
+ * protection and level inside the same marker area are merged; whenever
+ * one of these changes, the finished range is printed and a new one is
+ * started at the current address.
+ */
+static void note_page(struct seq_file *m, struct pg_state *st,
+		     unsigned int new_prot, int level)
+{
+	static const char units[] = "KMGTPE";
+	const int width = sizeof(unsigned long) * 2;
+	const char *suffix = units;
+	unsigned long size;
+
+	if (!st->level) {
+		/* First entry: nothing to flush yet, just start recording */
+		st->current_prot = new_prot;
+		st->level = level;
+		st->marker = address_markers;
+		seq_printf(m, "---[ %s ]---\n", st->marker->name);
+		return;
+	}
+
+	/* Same protection, same level, same area: the range just grows */
+	if (new_prot == st->current_prot && level == st->level &&
+	    st->current_address < st->marker[1].start_address)
+		return;
+
+	/* Print the actual finished series */
+	seq_printf(m, "0x%0*lx-0x%0*lx",
+		   width, st->start_address,
+		   width, st->current_address);
+	/* Scale the size to the largest unit that divides it evenly */
+	size = (st->current_address - st->start_address) >> 10;
+	for (; !(size & 0x3ff) && suffix[1]; suffix++)
+		size >>= 10;
+	seq_printf(m, "%9lu%c ", size, *suffix);
+	print_prot(m, st->current_prot, st->level);
+
+	/* Crossed into the next named area: print its headline */
+	if (st->current_address >= st->marker[1].start_address) {
+		st->marker++;
+		seq_printf(m, "---[ %s ]---\n", st->marker->name);
+	}
+
+	/* Open the next range at the current position */
+	st->start_address = st->current_address;
+	st->current_prot = new_prot;
+	st->level = level;
+}
+
+/*
+ * The actual page table walker functions. In order to keep the
+ * implementation of print_prot() short, we only check and pass
+ * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region,
+ * segment or page table entry is invalid or read-only.
+ * After all it's just a hint that the current level being walked
+ * contains an invalid or read-only entry.
+ */
+/*
+ * Report every page table entry of the page table referenced by @pmd,
+ * starting at virtual address @addr and stopping at max_addr.
+ */
+static void walk_pte_level(struct seq_file *m, struct pg_state *st,
+			   pmd_t *pmd, unsigned long addr)
+{
+	pte_t *ptep;
+	int n;
+
+	for (n = 0; n < PTRS_PER_PTE && addr < max_addr;
+	     n++, addr += PAGE_SIZE) {
+		st->current_address = addr;
+		ptep = pte_offset_kernel(pmd, addr);
+		note_page(m, st,
+			  pte_val(*ptep) & (_PAGE_PROTECT | _PAGE_INVALID), 4);
+	}
+}
+
+/*
+ * Walk the segment table referenced by @pud starting at @addr. Empty
+ * entries are reported as invalid, large entries are reported with their
+ * protect bit, everything else is descended into.
+ */
+static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
+			   pud_t *pud, unsigned long addr)
+{
+	pmd_t *pmdp;
+	int n;
+
+	for (n = 0; n < PTRS_PER_PMD && addr < max_addr;
+	     n++, addr += PMD_SIZE) {
+		st->current_address = addr;
+		pmdp = pmd_offset(pud, addr);
+		if (pmd_none(*pmdp)) {
+			note_page(m, st, _PAGE_INVALID, 3);
+			continue;
+		}
+		if (!pmd_large(*pmdp)) {
+			walk_pte_level(m, st, pmdp, addr);
+			continue;
+		}
+		note_page(m, st, pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT, 3);
+	}
+}
+
+/*
+ * Walk the region-third table referenced by @pgd starting at @addr. Empty
+ * entries are reported as invalid, large entries are reported with their
+ * read-only bit, everything else is descended into.
+ */
+static void walk_pud_level(struct seq_file *m, struct pg_state *st,
+			   pgd_t *pgd, unsigned long addr)
+{
+	pud_t *pudp;
+	int n;
+
+	for (n = 0; n < PTRS_PER_PUD && addr < max_addr;
+	     n++, addr += PUD_SIZE) {
+		st->current_address = addr;
+		pudp = pud_offset(pgd, addr);
+		if (pud_none(*pudp)) {
+			note_page(m, st, _PAGE_INVALID, 2);
+			continue;
+		}
+		if (!pud_large(*pudp)) {
+			walk_pmd_level(m, st, pudp, addr);
+			continue;
+		}
+		note_page(m, st, pud_val(*pudp) & _REGION3_ENTRY_RO, 2);
+	}
+}
+
+/*
+ * Top level of the walk: visit every kernel pgd entry below max_addr and
+ * finally flush the range that is still pending in the walker state.
+ */
+static void walk_pgd_level(struct seq_file *m)
+{
+	struct pg_state st;
+	unsigned long addr;
+	pgd_t *pgdp;
+	int n;
+
+	memset(&st, 0, sizeof(st));
+	for (n = 0, addr = 0; n < PTRS_PER_PGD && addr < max_addr;
+	     n++, addr += PGDIR_SIZE) {
+		st.current_address = addr;
+		pgdp = pgd_offset_k(addr);
+		if (pgd_none(*pgdp))
+			note_page(m, &st, _PAGE_INVALID, 1);
+		else
+			walk_pud_level(m, &st, pgdp, addr);
+	}
+	/* Flush out the last page */
+	st.current_address = max_addr;
+	note_page(m, &st, 0, 0);
+}
+
+/* seq_file show callback: dump the whole kernel page table into @m. */
+static int ptdump_show(struct seq_file *m, void *v)
+{
+	walk_pgd_level(m);
+	return 0;
+}
+
+/* open() handler: bind the file to ptdump_show() via single_open(). */
+static int ptdump_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, ptdump_show, NULL);
+}
+
+/* File operations of the debugfs file created in pt_dump_init(). */
+static const struct file_operations ptdump_fops = {
+	.open		= ptdump_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/*
+ * Initcall: compute max_addr, fill in the marker addresses that are not
+ * compile time constants and create the "kernel_page_tables" debugfs file.
+ *
+ * The function only ever runs as an initcall, so it is marked __init to
+ * allow its text to be discarded once booting has finished.
+ *
+ * Always returns 0.
+ */
+static int __init pt_dump_init(void)
+{
+	/*
+	 * Figure out the maximum virtual address being accessible with the
+	 * kernel ASCE. We need this to keep the page table walker functions
+	 * from accessing non-existent entries.
+	 */
+	max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
+	/* every additional table level extends the address range by 11 bits */
+	max_addr = 1UL << (max_addr * 11 + 31);
+	address_markers[MODULES_NR].start_address = MODULES_VADDR;
+	address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
+	address_markers[VMALLOC_NR].start_address = VMALLOC_START;
+	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
+	return 0;
+}
+device_initcall(pt_dump_init);
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
new file mode 100644
index 000000000..4d1ee8886
--- /dev/null
+++ b/arch/s390/mm/extable.c
@@ -0,0 +1,81 @@
+#include <linux/module.h>
+#include <linux/sort.h>
+#include <asm/uaccess.h>
+
+/*
+ * Binary search in the exception table [first, last] (both inclusive)
+ * for the entry whose instruction address equals @value.
+ * Returns the matching entry or NULL if there is none. The table must
+ * already be sorted by instruction address.
+ */
+const struct exception_table_entry *
+search_extable(const struct exception_table_entry *first,
+	       const struct exception_table_entry *last,
+	       unsigned long value)
+{
+	while (first <= last) {
+		const struct exception_table_entry *probe;
+		unsigned long insn_addr;
+
+		probe = first + ((last - first) >> 1);
+		insn_addr = extable_insn(probe);
+		if (insn_addr == value)
+			return probe;
+		if (insn_addr < value)
+			first = probe + 1;
+		else
+			last = probe - 1;
+	}
+	return NULL;
+}
+
+/*
+ * The exception table needs to be sorted so that the binary
+ * search that we use to find entries in it works properly.
+ * This is used both for the kernel exception table and for
+ * the exception tables of modules that get loaded.
+ *
+ */
+/*
+ * sort() comparator for exception table entries: returns a negative
+ * value, zero or a positive value if the insn field of @a is smaller
+ * than, equal to or larger than the one of @b.
+ *
+ * The fields are compared explicitly instead of returning their
+ * difference: a difference can overflow (or get truncated when it is
+ * converted to int) and would then report the wrong order.
+ */
+static int cmp_ex(const void *a, const void *b)
+{
+	const struct exception_table_entry *x = a, *y = b;
+
+	/* This compare is only valid after normalization. */
+	if (x->insn < y->insn)
+		return -1;
+	return x->insn > y->insn;
+}
+
+/*
+ * Sort the exception table [start, finish) by instruction address.
+ * The insn fields are normalized before sorting, because cmp_ex() can
+ * only compare normalized values, and denormalized again afterwards.
+ */
+void sort_extable(struct exception_table_entry *start,
+		  struct exception_table_entry *finish)
+{
+	struct exception_table_entry *entry;
+	int offset;
+
+	/* Normalize entries to being relative to the start of the section */
+	offset = 0;
+	for (entry = start; entry < finish; entry++) {
+		entry->insn += offset;
+		offset += 8;
+	}
+
+	sort(start, finish - start, sizeof(*start), cmp_ex, NULL);
+
+	/* Denormalize all entries */
+	offset = 0;
+	for (entry = start; entry < finish; entry++) {
+		entry->insn -= offset;
+		offset += 8;
+	}
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Drop all exception table entries that refer to the init section of
+ * module @m. If the exception table is sorted, any such entries will be
+ * at the beginning or the end of the table, so it is sufficient to trim
+ * both ends.
+ */
+void trim_init_extable(struct module *m)
+{
+	/* Trim the beginning */
+	for (; m->num_exentries; m->extable++, m->num_exentries--)
+		if (!within_module_init(extable_insn(&m->extable[0]), m))
+			break;
+
+	/* Trim the end */
+	for (; m->num_exentries; m->num_exentries--)
+		if (!within_module_init(
+				extable_insn(&m->extable[m->num_exentries-1]), m))
+			break;
+}
+#endif /* CONFIG_MODULES */
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
new file mode 100644
index 000000000..23c496957
--- /dev/null
+++ b/arch/s390/mm/extmem.c
@@ -0,0 +1,763 @@
+/*
+ * Author(s)......: Carsten Otte <cotte@de.ibm.com>
+ * 		    Rob M van der Heij <rvdheij@nl.ibm.com>
+ * 		    Steven Shultz <shultzss@us.ibm.com>
+ * Bugreports.to..: <Linux390@de.ibm.com>
+ * Copyright IBM Corp. 2002, 2004
+ */
+
+#define KMSG_COMPONENT "extmem"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/bootmem.h>
+#include <linux/ctype.h>
+#include <linux/ioport.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/ebcdic.h>
+#include <asm/errno.h>
+#include <asm/extmem.h>
+#include <asm/cpcmd.h>
+#include <asm/setup.h>
+
+/*
+ * Diag x'64' subcodes. dcss_set_subcodes() selects either the subcodes
+ * with the trailing X (the "new" ones, if supported) or the old ones;
+ * dcss_diag() treats every subcode above DCSS_SEGEXT as a new one.
+ * DCSS_FINDSEGA is the opcode stored into qin64.qopcode by
+ * query_segment_type().
+ */
+#define DCSS_LOADSHR    0x00
+#define DCSS_LOADNSR    0x04
+#define DCSS_PURGESEG   0x08
+#define DCSS_FINDSEG    0x0c
+#define DCSS_LOADNOLY   0x10
+#define DCSS_SEGEXT     0x18
+#define DCSS_LOADSHRX	0x20
+#define DCSS_LOADNSRX	0x24
+#define DCSS_FINDSEGX	0x2c
+#define DCSS_SEGEXTX	0x38
+#define DCSS_FINDSEGA   0x0c
+
+/*
+ * One address range of a segment in the new (64-bit) output format.
+ * The code uses the value shifted right by PAGE_SHIFT as page number and
+ * the low byte of @start as the segment type of the range.
+ */
+struct qrange {
+	unsigned long  start; /* last byte type */
+	unsigned long  end;   /* last byte reserved */
+};
+
+/*
+ * Output area of the segment query in the new (64-bit) format. When only
+ * the old format is available, query_segment_type() converts the old
+ * layout (struct qout64_old) into this one.
+ */
+struct qout64 {
+	unsigned long segstart;	/* copied to dcss_segment.start_addr */
+	unsigned long segend;	/* copied to dcss_segment.end */
+	int segcnt;		/* number of ranges the segment consists of */
+	int segrcnt;		/* presumably the number of ranges returned - confirm */
+	struct qrange range[6];
+};
+
+/* 32-bit counterpart of struct qrange, used by the old output format. */
+struct qrange_old {
+	unsigned int start; /* last byte type */
+	unsigned int end;   /* last byte reserved */
+};
+
+/*
+ * output area format for the Diag x'64' old subcode x'18'
+ * Same fields as struct qout64, but with 32-bit addresses.
+ */
+struct qout64_old {
+	int segstart;
+	int segend;
+	int segcnt;
+	int segrcnt;
+	struct qrange_old range[6];
+};
+
+/* Input parameter block of the segment query, see query_segment_type(). */
+struct qin64 {
+	char qopcode;		/* set to DCSS_FINDSEGA */
+	char rsrv1[3];
+	char qrcode;
+	char rsrv2[3];
+	char qname[8];		/* segment name in the dcss_mkname() format */
+	unsigned int qoutptr;	/* 32-bit address of the output area */
+	short int qoutlen;	/* size of the output area in bytes */
+};
+
+/* Bookkeeping for one loaded DCSS; all instances are linked in dcss_list. */
+struct dcss_segment {
+	struct list_head list;		/* entry in dcss_list */
+	char dcss_name[8];		/* name in the dcss_mkname() format */
+	/*
+	 * Name of the memory resource: the 8 character segment name followed
+	 * by " (DCSS)" (7 characters) and the terminating NUL, i.e. 16 bytes.
+	 * With only 15 bytes the strncat() in __segment_load() stored the
+	 * terminator one byte behind the array.
+	 */
+	char res_name[16];
+	unsigned long start_addr;	/* first address of the segment */
+	unsigned long end;		/* last address of the segment */
+	atomic_t ref_count;		/* number of segment_load() users */
+	int do_nonshared;		/* access mode the segment was loaded with */
+	unsigned int vm_segtype;	/* segment type, index into segtype_string[] */
+	struct qrange range[6];		/* address ranges as returned by the query */
+	int segcnt;			/* number of valid entries in range[] */
+	struct resource *res;		/* resource claimed in iomem_resource */
+};
+
+/* dcss_lock has to be held while dcss_list is searched or modified. */
+static DEFINE_MUTEX(dcss_lock);
+static LIST_HEAD(dcss_list);
+/* Printable segment type names, indexed by segment type. */
+static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC",
+					"EW/EN-MIXED" };
+/* Diag x'64' subcodes in use, chosen once by dcss_set_subcodes(). */
+static int loadshr_scode, loadnsr_scode, findseg_scode;
+static int segext_scode, purgeseg_scode;
+/* Non-zero once the subcodes above have been set up, see dcss_diag(). */
+static int scode_set;
+
+/*
+ * set correct Diag x'64' subcodes.
+ *
+ * Probes the new DCSS_FINDSEGX subcode once and, depending on the result,
+ * lets the *_scode variables refer to the new or to the old subcodes.
+ * Returns 0 on success or -ENOMEM if the probe buffer cannot be allocated.
+ */
+static int
+dcss_set_subcodes(void)
+{
+	char *name = kmalloc(8 * sizeof(char), GFP_KERNEL | GFP_DMA);
+	unsigned long rx, ry;
+	int rc;
+
+	if (name == NULL)
+		return -ENOMEM;
+
+	rx = (unsigned long) name;
+	ry = DCSS_FINDSEGX;
+
+	strcpy(name, "dummy");
+	/*
+	 * rc receives the condition code of the diagnose. The exception
+	 * table entry presumably redirects a program check of the diagnose
+	 * to label 1, which sets rc to 3 instead - TODO confirm against the
+	 * EX_TABLE definition.
+	 */
+	asm volatile(
+		"	diag	%0,%1,0x64\n"
+		"0:	ipm	%2\n"
+		"	srl	%2,28\n"
+		"	j	2f\n"
+		"1:	la	%2,3\n"
+		"2:\n"
+		EX_TABLE(0b, 1b)
+		: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+
+	kfree(name);
+	/* Diag x'64' new subcodes are supported, set to new subcodes */
+	if (rc != 3) {
+		loadshr_scode = DCSS_LOADSHRX;
+		loadnsr_scode = DCSS_LOADNSRX;
+		purgeseg_scode = DCSS_PURGESEG;
+		findseg_scode = DCSS_FINDSEGX;
+		segext_scode = DCSS_SEGEXTX;
+		return 0;
+	}
+	/* Diag x'64' new subcodes are not supported, set to old subcodes */
+	loadshr_scode = DCSS_LOADNOLY;
+	loadnsr_scode = DCSS_LOADNSR;
+	purgeseg_scode = DCSS_PURGESEG;
+	findseg_scode = DCSS_FINDSEG;
+	segext_scode = DCSS_SEGEXT;
+	return 0;
+}
+
+/*
+ * Build the 8 byte EBCDIC VM segment name in @dcss_name from the ASCII
+ * string @name: at most 8 characters are taken and converted to upper
+ * case, shorter names are padded with blanks on the right.
+ */
+static void
+dcss_mkname(char *name, char *dcss_name)
+{
+	int len, pos;
+
+	/* number of characters to take over, never more than 8 */
+	for (len = 0; len < 8 && name[len] != '\0'; len++)
+		;
+	for (pos = 0; pos < 8; pos++)
+		dcss_name[pos] = (pos < len) ? toupper(name[pos]) : ' ';
+	ASCEBC(dcss_name, 8);
+}
+
+
+/*
+ * Search dcss_list for the segment named @name and return it, or NULL
+ * if there is no such segment. Only the first 8 characters of @name are
+ * significant. The caller has to hold dcss_lock.
+ */
+static struct dcss_segment *
+segment_by_name (char *name)
+{
+	struct dcss_segment *seg;
+	char dcss_name[9];
+
+	BUG_ON(!mutex_is_locked(&dcss_lock));
+	dcss_mkname(name, dcss_name);
+	list_for_each_entry(seg, &dcss_list, list)
+		if (!memcmp(seg->dcss_name, dcss_name, 8))
+			return seg;
+	return NULL;
+}
+
+
+/*
+ * Perform a function on a dcss segment.
+ *
+ * func      : points to the Diag x'64' subcode to execute
+ * parameter : address passed to the diagnose in its first register
+ * ret1/ret2 : receive the contents of the two diagnose registers
+ *
+ * The subcodes are set up on first use. Returns the condition code of
+ * the diagnose, or a negative error code if setting up the subcodes
+ * failed.
+ */
+static inline int
+dcss_diag(int *func, void *parameter,
+           unsigned long *ret1, unsigned long *ret2)
+{
+	unsigned long rx, ry;
+	int rc;
+
+	if (scode_set == 0) {
+		rc = dcss_set_subcodes();
+		if (rc < 0)
+			return rc;
+		scode_set = 1;
+	}
+	rx = (unsigned long) parameter;
+	ry = (unsigned long) *func;
+
+	/* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */
+	if (*func > DCSS_SEGEXT)
+		asm volatile(
+			"	diag	%0,%1,0x64\n"
+			"	ipm	%2\n"
+			"	srl	%2,28\n"
+			: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+	/* 31-bit Diag x'64' old subcode, switch to 31-bit addressing mode */
+	else
+		asm volatile(
+			"	sam31\n"
+			"	diag	%0,%1,0x64\n"
+			"	sam64\n"
+			"	ipm	%2\n"
+			"	srl	%2,28\n"
+			: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
+	/* both registers are in/out operands, hand back what came out */
+	*ret1 = rx;
+	*ret2 = ry;
+	return rc;
+}
+
+/*
+ * Translate a return code of the diagnose into an errno value:
+ * 44 becomes -ENOENT, everything else -EIO.
+ */
+static inline int
+dcss_diag_translate_rc (int vm_rc) {
+	return (vm_rc == 44) ? -ENOENT : -EIO;
+}
+
+
+/*
+ * do a diag to get info about a segment.
+ * On success the start_addr, end, vm_segtype, range and segcnt fields of
+ * @seg are filled in; seg->dcss_name selects the segment to query.
+ *
+ * Returns 0 on success or a negative error code:
+ * -ENOMEM     : a parameter or output area could not be allocated
+ * -ENOENT/-EIO: the diagnose failed, see dcss_diag_translate_rc()
+ * -EOPNOTSUPP : the segment has more than 6 ranges, or it is a multi-part
+ *               segment that is not a contiguous series of EW/EN ranges
+ */
+static int
+query_segment_type (struct dcss_segment *seg)
+{
+	unsigned long dummy, vmrc;
+	int diag_cc, rc, i;
+	struct qout64 *qout;
+	struct qin64 *qin;
+
+	/* GFP_DMA: qoutptr can only hold a 32-bit address */
+	qin = kmalloc(sizeof(*qin), GFP_KERNEL | GFP_DMA);
+	qout = kmalloc(sizeof(*qout), GFP_KERNEL | GFP_DMA);
+	if ((qin == NULL) || (qout == NULL)) {
+		rc = -ENOMEM;
+		goto out_free;
+	}
+
+	/* initialize diag input parameters */
+	qin->qopcode = DCSS_FINDSEGA;
+	qin->qoutptr = (unsigned long) qout;
+	qin->qoutlen = sizeof(struct qout64);
+	memcpy (qin->qname, seg->dcss_name, 8);
+
+	diag_cc = dcss_diag(&segext_scode, qin, &dummy, &vmrc);
+
+	if (diag_cc < 0) {
+		rc = diag_cc;
+		goto out_free;
+	}
+	if (diag_cc > 1) {
+		pr_warning("Querying a DCSS type failed with rc=%ld\n", vmrc);
+		rc = dcss_diag_translate_rc (vmrc);
+		goto out_free;
+	}
+
+	/* Only old format of output area of Diagnose x'64' is supported,
+	   copy data for the new format. */
+	if (segext_scode == DCSS_SEGEXT) {
+		struct qout64_old *qout_old;
+		qout_old = kzalloc(sizeof(*qout_old), GFP_KERNEL | GFP_DMA);
+		if (qout_old == NULL) {
+			rc = -ENOMEM;
+			goto out_free;
+		}
+		memcpy(qout_old, qout, sizeof(struct qout64_old));
+		qout->segstart = (unsigned long) qout_old->segstart;
+		qout->segend = (unsigned long) qout_old->segend;
+		qout->segcnt = qout_old->segcnt;
+		qout->segrcnt = qout_old->segrcnt;
+
+		/*
+		 * The copy loop below is bounded by segrcnt, so that is the
+		 * counter which must not exceed the size of the range arrays.
+		 * (Testing segcnt here left the loop unprotected whenever
+		 * segrcnt alone was too large.)
+		 */
+		if (qout->segrcnt > (int) ARRAY_SIZE(qout->range))
+			qout->segrcnt = ARRAY_SIZE(qout->range);
+		for (i = 0; i < qout->segrcnt; i++) {
+			qout->range[i].start =
+				(unsigned long) qout_old->range[i].start;
+			qout->range[i].end =
+				(unsigned long) qout_old->range[i].end;
+		}
+		kfree(qout_old);
+	}
+	if (qout->segcnt > 6) {
+		rc = -EOPNOTSUPP;
+		goto out_free;
+	}
+
+	if (qout->segcnt == 1) {
+		/* the low byte of the range start is the segment type */
+		seg->vm_segtype = qout->range[0].start & 0xff;
+	} else {
+		/* multi-part segment. only one type supported here:
+		    - all parts are contiguous
+		    - all parts are either EW or EN type
+		    - maximum 6 parts allowed */
+		unsigned long start = qout->segstart >> PAGE_SHIFT;
+		for (i=0; i<qout->segcnt; i++) {
+			if (((qout->range[i].start & 0xff) != SEG_TYPE_EW) &&
+			    ((qout->range[i].start & 0xff) != SEG_TYPE_EN)) {
+				rc = -EOPNOTSUPP;
+				goto out_free;
+			}
+			/* each part has to start where the previous ended */
+			if (start != qout->range[i].start >> PAGE_SHIFT) {
+				rc = -EOPNOTSUPP;
+				goto out_free;
+			}
+			start = (qout->range[i].end >> PAGE_SHIFT) + 1;
+		}
+		seg->vm_segtype = SEG_TYPE_EWEN;
+	}
+
+	/* analyze diag output and update seg */
+	seg->start_addr = qout->segstart;
+	seg->end = qout->segend;
+
+	memcpy (seg->range, qout->range, 6*sizeof(struct qrange));
+	seg->segcnt = qout->segcnt;
+
+	rc = 0;
+
+ out_free:
+	kfree(qin);
+	kfree(qout);
+	return rc;
+}
+
+/*
+ * get info about a segment
+ * possible return values:
+ * -ENOSYS  : we are not running on VM
+ * -EIO     : could not perform query diagnose
+ * -ENOENT  : no such segment
+ * -EOPNOTSUPP: multi-part segment cannot be used with linux
+ * -ENOMEM  : out of memory
+ * 0 .. 6   : type of segment as defined in include/asm-s390/extmem.h
+ */
+int
+segment_type (char* name)
+{
+	struct dcss_segment seg;
+	int rc;
+
+	if (!MACHINE_IS_VM)
+		return -ENOSYS;
+
+	/* a throw-away segment on the stack is enough for the query */
+	dcss_mkname(name, seg.dcss_name);
+	rc = query_segment_type(&seg);
+	return (rc < 0) ? rc : (int) seg.vm_segtype;
+}
+
+/*
+ * check if segment collides with other segments that are currently loaded
+ * returns 1 if this is the case, 0 if no collision was found
+ */
+static int
+segment_overlaps_others (struct dcss_segment *seg)
+{
+	struct dcss_segment *other;
+
+	BUG_ON(!mutex_is_locked(&dcss_lock));
+	list_for_each_entry(other, &dcss_list, list) {
+		if (other == seg)
+			continue;
+		/* the comparison is done with a granularity of 1 MB */
+		if ((other->start_addr >> 20) <= (seg->end >> 20) &&
+		    (other->end >> 20) >= (seg->start_addr >> 20))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * real segment loading function, called from segment_load
+ *
+ * Queries the segment, maps its address range, claims it as a memory
+ * resource and finally loads it with the diagnose. On success the new
+ * segment is added to dcss_list with a reference count of 1, *addr and
+ * *end receive its address range and the segment type is returned.
+ * On failure all steps taken so far are undone and a negative error code
+ * is returned.
+ */
+static int
+__segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long *end)
+{
+	unsigned long start_addr, end_addr, dummy;
+	struct dcss_segment *seg;
+	int rc, diag_cc;
+
+	start_addr = end_addr = 0;
+	/* GFP_DMA: seg->dcss_name is handed to the diagnose */
+	seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA);
+	if (seg == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	dcss_mkname (name, seg->dcss_name);
+	rc = query_segment_type (seg);
+	if (rc < 0)
+		goto out_free;
+
+	/* the overlap check is only done if the new subcodes are in use */
+	if (loadshr_scode == DCSS_LOADSHRX) {
+		if (segment_overlaps_others(seg)) {
+			rc = -EBUSY;
+			goto out_free;
+		}
+	}
+
+	rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+
+	if (rc)
+		goto out_free;
+
+	seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+	if (seg->res == NULL) {
+		rc = -ENOMEM;
+		goto out_shared;
+	}
+	seg->res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+	seg->res->start = seg->start_addr;
+	seg->res->end = seg->end;
+	/* resource name: ASCII segment name followed by " (DCSS)" */
+	memcpy(&seg->res_name, seg->dcss_name, 8);
+	EBCASC(seg->res_name, 8);
+	seg->res_name[8] = '\0';
+	/*
+	 * NOTE(review): this appends 7 characters plus the terminating NUL
+	 * behind the 8 name characters, so res_name has to provide at least
+	 * 16 bytes.
+	 */
+	strncat(seg->res_name, " (DCSS)", 7);
+	seg->res->name = seg->res_name;
+	/* rc holds the segment type from here on unless an error occurs */
+	rc = seg->vm_segtype;
+	if (rc == SEG_TYPE_SC ||
+	    ((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared))
+		seg->res->flags |= IORESOURCE_READONLY;
+	if (request_resource(&iomem_resource, seg->res)) {
+		rc = -EBUSY;
+		kfree(seg->res);
+		goto out_shared;
+	}
+
+	if (do_nonshared)
+		diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
+				&start_addr, &end_addr);
+	else
+		diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name,
+				&start_addr, &end_addr);
+	if (diag_cc < 0) {
+		dcss_diag(&purgeseg_scode, seg->dcss_name,
+				&dummy, &dummy);
+		rc = diag_cc;
+		goto out_resource;
+	}
+	if (diag_cc > 1) {
+		/* in the error case end_addr carries the return code */
+		pr_warning("Loading DCSS %s failed with rc=%ld\n", name,
+			   end_addr);
+		rc = dcss_diag_translate_rc(end_addr);
+		dcss_diag(&purgeseg_scode, seg->dcss_name,
+				&dummy, &dummy);
+		goto out_resource;
+	}
+	seg->start_addr = start_addr;
+	seg->end = end_addr;
+	seg->do_nonshared = do_nonshared;
+	atomic_set(&seg->ref_count, 1);
+	list_add(&seg->list, &dcss_list);
+	*addr = seg->start_addr;
+	*end  = seg->end;
+	if (do_nonshared)
+		pr_info("DCSS %s of range %p to %p and type %s loaded as "
+			"exclusive-writable\n", name, (void*) seg->start_addr,
+			(void*) seg->end, segtype_string[seg->vm_segtype]);
+	else {
+		pr_info("DCSS %s of range %p to %p and type %s loaded in "
+			"shared access mode\n", name, (void*) seg->start_addr,
+			(void*) seg->end, segtype_string[seg->vm_segtype]);
+	}
+	goto out;
+	/* error unwinding, in reverse order of the setup above */
+ out_resource:
+	release_resource(seg->res);
+	kfree(seg->res);
+ out_shared:
+	vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+ out_free:
+	kfree(seg);
+ out:
+	return rc;
+}
+
+/*
+ * this function loads a DCSS segment
+ * name         : name of the DCSS
+ * do_nonshared : 0 indicates that the dcss should be shared with other linux images
+ *                1 indicates that the dcss should be exclusive for this linux image
+ * addr         : will be filled with start address of the segment
+ * end          : will be filled with end address of the segment
+ * return values:
+ * -ENOSYS  : we are not running on VM
+ * -EIO     : could not perform query or load diagnose
+ * -ENOENT  : no such segment
+ * -EOPNOTSUPP: multi-part segment cannot be used with linux
+ * -ENOSPC  : segment cannot be used (overlaps with storage)
+ * -EBUSY   : segment can temporarily not be used (overlaps with dcss)
+ * -ERANGE  : segment cannot be used (exceeds kernel mapping range)
+ * -EPERM   : segment is currently loaded with incompatible permissions
+ * -ENOMEM  : out of memory
+ * 0 .. 6   : type of segment as defined in include/asm-s390/extmem.h
+ */
+int
+segment_load (char *name, int do_nonshared, unsigned long *addr,
+		unsigned long *end)
+{
+	struct dcss_segment *seg;
+	int rc;
+
+	if (!MACHINE_IS_VM)
+		return -ENOSYS;
+
+	mutex_lock(&dcss_lock);
+	seg = segment_by_name(name);
+	if (!seg) {
+		/* not loaded yet: really load it */
+		rc = __segment_load(name, do_nonshared, addr, end);
+	} else if (seg->do_nonshared != do_nonshared) {
+		/* already loaded, but in the other access mode */
+		*addr = *end = 0;
+		rc    = -EPERM;
+	} else {
+		/* already loaded in the requested mode: one more user */
+		atomic_inc(&seg->ref_count);
+		*addr = seg->start_addr;
+		*end  = seg->end;
+		rc    = seg->vm_segtype;
+	}
+	mutex_unlock(&dcss_lock);
+	return rc;
+}
+
+/*
+ * this function modifies the shared state of a DCSS segment: the segment
+ * is purged and loaded again in the requested access mode. If reloading
+ * fails, the segment is removed completely.
+ * name         : name of the DCSS
+ * do_nonshared : 0 indicates that the dcss should be shared with other linux images
+ *                1 indicates that the dcss should be exclusive for this linux image
+ * return values:
+ * -EIO     : could not perform load diagnose (segment gone!)
+ * -ENOENT  : no such segment (segment gone!)
+ * -EAGAIN  : segment is in use by other exploiters, try later
+ * -EINVAL  : no segment with the given name is currently loaded - name invalid
+ * -EBUSY   : segment can temporarily not be used (overlaps with dcss)
+ * 0	    : operation succeeded
+ */
+int
+segment_modify_shared (char *name, int do_nonshared)
+{
+	struct dcss_segment *seg;
+	unsigned long start_addr, end_addr, dummy;
+	int rc, diag_cc;
+
+	start_addr = end_addr = 0;
+	mutex_lock(&dcss_lock);
+	seg = segment_by_name (name);
+	if (seg == NULL) {
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+	if (do_nonshared == seg->do_nonshared) {
+		pr_info("DCSS %s is already in the requested access "
+			"mode\n", name);
+		rc = 0;
+		goto out_unlock;
+	}
+	/* the mode can only be changed while the caller is the only user */
+	if (atomic_read (&seg->ref_count) != 1) {
+		pr_warning("DCSS %s is in use and cannot be reloaded\n",
+			   name);
+		rc = -EAGAIN;
+		goto out_unlock;
+	}
+	/* re-register the resource with the flags of the new access mode */
+	release_resource(seg->res);
+	if (do_nonshared)
+		seg->res->flags &= ~IORESOURCE_READONLY;
+	else
+		if (seg->vm_segtype == SEG_TYPE_SR ||
+		    seg->vm_segtype == SEG_TYPE_ER)
+			seg->res->flags |= IORESOURCE_READONLY;
+
+	if (request_resource(&iomem_resource, seg->res)) {
+		pr_warning("DCSS %s overlaps with used memory resources "
+			   "and cannot be reloaded\n", name);
+		rc = -EBUSY;
+		kfree(seg->res);
+		goto out_del_mem;
+	}
+
+	/* purge the segment and load it again in the new mode */
+	dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+	if (do_nonshared)
+		diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
+				&start_addr, &end_addr);
+	else
+		diag_cc = dcss_diag(&loadshr_scode, seg->dcss_name,
+				&start_addr, &end_addr);
+	if (diag_cc < 0) {
+		rc = diag_cc;
+		goto out_del_res;
+	}
+	if (diag_cc > 1) {
+		/* in the error case end_addr carries the return code */
+		pr_warning("Reloading DCSS %s failed with rc=%ld\n", name,
+			   end_addr);
+		rc = dcss_diag_translate_rc(end_addr);
+		goto out_del_res;
+	}
+	seg->start_addr = start_addr;
+	seg->end = end_addr;
+	seg->do_nonshared = do_nonshared;
+	rc = 0;
+	goto out_unlock;
+	/* reloading failed: the segment is gone, drop all traces of it */
+ out_del_res:
+	release_resource(seg->res);
+	kfree(seg->res);
+ out_del_mem:
+	vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+	list_del(&seg->list);
+	dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+	kfree(seg);
+ out_unlock:
+	mutex_unlock(&dcss_lock);
+	return rc;
+}
+
+/*
+ * Decrease the use count of a DCSS segment and remove
+ * it from the address space if nobody is using it
+ * any longer.
+ */
+void
+segment_unload(char *name)
+{
+	struct dcss_segment *seg;
+	unsigned long dummy;
+
+	if (!MACHINE_IS_VM)
+		return;
+
+	mutex_lock(&dcss_lock);
+	seg = segment_by_name(name);
+	if (!seg) {
+		pr_err("Unloading unknown DCSS %s failed\n", name);
+	} else if (atomic_dec_return(&seg->ref_count) == 0) {
+		/* that was the last user: remove the segment completely */
+		release_resource(seg->res);
+		kfree(seg->res);
+		vmem_remove_mapping(seg->start_addr,
+				    seg->end - seg->start_addr + 1);
+		list_del(&seg->list);
+		dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy);
+		kfree(seg);
+	}
+	mutex_unlock(&dcss_lock);
+}
+
+/*
+ * save segment content permanently
+ *
+ * Issues the CP command "DEFSEG <name> <ranges>" followed by
+ * "SAVESEG <name>" for the loaded segment @name. Nothing is returned;
+ * failures are reported with an error message only.
+ */
+void
+segment_save(char *name)
+{
+	struct dcss_segment *seg;
+	char cmd1[160];
+	char cmd2[80];
+	size_t len1, len2;
+	int i, response;
+
+	if (!MACHINE_IS_VM)
+		return;
+
+	mutex_lock(&dcss_lock);
+	seg = segment_by_name (name);
+
+	if (seg == NULL) {
+		pr_err("Saving unknown DCSS %s failed\n", name);
+		goto out;
+	}
+
+	/*
+	 * Build both commands with bounded writes. segment_by_name() only
+	 * compares the first 8 characters, so a longer @name can get here
+	 * and must not be able to overrun the buffers on the stack.
+	 * snprintf() returns the length the complete text would have, which
+	 * allows to detect a truncated command afterwards.
+	 */
+	len1 = snprintf(cmd1, sizeof(cmd1), "DEFSEG %s", name);
+	for (i = 0; i < seg->segcnt && len1 < sizeof(cmd1); i++) {
+		len1 += snprintf(cmd1 + len1, sizeof(cmd1) - len1,
+				 " %lX-%lX %s",
+				 seg->range[i].start >> PAGE_SHIFT,
+				 seg->range[i].end >> PAGE_SHIFT,
+				 segtype_string[seg->range[i].start & 0xff]);
+	}
+	len2 = snprintf(cmd2, sizeof(cmd2), "SAVESEG %s", name);
+	if (len1 >= sizeof(cmd1) || len2 >= sizeof(cmd2)) {
+		/* never pass a truncated command on to CP */
+		pr_err("Saving DCSS %s failed because the CP command is "
+		       "too long\n", name);
+		goto out;
+	}
+	response = 0;
+	cpcmd(cmd1, NULL, 0, &response);
+	if (response) {
+		pr_err("Saving a DCSS failed with DEFSEG response code "
+		       "%i\n", response);
+		goto out;
+	}
+	cpcmd(cmd2, NULL, 0, &response);
+	if (response) {
+		pr_err("Saving a DCSS failed with SAVESEG response code "
+		       "%i\n", response);
+		goto out;
+	}
+out:
+	mutex_unlock(&dcss_lock);
+}
+
+/*
+ * print appropriate error message for segment_load()/segment_type()
+ * return code
+ *
+ * rc       : negative error code returned by one of these functions
+ * seg_name : name of the segment, used in the message text
+ *
+ * Values without a matching message (including the non-negative segment
+ * types returned on success) do not print anything.
+ */
+void segment_warning(int rc, char *seg_name)
+{
+	switch (rc) {
+	case -ENOENT:
+		pr_err("DCSS %s cannot be loaded or queried\n", seg_name);
+		break;
+	case -ENOSYS:
+		pr_err("DCSS %s cannot be loaded or queried without "
+		       "z/VM\n", seg_name);
+		break;
+	case -EIO:
+		pr_err("Loading or querying DCSS %s resulted in a "
+		       "hardware error\n", seg_name);
+		break;
+	case -EOPNOTSUPP:
+		pr_err("DCSS %s has multiple page ranges and cannot be "
+		       "loaded or queried\n", seg_name);
+		break;
+	case -ENOSPC:
+		pr_err("DCSS %s overlaps with used storage and cannot "
+		       "be loaded\n", seg_name);
+		break;
+	case -EBUSY:
+		pr_err("%s needs used memory resources and cannot be "
+		       "loaded or queried\n", seg_name);
+		break;
+	case -EPERM:
+		pr_err("DCSS %s is already loaded in a different access "
+		       "mode\n", seg_name);
+		break;
+	case -ENOMEM:
+		pr_err("There is not enough memory to load or query "
+		       "DCSS %s\n", seg_name);
+		break;
+	case -ERANGE:
+		pr_err("DCSS %s exceeds the kernel mapping range (%lu) "
+		       "and cannot be loaded\n", seg_name, VMEM_MAX_PHYS);
+		break;
+	default:
+		break;
+	}
+}
+
+EXPORT_SYMBOL(segment_load);
+EXPORT_SYMBOL(segment_unload);
+EXPORT_SYMBOL(segment_save);
+EXPORT_SYMBOL(segment_type);
+EXPORT_SYMBOL(segment_modify_shared);
+EXPORT_SYMBOL(segment_warning);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
new file mode 100644
index 000000000..76515bcea
--- /dev/null
+++ b/arch/s390/mm/fault.c
@@ -0,0 +1,758 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Hartmut Penner (hp@de.ibm.com)
+ *               Ulrich Weigand (uweigand@de.ibm.com)
+ *
+ *  Derived from "arch/i386/mm/fault.c"
+ *    Copyright (C) 1995  Linus Torvalds
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/compat.h>
+#include <linux/smp.h>
+#include <linux/kdebug.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/hugetlb.h>
+#include <asm/asm-offsets.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/mmu_context.h>
+#include <asm/facility.h>
+#include "../kernel/entry.h"
+
+#define __FAIL_ADDR_MASK -4096L
+#define __SUBCODE_MASK 0x0600
+#define __PF_RES_FIELD 0x8000000000000000ULL
+
+#define VM_FAULT_BADCONTEXT	0x010000
+#define VM_FAULT_BADMAP		0x020000
+#define VM_FAULT_BADACCESS	0x040000
+#define VM_FAULT_SIGNAL		0x080000
+#define VM_FAULT_PFAULT		0x100000
+
+static unsigned long store_indication __read_mostly;
+
+static int __init fault_init(void)
+{
+	if (test_facility(75))
+		store_indication = 0xc00;
+	return 0;
+}
+early_initcall(fault_init);
+
+static inline int notify_page_fault(struct pt_regs *regs)
+{
+	int ret = 0;
+
+	/* kprobe_running() needs smp_processor_id() */
+	if (kprobes_built_in() && !user_mode(regs)) {
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, 14))
+			ret = 1;
+		preempt_enable();
+	}
+	return ret;
+}
+
+
+/*
+ * Unlock any spinlocks which will prevent us from getting the
+ * message out.
+ */
+void bust_spinlocks(int yes)
+{
+	if (yes) {
+		oops_in_progress = 1;
+	} else {
+		int loglevel_save = console_loglevel;
+		console_unblank();
+		oops_in_progress = 0;
+		/*
+		 * OK, the message is on the console.  Now we call printk()
+		 * without oops_in_progress set so that printk will give klogd
+		 * a poke.  Hold onto your hats...
+		 */
+		console_loglevel = 15;
+		printk(" ");
+		console_loglevel = loglevel_save;
+	}
+}
+
+/*
+ * Returns the address space associated with the fault.
+ * Returns 0 for kernel space and 1 for user space.
+ */
+static inline int user_space_fault(struct pt_regs *regs)
+{
+	unsigned long trans_exc_code;
+
+	/*
+	 * The lowest two bits of the translation exception
+	 * identification indicate which paging table was used.
+	 */
+	trans_exc_code = regs->int_parm_long & 3;
+	if (trans_exc_code == 3) /* home space -> kernel */
+		return 0;
+	if (user_mode(regs))
+		return 1;
+	if (trans_exc_code == 2) /* secondary space -> set_fs */
+		return current->thread.mm_segment.ar4;
+	if (current->flags & PF_VCPU)
+		return 1;
+	return 0;
+}
+
+static int bad_address(void *p)
+{
+	unsigned long dummy;
+
+	return probe_kernel_address((unsigned long *)p, dummy);
+}
+
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+	unsigned long *table = __va(asce & PAGE_MASK);
+
+	pr_alert("AS:%016lx ", asce);
+	switch (asce & _ASCE_TYPE_MASK) {
+	case _ASCE_TYPE_REGION1:
+		table = table + ((address >> 53) & 0x7ff);
+		if (bad_address(table))
+			goto bad;
+		pr_cont("R1:%016lx ", *table);
+		if (*table & _REGION_ENTRY_INVALID)
+			goto out;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* fallthrough */
+	case _ASCE_TYPE_REGION2:
+		table = table + ((address >> 42) & 0x7ff);
+		if (bad_address(table))
+			goto bad;
+		pr_cont("R2:%016lx ", *table);
+		if (*table & _REGION_ENTRY_INVALID)
+			goto out;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* fallthrough */
+	case _ASCE_TYPE_REGION3:
+		table = table + ((address >> 31) & 0x7ff);
+		if (bad_address(table))
+			goto bad;
+		pr_cont("R3:%016lx ", *table);
+		if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
+			goto out;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		/* fallthrough */
+	case _ASCE_TYPE_SEGMENT:
+		table = table + ((address >> 20) & 0x7ff);
+		if (bad_address(table))
+			goto bad;
+		pr_cont("S:%016lx ", *table);
+		if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
+			goto out;
+		table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+	}
+	table = table + ((address >> 12) & 0xff);
+	if (bad_address(table))
+		goto bad;
+	pr_cont("P:%016lx ", *table);
+out:
+	pr_cont("\n");
+	return;
+bad:
+	pr_cont("BAD\n");
+}
+
+static void dump_fault_info(struct pt_regs *regs)
+{
+	unsigned long asce;
+
+	pr_alert("Fault in ");
+	switch (regs->int_parm_long & 3) {
+	case 3:
+		pr_cont("home space ");
+		break;
+	case 2:
+		pr_cont("secondary space ");
+		break;
+	case 1:
+		pr_cont("access register ");
+		break;
+	case 0:
+		pr_cont("primary space ");
+		break;
+	}
+	pr_cont("mode while using ");
+	if (!user_space_fault(regs)) {
+		asce = S390_lowcore.kernel_asce;
+		pr_cont("kernel ");
+	}
+#ifdef CONFIG_PGSTE
+	else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
+		struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+		asce = gmap->asce;
+		pr_cont("gmap ");
+	}
+#endif
+	else {
+		asce = S390_lowcore.user_asce;
+		pr_cont("user ");
+	}
+	pr_cont("ASCE.\n");
+	dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
+}
+
+static inline void report_user_fault(struct pt_regs *regs, long signr)
+{
+	if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
+		return;
+	if (!unhandled_signal(current, signr))
+		return;
+	if (!printk_ratelimit())
+		return;
+	printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ",
+	       regs->int_code & 0xffff, regs->int_code >> 17);
+	print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
+	printk(KERN_CONT "\n");
+	printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
+	       regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+	dump_fault_info(regs);
+	show_regs(regs);
+}
+
+/*
+ * Send SIGSEGV with @si_code to current; out of line to save fault-path stack.
+ */
+static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
+{
+	struct siginfo si;
+
+	report_user_fault(regs, SIGSEGV);
+	si.si_signo = SIGSEGV;
+	si.si_errno = 0;	/* as in do_sigbus(): no stale stack data */
+	si.si_code = si_code;
+	si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
+	force_sig_info(SIGSEGV, &si, current);
+}
+
+/*
+ * Last resort for a fault: take the exception-table fixup or oops and exit.
+ */
+static noinline void do_no_context(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+	unsigned long address;
+
+	/* Are we prepared to handle this kernel fault?  */
+	fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+	if (fixup) {
+		regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+		return;
+	}
+
+	/* Oops: the kernel accessed a bad page, terminate with prejudice. */
+	address = regs->int_parm_long & __FAIL_ADDR_MASK;
+	if (!user_space_fault(regs))
+		printk(KERN_ALERT "Unable to handle kernel pointer dereference"
+		       " in virtual kernel address space\n");
+	else
+		printk(KERN_ALERT "Unable to handle kernel paging request"
+		       " in virtual user address space\n");
+	printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
+	       address, regs->int_parm_long);
+	dump_fault_info(regs);
+	die(regs, "Oops");
+	do_exit(SIGKILL);
+}
+
+static noinline void do_low_address(struct pt_regs *regs)
+{
+	/* Low-address protection hit in kernel mode means
+	   NULL pointer write access in kernel mode.  */
+	if (regs->psw.mask & PSW_MASK_PSTATE) {
+		/* Low-address protection hit in user mode 'cannot happen'. */
+		die (regs, "Low-address protection");
+		do_exit(SIGKILL);
+	}
+
+	do_no_context(regs);
+}
+
+static noinline void do_sigbus(struct pt_regs *regs)
+{
+	struct task_struct *tsk = current;
+	struct siginfo si;
+
+	/*
+	 * Send a sigbus, regardless of whether we were in kernel
+	 * or user mode.
+	 */
+	si.si_signo = SIGBUS;
+	si.si_errno = 0;
+	si.si_code = BUS_ADRERR;
+	si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
+	force_sig_info(SIGBUS, &si, tsk);
+}
+
+static noinline void do_fault_error(struct pt_regs *regs, int fault)
+{
+	int si_code;
+
+	switch (fault) {
+	case VM_FAULT_BADACCESS:
+	case VM_FAULT_BADMAP:
+		/* Bad memory access. Check if it is kernel or user space. */
+		if (user_mode(regs)) {
+			/* User mode accesses just cause a SIGSEGV */
+			si_code = (fault == VM_FAULT_BADMAP) ?
+				SEGV_MAPERR : SEGV_ACCERR;
+			do_sigsegv(regs, si_code);
+			return;
+		}
+	case VM_FAULT_BADCONTEXT:
+	case VM_FAULT_PFAULT:
+		do_no_context(regs);
+		break;
+	case VM_FAULT_SIGNAL:
+		if (!user_mode(regs))
+			do_no_context(regs);
+		break;
+	default: /* fault & VM_FAULT_ERROR */
+		if (fault & VM_FAULT_OOM) {
+			if (!user_mode(regs))
+				do_no_context(regs);
+			else
+				pagefault_out_of_memory();
+		} else if (fault & VM_FAULT_SIGSEGV) {
+			/* Kernel mode? Handle exceptions or die */
+			if (!user_mode(regs))
+				do_no_context(regs);
+			else
+				do_sigsegv(regs, SEGV_MAPERR);
+		} else if (fault & VM_FAULT_SIGBUS) {
+			/* Kernel mode? Handle exceptions or die */
+			if (!user_mode(regs))
+				do_no_context(regs);
+			else
+				do_sigbus(regs);
+		} else
+			BUG();
+		break;
+	}
+}
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ *
+ * interruption code (int_code):
+ *   04       Protection           ->  Write-Protection  (suppression)
+ *   10       Segment translation  ->  Not present       (nullification)
+ *   11       Page translation     ->  Not present       (nullification)
+ *   3b       Region third trans.  ->  Not present       (nullification)
+ */
+static inline int do_exception(struct pt_regs *regs, int access)
+{
+#ifdef CONFIG_PGSTE
+	struct gmap *gmap;
+#endif
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	unsigned long trans_exc_code, address;
+	unsigned int flags;
+	int fault;
+
+	tsk = current;
+	/*
+	 * The instruction that caused the program check has
+	 * been nullified. Don't signal single step via SIGTRAP.
+	 */
+	clear_pt_regs_flag(regs, PIF_PER_TRAP);
+
+	if (notify_page_fault(regs))
+		return 0;
+
+	mm = tsk->mm;
+	trans_exc_code = regs->int_parm_long;
+
+	/*
+	 * Verify that the fault happened in user space, that
+	 * we are not in an interrupt and that there is a
+	 * user context.
+	 */
+	fault = VM_FAULT_BADCONTEXT;
+	if (unlikely(!user_space_fault(regs) || in_atomic() || !mm))
+		goto out;
+
+	address = trans_exc_code & __FAIL_ADDR_MASK;
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+	flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
+		flags |= FAULT_FLAG_WRITE;
+	down_read(&mm->mmap_sem);
+
+#ifdef CONFIG_PGSTE
+	gmap = (current->flags & PF_VCPU) ?
+		(struct gmap *) S390_lowcore.gmap : NULL;
+	if (gmap) {
+		current->thread.gmap_addr = address;
+		address = __gmap_translate(gmap, address);
+		if (address == -EFAULT) {
+			fault = VM_FAULT_BADMAP;
+			goto out_up;
+		}
+		if (gmap->pfault_enabled)
+			flags |= FAULT_FLAG_RETRY_NOWAIT;
+	}
+#endif
+
+retry:
+	fault = VM_FAULT_BADMAP;
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto out_up;
+
+	if (unlikely(vma->vm_start > address)) {
+		if (!(vma->vm_flags & VM_GROWSDOWN))
+			goto out_up;
+		if (expand_stack(vma, address))
+			goto out_up;
+	}
+
+	/*
+	 * Ok, we have a good vm_area for this memory access, so
+	 * we can handle it..
+	 */
+	fault = VM_FAULT_BADACCESS;
+	if (unlikely(!(vma->vm_flags & access)))
+		goto out_up;
+
+	if (is_vm_hugetlb_page(vma))
+		address &= HPAGE_MASK;
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	fault = handle_mm_fault(mm, vma, address, flags);
+	/* No reason to continue if interrupted by SIGKILL. */
+	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+		fault = VM_FAULT_SIGNAL;
+		/* RETRY_NOWAIT keeps mmap_sem held: release it. */
+		if (flags & FAULT_FLAG_RETRY_NOWAIT)
+			goto out_up;
+		goto out;
+	}
+	if (unlikely(fault & VM_FAULT_ERROR))
+		goto out_up;
+
+	/*
+	 * Major/minor page fault accounting is only done on the
+	 * initial attempt. If we go through a retry, it is extremely
+	 * likely that the page will be found in page cache at that point.
+	 */
+	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+		if (fault & VM_FAULT_MAJOR) {
+			tsk->maj_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+				      regs, address);
+		} else {
+			tsk->min_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+				      regs, address);
+		}
+		if (fault & VM_FAULT_RETRY) {
+#ifdef CONFIG_PGSTE
+			if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
+				/* RETRY_NOWAIT: mmap_sem is still held */
+				current->thread.gmap_pfault = 1;
+				fault = VM_FAULT_PFAULT;
+				goto out_up;
+			}
+#endif
+			/* Clear ALLOW_RETRY to avoid any risk of starvation. */
+			flags &= ~(FAULT_FLAG_ALLOW_RETRY |
+				   FAULT_FLAG_RETRY_NOWAIT);
+			flags |= FAULT_FLAG_TRIED;
+			down_read(&mm->mmap_sem);
+			goto retry;
+		}
+	}
+#ifdef CONFIG_PGSTE
+	if (gmap) {
+		address =  __gmap_link(gmap, current->thread.gmap_addr,
+				       address);
+		if (address == -EFAULT) {
+			fault = VM_FAULT_BADMAP;
+			goto out_up;
+		}
+		if (address == -ENOMEM) {
+			fault = VM_FAULT_OOM;
+			goto out_up;
+		}
+	}
+#endif
+	fault = 0;
+out_up:
+	up_read(&mm->mmap_sem);
+out:
+	return fault;
+}
+
+void do_protection_exception(struct pt_regs *regs)
+{
+	unsigned long trans_exc_code;
+	int fault;
+
+	trans_exc_code = regs->int_parm_long;
+	/*
+	 * Protection exceptions are suppressing, decrement psw address.
+	 * The exception to this rule are aborted transactions, for these
+	 * the PSW already points to the correct location.
+	 */
+	if (!(regs->int_code & 0x200))
+		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
+	/*
+	 * Check for low-address protection.  This needs to be treated
+	 * as a special case because the translation exception code
+	 * field is not guaranteed to contain valid data in this case.
+	 */
+	if (unlikely(!(trans_exc_code & 4))) {
+		do_low_address(regs);
+		return;
+	}
+	fault = do_exception(regs, VM_WRITE);
+	if (unlikely(fault))
+		do_fault_error(regs, fault);
+}
+NOKPROBE_SYMBOL(do_protection_exception);
+
+void do_dat_exception(struct pt_regs *regs)
+{
+	int access, fault;
+
+	access = VM_READ | VM_EXEC | VM_WRITE;
+	fault = do_exception(regs, access);
+	if (unlikely(fault))
+		do_fault_error(regs, fault);
+}
+NOKPROBE_SYMBOL(do_dat_exception);
+
+#ifdef CONFIG_PFAULT 
+/*
+ * 'pfault' pseudo page faults routines.
+ */
+static int pfault_disable;
+
+static int __init nopfault(char *str)
+{
+	pfault_disable = 1;
+	return 1;
+}
+
+__setup("nopfault", nopfault);
+
+struct pfault_refbk {
+	u16 refdiagc;
+	u16 reffcode;
+	u16 refdwlen;
+	u16 refversn;
+	u64 refgaddr;
+	u64 refselmk;
+	u64 refcmpmk;
+	u64 reserved;
+} __attribute__ ((packed, aligned(8)));
+
+int pfault_init(void)
+{
+	struct pfault_refbk refbk = {
+		.refdiagc = 0x258,
+		.reffcode = 0,
+		.refdwlen = 5,
+		.refversn = 2,
+		.refgaddr = __LC_CURRENT_PID,
+		.refselmk = 1ULL << 48,
+		.refcmpmk = 1ULL << 48,
+		.reserved = __PF_RES_FIELD };
+        int rc;
+
+	if (pfault_disable)
+		return -1;
+	asm volatile(
+		"	diag	%1,%0,0x258\n"
+		"0:	j	2f\n"
+		"1:	la	%0,8\n"
+		"2:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc");
+        return rc;
+}
+
+void pfault_fini(void)
+{
+	struct pfault_refbk refbk = {
+		.refdiagc = 0x258,
+		.reffcode = 1,
+		.refdwlen = 5,
+		.refversn = 2,
+	};
+
+	if (pfault_disable)
+		return;
+	asm volatile(
+		"	diag	%0,0,0x258\n"
+		"0:\n"
+		EX_TABLE(0b,0b)
+		: : "a" (&refbk), "m" (refbk) : "cc");
+}
+
+static DEFINE_SPINLOCK(pfault_lock);
+static LIST_HEAD(pfault_list);
+
+static void pfault_interrupt(struct ext_code ext_code,
+			     unsigned int param32, unsigned long param64)
+{
+	struct task_struct *tsk;
+	__u16 subcode;
+	pid_t pid;
+
+	/*
+	 * Get the external interruption subcode & pfault
+	 * initial/completion signal bit. VM stores this
+	 * in the 'cpu address' field associated with the
+	 * external interrupt.
+	 */
+	subcode = ext_code.subcode;
+	if ((subcode & 0xff00) != __SUBCODE_MASK)
+		return;
+	inc_irq_stat(IRQEXT_PFL);
+	/* Get the token (= pid of the affected task). */
+	pid = sizeof(void *) == 4 ? param32 : param64;
+	rcu_read_lock();
+	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+	if (tsk)
+		get_task_struct(tsk);
+	rcu_read_unlock();
+	if (!tsk)
+		return;
+	spin_lock(&pfault_lock);
+	if (subcode & 0x0080) {
+		/* signal bit is set -> a page has been swapped in by VM */
+		if (tsk->thread.pfault_wait == 1) {
+			/* Initial interrupt was faster than the completion
+			 * interrupt. pfault_wait is valid. Set pfault_wait
+			 * back to zero and wake up the process. This can
+			 * safely be done because the task is still sleeping
+			 * and can't produce new pfaults. */
+			tsk->thread.pfault_wait = 0;
+			list_del(&tsk->thread.list);
+			wake_up_process(tsk);
+			put_task_struct(tsk);
+		} else {
+			/* Completion interrupt was faster than initial
+			 * interrupt. Set pfault_wait to -1 so the initial
+			 * interrupt doesn't put the task to sleep.
+			 * If the task is not running, ignore the completion
+			 * interrupt since it must be a leftover of a PFAULT
+			 * CANCEL operation which didn't remove all pending
+			 * completion interrupts. */
+			if (tsk->state == TASK_RUNNING)
+				tsk->thread.pfault_wait = -1;
+		}
+	} else {
+		/* signal bit not set -> a real page is missing. */
+		if (WARN_ON_ONCE(tsk != current))
+			goto out;
+		if (tsk->thread.pfault_wait == 1) {
+			/* Already on the list with a reference: put to sleep */
+			__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+			set_tsk_need_resched(tsk);
+		} else if (tsk->thread.pfault_wait == -1) {
+			/* Completion interrupt was faster than the initial
+			 * interrupt (pfault_wait == -1). Set pfault_wait
+			 * back to zero and exit. */
+			tsk->thread.pfault_wait = 0;
+		} else {
+			/* Initial interrupt arrived before completion
+			 * interrupt. Let the task sleep.
+			 * An extra task reference is needed since a different
+			 * cpu may set the task state to TASK_RUNNING again
+			 * before the scheduler is reached. */
+			get_task_struct(tsk);
+			tsk->thread.pfault_wait = 1;
+			list_add(&tsk->thread.list, &pfault_list);
+			__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+			set_tsk_need_resched(tsk);
+		}
+	}
+out:
+	spin_unlock(&pfault_lock);
+	put_task_struct(tsk);
+}
+
+static int pfault_cpu_notify(struct notifier_block *self, unsigned long action,
+			     void *hcpu)
+{
+	struct thread_struct *thread, *next;
+	struct task_struct *tsk;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DEAD:
+		spin_lock_irq(&pfault_lock);
+		list_for_each_entry_safe(thread, next, &pfault_list, list) {
+			thread->pfault_wait = 0;
+			list_del(&thread->list);
+			tsk = container_of(thread, struct task_struct, thread);
+			wake_up_process(tsk);
+			put_task_struct(tsk);
+		}
+		spin_unlock_irq(&pfault_lock);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static int __init pfault_irq_init(void)
+{
+	int rc;
+
+	rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
+	if (rc)
+		goto out_extint;
+	rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
+	if (rc)
+		goto out_pfault;
+	irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
+	hotcpu_notifier(pfault_cpu_notify, 0);
+	return 0;
+
+out_pfault:
+	unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
+out_extint:
+	pfault_disable = 1;
+	return rc;
+}
+early_initcall(pfault_irq_init);
+
+#endif /* CONFIG_PFAULT */
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
new file mode 100644
index 000000000..1eb41bb30
--- /dev/null
+++ b/arch/s390/mm/gup.c
@@ -0,0 +1,240 @@
+/*
+ *  Lockless get_user_pages_fast for s390
+ *
+ *  Copyright IBM Corp. 2010
+ *  Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/vmstat.h>
+#include <linux/pagemap.h>
+#include <linux/rwsem.h>
+#include <asm/pgtable.h>
+
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ */
+static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	unsigned long mask;
+	pte_t *ptep, pte;
+	struct page *page;
+
+	mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
+
+	ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
+	do {
+		pte = *ptep;
+		barrier();
+		if ((pte_val(pte) & mask) != 0)
+			return 0;
+		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+		page = pte_page(pte);
+		if (!page_cache_get_speculative(page))
+			return 0;
+		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+			put_page(page);
+			return 0;
+		}
+		pages[*nr] = page;
+		(*nr)++;
+
+	} while (ptep++, addr += PAGE_SIZE, addr != end);
+
+	return 1;
+}
+
+/* Pin [addr, end) of a large pmd into @pages; returns 0 on failure. */
+static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	unsigned long mask;
+	struct page *head, *page, *tail;
+	int refs;
+
+	/* Writers need an unprotected entry, every access a valid one. */
+	mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID;
+	if ((pmd_val(pmd) & mask) != 0)
+		return 0;
+	VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
+
+	refs = 0;
+	head = pmd_page(pmd);
+	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+	tail = page;
+	do {
+		VM_BUG_ON(compound_head(page) != head);
+		pages[*nr] = page;
+		(*nr)++;
+		page++;
+		refs++;
+	} while (addr += PAGE_SIZE, addr != end);
+
+	if (!page_cache_add_speculative(head, refs)) {
+		*nr -= refs;
+		return 0;
+	}
+
+	if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
+		*nr -= refs;
+		while (refs--)
+			put_page(head);
+		return 0;
+	}
+
+	/*
+	 * Tail pages need their mapcount reference taken before we return.
+	 */
+	while (refs--) {
+		if (PageTail(tail))
+			get_huge_page_tail(tail);
+		tail++;
+	}
+
+	return 1;
+}
+
+
+static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pmd_t *pmdp, pmd;
+
+	pmdp = (pmd_t *) pudp;
+	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
+		pmdp = (pmd_t *) pud_deref(pud);
+	pmdp += pmd_index(addr);
+	do {
+		pmd = *pmdp;
+		barrier();
+		next = pmd_addr_end(addr, end);
+		/*
+		 * The pmd_trans_splitting() check below explains why
+		 * pmdp_splitting_flush() has to serialize with
+		 * smp_call_function() against our disabled IRQs, to stop
+		 * this gup-fast code from running while we set the
+		 * splitting bit in the pmd. Returning zero will take
+		 * the slow path that will call wait_split_huge_page()
+		 * if the pmd is still in splitting state.
+		 */
+		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+			return 0;
+		if (unlikely(pmd_large(pmd))) {
+			if (!gup_huge_pmd(pmdp, pmd, addr, next,
+					  write, pages, nr))
+				return 0;
+		} else if (!gup_pte_range(pmdp, pmd, addr, next,
+					  write, pages, nr))
+			return 0;
+	} while (pmdp++, addr = next, addr != end);
+
+	return 1;
+}
+
+static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pud_t *pudp, pud;
+
+	pudp = (pud_t *) pgdp;
+	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
+		pudp = (pud_t *) pgd_deref(pgd);
+	pudp += pud_index(addr);
+	do {
+		pud = *pudp;
+		barrier();
+		next = pud_addr_end(addr, end);
+		if (pud_none(pud))
+			return 0;
+		if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr))
+			return 0;
+	} while (pudp++, addr = next, addr != end);
+
+	return 1;
+}
+
+/*
+ * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			  struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next, flags;
+	pgd_t *pgdp, pgd;
+	int nr = 0;
+
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+	if ((end <= start) || (end > TASK_SIZE))
+		return 0;
+	/*
+	 * local_irq_save() doesn't prevent pagetable teardown, but does
+	 * prevent the pagetables from being freed on s390.
+	 *
+	 * So long as we atomically load page table pointers versus teardown,
+	 * we can follow the address down to the page and take a ref on it.
+	 */
+	local_irq_save(flags);
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd = *pgdp;
+		barrier();
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			break;
+		if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
+			break;
+	} while (pgdp++, addr = next, addr != end);
+	local_irq_restore(flags);
+
+	return nr;
+}
+
+/**
+ * get_user_pages_fast() - pin user pages in memory
+ * @start:	starting user address
+ * @nr_pages:	number of pages from start to pin
+ * @write:	whether pages will be written to
+ * @pages:	array that receives pointers to the pages pinned.
+ *		Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages().
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	int nr, ret;
+
+	start &= PAGE_MASK;
+	nr = __get_user_pages_fast(start, nr_pages, write, pages);
+	if (nr == nr_pages)
+		return nr;
+
+	/* Slow path for the rest; shift in unsigned long to avoid overflow */
+	start += (unsigned long) nr << PAGE_SHIFT;
+	pages += nr;
+	ret = get_user_pages_unlocked(current, mm, start,
+			     nr_pages - nr, write, 0, pages);
+	/* Have to be a bit careful with return values */
+	if (nr > 0)
+		ret = (ret < 0) ? nr : ret + nr;
+	return ret;
+}
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
new file mode 100644
index 000000000..e617e74b7
--- /dev/null
+++ b/arch/s390/mm/hugetlbpage.c
@@ -0,0 +1,212 @@
+/*
+ *  IBM System z Huge TLB Page Support for Kernel.
+ *
+ *    Copyright IBM Corp. 2007
+ *    Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+
+static inline pmd_t __pte_to_pmd(pte_t pte)
+{
+	pmd_t pmd;
+
+	/*
+	 * Convert encoding		  pte bits	   pmd bits
+	 *				lIR.uswrdy.p	dy..R...I...wr
+	 * empty			010.000000.0 -> 00..0...1...00
+	 * prot-none, clean, old	111.000000.1 -> 00..1...1...00
+	 * prot-none, clean, young	111.000001.1 -> 01..1...1...00
+	 * prot-none, dirty, old	111.000010.1 -> 10..1...1...00
+	 * prot-none, dirty, young	111.000011.1 -> 11..1...1...00
+	 * read-only, clean, old	111.000100.1 -> 00..1...1...01
+	 * read-only, clean, young	101.000101.1 -> 01..1...0...01
+	 * read-only, dirty, old	111.000110.1 -> 10..1...1...01
+	 * read-only, dirty, young	101.000111.1 -> 11..1...0...01
+	 * read-write, clean, old	111.001100.1 -> 00..1...1...11
+	 * read-write, clean, young	101.001101.1 -> 01..1...0...11
+	 * read-write, dirty, old	110.001110.1 -> 10..0...1...11
+	 * read-write, dirty, young	100.001111.1 -> 11..0...0...11
+	 * HW-bits: R read-only, I invalid
+	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
+	 *	    u unused, l large
+	 */
+	if (pte_present(pte)) {
+		pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
+		pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4;
+		pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4;
+		pmd_val(pmd) |=	(pte_val(pte) & _PAGE_INVALID) >> 5;
+		pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT);
+		pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
+		pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
+	} else
+		pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
+	return pmd;
+}
+
+static inline pte_t __pmd_to_pte(pmd_t pmd)
+{
+	pte_t pte;
+
+	/*
+	 * Convert encoding		   pmd bits	    pte bits
+	 *				dy..R...I...wr	  lIR.uswrdy.p
+	 * empty			00..0...1...00 -> 010.000000.0
+	 * prot-none, clean, old	00..1...1...00 -> 111.000000.1
+	 * prot-none, clean, young	01..1...1...00 -> 111.000001.1
+	 * prot-none, dirty, old	10..1...1...00 -> 111.000010.1
+	 * prot-none, dirty, young	11..1...1...00 -> 111.000011.1
+	 * read-only, clean, old	00..1...1...01 -> 111.000100.1
+	 * read-only, clean, young	01..1...0...01 -> 101.000101.1
+	 * read-only, dirty, old	10..1...1...01 -> 111.000110.1
+	 * read-only, dirty, young	11..1...0...01 -> 101.000111.1
+	 * read-write, clean, old	00..1...1...11 -> 111.001100.1
+	 * read-write, clean, young	01..1...0...11 -> 101.001101.1
+	 * read-write, dirty, old	10..0...1...11 -> 110.001110.1
+	 * read-write, dirty, young	11..0...0...11 -> 100.001111.1
+	 * HW-bits: R read-only, I invalid
+	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
+	 *	    u unused, l large
+	 */
+	if (pmd_present(pmd)) {
+		pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE;
+		pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
+		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4;
+		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4;
+		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5;
+		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
+		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10;
+		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10;
+	} else
+		pte_val(pte) = _PAGE_INVALID;
+	return pte;
+}
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte)
+{
+	pmd_t pmd;
+
+	pmd = __pte_to_pmd(pte);
+	if (!MACHINE_HAS_HPAGE) {
+		/* Emulated huge ptes lose the dirty and young bit */
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
+		pmd_val(pmd) |= pte_page(pte)[1].index;
+	} else
+		pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
+	*(pmd_t *) ptep = pmd;
+}
+
+pte_t huge_ptep_get(pte_t *ptep)
+{
+	unsigned long origin;
+	pmd_t pmd;
+
+	pmd = *(pmd_t *) ptep;
+	if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) {
+		origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
+		pmd_val(pmd) |= *(unsigned long *) origin;
+		/* Emulated huge ptes are young and dirty by definition */
+		pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_DIRTY;
+	}
+	return __pmd_to_pte(pmd);
+}
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+			      unsigned long addr, pte_t *ptep)
+{
+	pmd_t *pmdp = (pmd_t *) ptep;
+	pte_t pte = huge_ptep_get(ptep);
+
+	pmdp_flush_direct(mm, addr, pmdp);
+	pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+	return pte;
+}
+
+int arch_prepare_hugepage(struct page *page)
+{
+	unsigned long addr = page_to_phys(page);
+	pte_t pte;
+	pte_t *ptep;
+	int i;
+
+	if (MACHINE_HAS_HPAGE)
+		return 0;
+
+	ptep = (pte_t *) pte_alloc_one(&init_mm, addr);
+	if (!ptep)
+		return -ENOMEM;
+
+	pte_val(pte) = addr;
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
+		pte_val(pte) += PAGE_SIZE;
+	}
+	page[1].index = (unsigned long) ptep;
+	return 0;
+}
+
+void arch_release_hugepage(struct page *page)
+{
+	pte_t *ptep;
+
+	if (MACHINE_HAS_HPAGE)
+		return;
+
+	ptep = (pte_t *) page[1].index;
+	if (!ptep)
+		return;
+	clear_table((unsigned long *) ptep, _PAGE_INVALID,
+		    PTRS_PER_PTE * sizeof(pte_t));
+	page_table_free(&init_mm, (unsigned long *) ptep);
+	page[1].index = 0;
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp = NULL;
+
+	pgdp = pgd_offset(mm, addr);
+	pudp = pud_alloc(mm, pgdp, addr);
+	if (pudp)
+		pmdp = pmd_alloc(mm, pudp, addr);
+	return (pte_t *) pmdp;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp = NULL;
+
+	pgdp = pgd_offset(mm, addr);
+	if (pgd_present(*pgdp)) {
+		pudp = pud_offset(pgdp, addr);
+		if (pud_present(*pudp))
+			pmdp = pmd_offset(pudp, addr);
+	}
+	return (pte_t *) pmdp;
+}
+
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+	return 0;
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	if (!MACHINE_HAS_HPAGE)
+		return 0;
+
+	return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
+}
+
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
new file mode 100644
index 000000000..80875c43a
--- /dev/null
+++ b/arch/s390/mm/init.c
@@ -0,0 +1,230 @@
+/*
+ *  S390 version
+ *    Copyright IBM Corp. 1999
+ *    Author(s): Hartmut Penner (hp@de.ibm.com)
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1995  Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/memory.h>
+#include <linux/pfn.h>
+#include <linux/poison.h>
+#include <linux/initrd.h>
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/lowcore.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+#include <asm/ctl_reg.h>
+#include <asm/sclp.h>
+
+/* Page-aligned top-level table that paging_init() installs as init_mm.pgd */
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
+
+/*
+ * empty_zero_page: address of the zeroed pages allocated by
+ * setup_zero_pages(); zero_page_mask: page-aligned offset mask covering
+ * that allocation. Both are exported.
+ */
+unsigned long empty_zero_page, zero_page_mask;
+EXPORT_SYMBOL(empty_zero_page);
+EXPORT_SYMBOL(zero_page_mask);
+
+/*
+ * Allocate the block of zeroed pages behind empty_zero_page.
+ *
+ * The allocation order is picked from the machine type reported by
+ * get_cpu_id() and then reduced (but never below 2 once it started above
+ * 2) while the system has fewer than 2^order * 1024 pages of RAM. The
+ * block is split into single pages, each page is marked reserved, and
+ * zero_page_mask is set to the page-aligned offsets inside the block.
+ * Panics if the allocation fails.
+ */
+static void __init setup_zero_pages(void)
+{
+	struct cpuid id;
+	unsigned int order;
+	struct page *first;
+	int nr, idx;
+
+	get_cpu_id(&id);
+	switch (id.machine) {
+	case 0x9672:	/* g5 */
+	case 0x2064:	/* z900 */
+	case 0x2066:	/* z900 */
+	case 0x2084:	/* z990 */
+	case 0x2086:	/* z990 */
+	case 0x2094:	/* z9-109 */
+	case 0x2096:	/* z9-109 */
+		order = 0;
+		break;
+	case 0x2097:	/* z10 */
+	case 0x2098:	/* z10 */
+	case 0x2817:	/* z196 */
+	case 0x2818:	/* z196 */
+		order = 2;
+		break;
+	case 0x2827:	/* zEC12 */
+	case 0x2828:	/* zEC12 */
+		order = 5;
+		break;
+	case 0x2964:	/* z13 */
+	default:
+		order = 7;
+		break;
+	}
+	/* Limit number of empty zero pages for small memory sizes */
+	for (; order > 2; order--) {
+		if ((totalram_pages >> 10) >= (1UL << order))
+			break;
+	}
+
+	empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!empty_zero_page)
+		panic("Out of memory in setup_zero_pages");
+
+	/* Turn the high-order block into individually reserved pages */
+	first = virt_to_page((void *) empty_zero_page);
+	split_page(first, order);
+	nr = 1 << order;
+	for (idx = 0; idx < nr; idx++)
+		mark_page_reserved(first + idx);
+
+	zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
+}
+
+/*
+ * paging_init() sets up the kernel page tables and the memory zones.
+ *
+ * Steps, in order:
+ *  - install swapper_pg_dir as init_mm.pgd and choose a region-2 or
+ *    region-3 top-level table depending on whether VMALLOC_END exceeds
+ *    1UL << 42,
+ *  - store the resulting ASCE in the lowcore and initialise all 2048
+ *    top-level entries to the matching "empty" value,
+ *  - build the kernel mapping via vmem_map_init(),
+ *  - load the kernel ASCE into control registers 1, 7 and 13 and switch
+ *    on virtual addressing,
+ *  - initialise sparsemem and the DMA/NORMAL zone limits.
+ */
+void __init paging_init(void)
+{
+	unsigned long max_zone_pfns[MAX_NR_ZONES];
+	unsigned long pgd_type, asce_bits;
+
+	init_mm.pgd = swapper_pg_dir;
+	if (VMALLOC_END > (1UL << 42)) {
+		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+		pgd_type = _REGION2_ENTRY_EMPTY;
+	} else {
+		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+		pgd_type = _REGION3_ENTRY_EMPTY;
+	}
+	S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
+	/* Mark every top-level entry as empty before anything gets mapped */
+	clear_table((unsigned long *) init_mm.pgd, pgd_type,
+		    sizeof(unsigned long)*2048);
+	vmem_map_init();
+
+        /* enable virtual mapping in kernel mode */
+	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
+	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
+	__ctl_load(S390_lowcore.kernel_asce, 13, 13);
+	/* NOTE(review): presumably sets the DAT bit in the PSW mask - confirm */
+	arch_local_irq_restore(4UL << (BITS_PER_LONG - 8));
+
+	sparse_memory_present_with_active_regions(MAX_NUMNODES);
+	sparse_init();
+	/* Only ZONE_DMA and ZONE_NORMAL get an upper limit; others stay 0 */
+	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+	max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
+	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+	free_area_init_nodes(max_zone_pfns);
+}
+
+/*
+ * mem_init() finishes memory setup during boot: it attaches CPU 0 to
+ * init_mm, records the end of low memory, initialises guest page hinting,
+ * hands all bootmem pages to the page allocator, allocates the zero pages
+ * and prints the memory summary.
+ */
+void __init mem_init(void)
+{
+	/* CPU 0 is the only user of init_mm at this point */
+	if (MACHINE_HAS_TLB_LC)
+		cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
+	cpumask_set_cpu(0, mm_cpumask(&init_mm));
+	atomic_set(&init_mm.context.attach_count, 1);
+
+        max_mapnr = max_low_pfn;
+        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+
+	/* Setup guest page hinting */
+	cmma_init();
+
+	/* this will put all low memory onto the freelists */
+	free_all_bootmem();
+	/* Needs the page allocator, so it must follow free_all_bootmem() */
+	setup_zero_pages();	/* Setup zeroed pages. */
+
+	mem_init_print_info(NULL);
+	printk("Write protected kernel read-only data: %#lx - %#lx\n",
+	       (unsigned long)&_stext,
+	       PFN_ALIGN((unsigned long)&_eshared) - 1);
+}
+
+/*
+ * Release the init memory through free_initmem_default(), passing
+ * POISON_FREE_INITMEM as the poison value.
+ */
+void free_initmem(void)
+{
+	free_initmem_default(POISON_FREE_INITMEM);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+/*
+ * Release the initrd memory range [start, end) as a reserved area labelled
+ * "initrd", using POISON_FREE_INITMEM as the poison value.
+ */
+void __init free_initrd_mem(unsigned long start, unsigned long end)
+{
+	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+			   "initrd");
+}
+#endif
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Hot-add the memory range starting at @start with length @size (bytes)
+ * to node @nid.
+ *
+ * The range is mapped first and then handed to the zones in iteration
+ * order: every zone other than ZONE_MOVABLE only receives the part that
+ * lies inside its current span, ZONE_MOVABLE takes whatever remains.
+ * If adding pages fails the mapping is removed again.
+ *
+ * Returns 0 on success or the error from vmem_add_mapping() /
+ * __add_pages().
+ */
+int arch_add_memory(int nid, u64 start, u64 size)
+{
+	unsigned long zone_first, zone_limit, chunk;
+	unsigned long pfn = PFN_DOWN(start);
+	unsigned long remaining = PFN_DOWN(size);
+	struct zone *zone;
+	int rc;
+
+	rc = vmem_add_mapping(start, size);
+	if (rc)
+		return rc;
+	for_each_zone(zone) {
+		if (zone_idx(zone) == ZONE_MOVABLE) {
+			/* Whatever is still left goes to ZONE_MOVABLE */
+			zone_first = pfn;
+			zone_limit = pfn + remaining;
+		} else {
+			/* Stay within the limits the zone already has */
+			zone_first = zone->zone_start_pfn;
+			zone_limit = zone_first + zone->spanned_pages;
+		}
+		if (pfn < zone_first || pfn >= zone_limit)
+			continue;
+		/* Clip the chunk at the end of this zone */
+		if (pfn + remaining > zone_limit)
+			chunk = zone_limit - pfn;
+		else
+			chunk = remaining;
+		rc = __add_pages(nid, zone, pfn, chunk);
+		if (rc)
+			break;
+		pfn += chunk;
+		remaining -= chunk;
+		if (!remaining)
+			break;
+	}
+	if (rc)
+		vmem_remove_mapping(start, size);
+	return rc;
+}
+
+/*
+ * Returns the memory block size in bytes: the larger of
+ * MIN_MEMORY_BLOCK_SIZE and the value reported by sclp_get_rzm().
+ */
+unsigned long memory_block_size_bytes(void)
+{
+	/*
+	 * Make sure the memory block size is always greater than
+	 * or equal to the memory increment size.
+	 */
+	return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm());
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+/*
+ * Memory hot-remove is not implemented: always returns -EBUSY and ignores
+ * both arguments.
+ */
+int arch_remove_memory(u64 start, u64 size)
+{
+	/*
+	 * There is no hardware or firmware interface which could trigger a
+	 * hot memory remove on s390. So there is nothing that needs to be
+	 * implemented.
+	 */
+	return -EBUSY;
+}
+#endif
+#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
new file mode 100644
index 000000000..8a993a53f
--- /dev/null
+++ b/arch/s390/mm/maccess.c
@@ -0,0 +1,208 @@
+/*
+ * Access kernel memory without faulting -- s390 specific implementation.
+ *
+ * Copyright IBM Corp. 2009, 2015
+ *
+ *   Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/cpu.h>
+#include <asm/ctl_reg.h>
+#include <asm/io.h>
+
+/*
+ * Write at most one eight-byte-aligned doubleword worth of data.
+ *
+ * Copies up to @size bytes from @src to @dst, but never beyond the end of
+ * the aligned doubleword that contains @dst. The doubleword is read into a
+ * local buffer, the new bytes are merged in at the right offset and the
+ * whole doubleword is written back with sturg.
+ *
+ * Returns the number of bytes actually written (1..8).
+ */
+static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
+{
+	unsigned long aligned, offset, count;
+	char tmp[8];
+
+	aligned = (unsigned long) dst & ~7UL;
+	offset = (unsigned long) dst & 7UL;
+	/* Clamp to the bytes left in this doubleword */
+	size = min(8UL - offset, size);
+	/* Length operand for the executed mvc (size - 1) */
+	count = size - 1;
+	/*
+	 * bras loads the address of the first mvc into register 1 and jumps
+	 * to label 0. There the current doubleword is copied into tmp, the
+	 * first mvc is run through ex with count supplying its length so the
+	 * source bytes land at tmp[offset], the merged doubleword is loaded,
+	 * lra translates the aligned address and sturg stores the result.
+	 */
+	asm volatile(
+		"	bras	1,0f\n"
+		"	mvc	0(1,%4),0(%5)\n"
+		"0:	mvc	0(8,%3),0(%0)\n"
+		"	ex	%1,0(1)\n"
+		"	lg	%1,0(%3)\n"
+		"	lra	%0,0(%0)\n"
+		"	sturg	%1,%0\n"
+		: "+&a" (aligned), "+&a" (count), "=m" (tmp)
+		: "a" (&tmp), "a" (&tmp[offset]), "a" (src)
+		: "cc", "memory", "1");
+	return size;
+}
+
+/*
+ * s390_kernel_write - write to kernel memory bypassing DAT
+ * @dst: destination address
+ * @src: source address
+ * @size: number of bytes to copy
+ *
+ * Writes to kernel memory while bypassing DAT and therefore any page table
+ * write protection. The store is done with the sturg instruction, which
+ * makes every step a read-modify-write: eight bytes are read from the
+ * destination at an eight byte boundary, the requested bytes are replaced
+ * and the doubleword is written back. This is repeated until @size bytes
+ * have been copied.
+ *
+ * Note: because of the read-modify-write sequence this function must not
+ *	 be called concurrently on several cpus with overlapping words;
+ *	 doing so may corrupt data.
+ */
+void notrace s390_kernel_write(void *dst, const void *src, size_t size)
+{
+	long done;
+
+	for (; size; size -= done) {
+		done = s390_kernel_write_odd(dst, src, size);
+		dst += done;
+		src += done;
+	}
+}
+
+/*
+ * Copy @count bytes from @src to @dest with the mvcle instruction.
+ *
+ * Returns 0 when the copy ran to completion. rc starts out as -EFAULT and
+ * is only cleared after the copy loop; the exception table entry sends a
+ * fault straight to label 2, so -EFAULT is returned in that case.
+ */
+static int __memcpy_real(void *dest, void *src, size_t count)
+{
+	/* mvcle operands are pinned to the register pairs 2/3 and 4/5 */
+	register unsigned long _dest asm("2") = (unsigned long) dest;
+	register unsigned long _len1 asm("3") = (unsigned long) count;
+	register unsigned long _src  asm("4") = (unsigned long) src;
+	register unsigned long _len2 asm("5") = (unsigned long) count;
+	int rc = -EFAULT;
+
+	/* "jo 0b" re-issues mvcle until the instruction reports completion */
+	asm volatile (
+		"0:	mvcle	%1,%2,0x0\n"
+		"1:	jo	0b\n"
+		"	lhi	%0,0x0\n"
+		"2:\n"
+		EX_TABLE(1b,2b)
+		: "+d" (rc), "+d" (_dest), "+d" (_src), "+d" (_len1),
+		  "+d" (_len2), "=m" (*((long *) dest))
+		: "m" (*((long *) src))
+		: "cc", "memory");
+	return rc;
+}
+
+/*
+ * Copy memory in real mode (kernel to kernel)
+ *
+ * Returns 0 on success (including @count == 0) or the error returned by
+ * __memcpy_real(). Interrupts are disabled for the duration of the copy
+ * and the saved state is restored afterwards.
+ */
+int memcpy_real(void *dest, void *src, size_t count)
+{
+	unsigned long flags;
+	int rc;
+
+	if (!count)
+		return 0;
+	local_irq_save(flags);
+	/* NOTE(review): presumably clears the DAT bit in the PSW - confirm */
+	__arch_local_irq_stnsm(0xfbUL);
+	rc = __memcpy_real(dest, src, count);
+	local_irq_restore(flags);
+	return rc;
+}
+
+/*
+ * Copy memory in absolute mode (kernel to kernel)
+ *
+ * With interrupts disabled and lowcore protection switched off, the prefix
+ * register is temporarily set to 0 (if it is not 0 already) so that the
+ * plain memcpy() is not subject to prefixing. Machine checks are disabled
+ * while the prefix is changed. Control register 0, the prefix and the
+ * interrupt state are restored before returning.
+ */
+void memcpy_absolute(void *dest, void *src, size_t count)
+{
+	unsigned long cr0, flags, prefix;
+
+	flags = arch_local_irq_save();
+	/* Save cr0 so the lowcore protection bit can be restored below */
+	__ctl_store(cr0, 0, 0);
+	__ctl_clear_bit(0, 28); /* disable lowcore protection */
+	prefix = store_prefix();
+	if (prefix) {
+		local_mcck_disable();
+		set_prefix(0);
+		memcpy(dest, src, count);
+		set_prefix(prefix);
+		local_mcck_enable();
+	} else {
+		/* Prefix is already 0, nothing to switch */
+		memcpy(dest, src, count);
+	}
+	__ctl_load(cr0, 0, 0);
+	arch_local_irq_restore(flags);
+}
+
+/*
+ * Copy memory from kernel (real) to user (virtual)
+ *
+ * The data is moved in chunks of at most PAGE_SIZE bytes through a bounce
+ * page: memcpy_real() fills the page, copy_to_user() drains it.
+ *
+ * Returns 0 on success, -ENOMEM if the bounce page cannot be allocated and
+ * -EFAULT if either copy step fails.
+ */
+int copy_to_user_real(void __user *dest, void *src, unsigned long count)
+{
+	/*
+	 * Offset and chunk size share the type of @count so that large
+	 * counts can neither overflow a signed offset nor truncate a chunk.
+	 */
+	unsigned long offs = 0, size;
+	char *buf;
+	int rc;
+
+	buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	rc = -EFAULT;
+	while (offs < count) {
+		size = min(PAGE_SIZE, count - offs);
+		if (memcpy_real(buf, src + offs, size))
+			goto out;
+		if (copy_to_user(dest + offs, buf, size))
+			goto out;
+		offs += size;
+	}
+	rc = 0;
+out:
+	free_page((unsigned long) buf);
+	return rc;
+}
+
+/*
+ * Check if physical address is within prefix or zero page
+ *
+ * Returns 1 if @addr is below sizeof(struct _lowcore) or falls inside the
+ * lowcore of any online cpu, 0 otherwise.
+ */
+static int is_swapped(unsigned long addr)
+{
+	unsigned long lc_start, lc_end;
+	int cpu;
+
+	if (addr < sizeof(struct _lowcore))
+		return 1;
+	for_each_online_cpu(cpu) {
+		lc_start = (unsigned long) lowcore_ptr[cpu];
+		lc_end = lc_start + sizeof(struct _lowcore) - 1;
+		if (addr >= lc_start && addr <= lc_end)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Convert a physical pointer for /dev/mem access
+ *
+ * For addresses inside a swapped prefix page a freshly allocated buffer is
+ * returned that holds a copy of the absolute memory from @addr up to the
+ * end of its page, so the buffer is at most one page in size. The result
+ * is NULL if that buffer cannot be allocated. For all other addresses
+ * @addr itself is returned.
+ *
+ * The set of online cpus is held stable and preemption is disabled while
+ * the lowcore ranges are checked and copied.
+ */
+void *xlate_dev_mem_ptr(phys_addr_t addr)
+{
+	void *ptr = (void *) addr;
+
+	get_online_cpus();
+	preempt_disable();
+	if (!is_swapped(addr))
+		goto out;
+	ptr = (void *) __get_free_page(GFP_ATOMIC);
+	if (ptr)
+		memcpy_absolute(ptr, (void *) addr,
+				PAGE_SIZE - (addr & ~PAGE_MASK));
+out:
+	preempt_enable();
+	put_online_cpus();
+	return ptr;
+}
+
+/*
+ * Free converted buffer for /dev/mem access (if necessary)
+ *
+ * @buf is only freed when it differs from @addr, i.e. when
+ * xlate_dev_mem_ptr() handed out a bounce page.
+ */
+void unxlate_dev_mem_ptr(phys_addr_t addr, void *buf)
+{
+	if ((void *) addr == buf)
+		return;
+	free_page((unsigned long) buf);
+}
diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c
new file mode 100644
index 000000000..0f3604395
--- /dev/null
+++ b/arch/s390/mm/mem_detect.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright IBM Corp. 2008, 2009
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/memblock.h>
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <asm/ipl.h>
+#include <asm/sclp.h>
+#include <asm/setup.h>
+
+/* The 2 GiB address boundary (1 << 31) */
+#define ADDR2G (1ULL << 31)
+
+/* Chunk types that detect_memory_memblock() compares tprot() results with */
+#define CHUNK_READ_WRITE 0
+#define CHUNK_READ_ONLY  1
+
+/*
+ * Register the range [start, start + size) in both the memblock.memory and
+ * the memblock.physmem lists, with node id 0 and no flags.
+ */
+static inline void memblock_physmem_add(phys_addr_t start, phys_addr_t size)
+{
+	memblock_add_range(&memblock.memory, start, size, 0, 0);
+	memblock_add_range(&memblock.physmem, start, size, 0, 0);
+}
+
+/*
+ * Probe physical memory and register it with memblock.
+ *
+ * Memory is scanned in steps of the increment size reported by
+ * sclp_get_rzm() (1ULL << 17 if that is 0). Consecutive increments for
+ * which tprot() returns the same type are merged into one chunk; chunks of
+ * type CHUNK_READ_WRITE or CHUNK_READ_ONLY are added via
+ * memblock_physmem_add(). max_physmem_end is set to rzm * rnmax and, if
+ * that is 0, to memblock_end_of_DRAM() once scanning is done.
+ */
+void __init detect_memory_memblock(void)
+{
+	unsigned long long memsize, rnmax, rzm;
+	unsigned long addr, size;
+	int type;
+
+	rzm = sclp_get_rzm();
+	rnmax = sclp_get_rnmax();
+	/* Computed before the fallback below, so it is 0 when rzm is 0 */
+	memsize = rzm * rnmax;
+	if (!rzm)
+		rzm = 1ULL << 17;
+	max_physmem_end = memsize;
+	addr = 0;
+	/* keep memblock lists close to the kernel */
+	memblock_set_bottom_up(true);
+	do {
+		size = 0;
+		type = tprot(addr);
+		/* Grow the chunk while the following increment has the same type */
+		do {
+			size += rzm;
+			if (max_physmem_end && addr + size >= max_physmem_end)
+				break;
+		} while (type == tprot(addr + size));
+		if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) {
+			/* Do not register memory beyond the known end */
+			if (max_physmem_end && (addr + size > max_physmem_end))
+				size = max_physmem_end - addr;
+			memblock_physmem_add(addr, size);
+		}
+		addr += size;
+	} while (addr < max_physmem_end);
+	memblock_set_bottom_up(false);
+	if (!max_physmem_end)
+		max_physmem_end = memblock_end_of_DRAM();
+}
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
new file mode 100644
index 000000000..6e552af08
--- /dev/null
+++ b/arch/s390/mm/mmap.c
@@ -0,0 +1,288 @@
+/*
+ *  flexible mmap layout support
+ *
+ * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *
+ * Started by Ingo Molnar <mingo@elte.hu>
+ */
+
+#include <linux/personality.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/compat.h>
+#include <linux/security.h>
+#include <asm/pgalloc.h>
+
+/*
+ * Both masks are in units of pages and are chosen per machine type by
+ * setup_mmap_rnd(): mmap_rnd_mask limits the random mmap offset of
+ * non-32-bit tasks, mmap_align_mask is the alignment applied to file and
+ * shared mappings of such tasks.
+ */
+unsigned long mmap_rnd_mask;
+static unsigned long mmap_align_mask;
+
+/*
+ * Upper bound of the stack randomisation for the current task:
+ * STACK_RND_MASK pages, or 0 when the task does not have PF_RANDOMIZE set
+ * or its personality has ADDR_NO_RANDOMIZE.
+ */
+static unsigned long stack_maxrandom_size(void)
+{
+	if (!(current->flags & PF_RANDOMIZE) ||
+	    (current->personality & ADDR_NO_RANDOMIZE))
+		return 0;
+	return STACK_RND_MASK << PAGE_SHIFT;
+}
+
+/*
+ * Top of mmap area (just below the process stack).
+ *
+ * Leave at least a ~32 MB hole, and at most five sixths of STACK_TOP.
+ */
+#define MIN_GAP (32*1024*1024)
+#define MAX_GAP (STACK_TOP/6*5)
+
+/*
+ * Decide whether the current task gets the legacy (bottom-up) mmap layout.
+ * Returns 1 if the personality requests ADDR_COMPAT_LAYOUT or the stack
+ * rlimit is unlimited, otherwise the value of sysctl_legacy_va_layout.
+ */
+static inline int mmap_is_legacy(void)
+{
+	if ((current->personality & ADDR_COMPAT_LAYOUT) ||
+	    rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+		return 1;
+	return sysctl_legacy_va_layout;
+}
+
+/*
+ * Returns a random, page-aligned offset for the mmap base. 32-bit tasks
+ * use a fixed mask of 0x7ff pages, all other tasks use mmap_rnd_mask.
+ */
+unsigned long arch_mmap_rnd(void)
+{
+	unsigned long mask = is_32bit_task() ? 0x7ff : mmap_rnd_mask;
+
+	return (get_random_int() & mask) << PAGE_SHIFT;
+}
+
+/* Legacy layout: the mmap area starts at TASK_UNMAPPED_BASE plus @rnd. */
+static unsigned long mmap_base_legacy(unsigned long rnd)
+{
+	return TASK_UNMAPPED_BASE + rnd;
+}
+
+/*
+ * Top-down layout: compute the upper end of the mmap area.
+ *
+ * The gap reserved for the stack is the stack rlimit, raised to MIN_GAP or
+ * lowered to MAX_GAP if it is outside that range, and rounded down to a
+ * page boundary. The base is STACK_TOP minus the maximum stack
+ * randomisation, minus @rnd, minus that gap.
+ */
+static inline unsigned long mmap_base(unsigned long rnd)
+{
+	unsigned long stack_limit = rlimit(RLIMIT_STACK);
+	unsigned long gap = stack_limit;
+
+	if (stack_limit < MIN_GAP)
+		gap = MIN_GAP;
+	else if (stack_limit > MAX_GAP)
+		gap = MAX_GAP;
+	return STACK_TOP - stack_maxrandom_size() - rnd - (gap & PAGE_MASK);
+}
+
+/*
+ * Bottom-up search for an unmapped area of @len bytes.
+ *
+ * Returns -ENOMEM if @len cannot fit between mmap_min_addr and TASK_SIZE,
+ * and @addr unchanged for MAP_FIXED requests. A non-zero @addr hint is
+ * page aligned and used if the range is free and within limits. Otherwise
+ * the search runs from mm->mmap_base up to TASK_SIZE; file-backed or
+ * shared mappings of non-32-bit tasks are aligned according to
+ * mmap_align_mask, offset by @pgoff.
+ */
+unsigned long
+arch_get_unmapped_area(struct file *filp, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+	struct vm_area_struct *next;
+	unsigned long color_mask = 0;
+
+	if (len > TASK_SIZE - mmap_min_addr)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED)
+		return addr;
+
+	/* Honour the caller's hint if the range is usable */
+	if (addr) {
+		addr = PAGE_ALIGN(addr);
+		next = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+		    (!next || addr + len <= next->vm_start))
+			return addr;
+	}
+
+	if ((filp || (flags & MAP_SHARED)) && !is_32bit_task())
+		color_mask = mmap_align_mask << PAGE_SHIFT;
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = mm->mmap_base;
+	info.high_limit = TASK_SIZE;
+	info.align_mask = color_mask;
+	info.align_offset = pgoff << PAGE_SHIFT;
+	return vm_unmapped_area(&info);
+}
+
+/*
+ * Top-down search for an unmapped area of @len bytes.
+ *
+ * Same contract as arch_get_unmapped_area(), but the search runs downwards
+ * from mm->mmap_base to max(PAGE_SIZE, mmap_min_addr). If that search
+ * fails, a bottom-up search between TASK_UNMAPPED_BASE and TASK_SIZE is
+ * tried before giving up.
+ */
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+			  const unsigned long len, const unsigned long pgoff,
+			  const unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_unmapped_area_info info;
+	struct vm_area_struct *next;
+	unsigned long color_mask = 0;
+	unsigned long addr = addr0;
+
+	/* requested length too big for entire address space */
+	if (len > TASK_SIZE - mmap_min_addr)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED)
+		return addr;
+
+	/* requesting a specific address */
+	if (addr) {
+		addr = PAGE_ALIGN(addr);
+		next = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+				(!next || addr + len <= next->vm_start))
+			return addr;
+	}
+
+	if ((filp || (flags & MAP_SHARED)) && !is_32bit_task())
+		color_mask = mmap_align_mask << PAGE_SHIFT;
+
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+	info.high_limit = mm->mmap_base;
+	info.align_mask = color_mask;
+	info.align_offset = pgoff << PAGE_SHIFT;
+	addr = vm_unmapped_area(&info);
+	if (!(addr & ~PAGE_MASK))
+		return addr;
+
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	VM_BUG_ON(addr != -ENOMEM);
+	info.flags = 0;
+	info.low_limit = TASK_UNMAPPED_BASE;
+	info.high_limit = TASK_SIZE;
+	return vm_unmapped_area(&info);
+}
+
+/*
+ * Upgrade the page table of the current mm if a mapping would reach
+ * TASK_SIZE.
+ *
+ * Nothing is done for compat tasks or when TASK_SIZE is already at least
+ * 1UL << 53. Only MAP_FIXED requests contribute their address; for all
+ * others just @len is compared against TASK_SIZE. Returns 0 or the result
+ * of crst_table_upgrade().
+ */
+int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
+{
+	unsigned long end;
+
+	if (is_compat_task() || (TASK_SIZE >= (1UL << 53)))
+		return 0;
+	end = ((flags & MAP_FIXED) ? addr : 0) + len;
+	if (end < TASK_SIZE)
+		return 0;
+	return crst_table_upgrade(current->mm, 1UL << 53);
+}
+
+/*
+ * Bottom-up get_unmapped_area handler that can grow the address space.
+ *
+ * If the plain search fails with -ENOMEM for a non-compat task whose
+ * TASK_SIZE is below 1UL << 53, the page table is upgraded and the search
+ * is repeated once. A failing upgrade returns its error code.
+ */
+static unsigned long
+s390_get_unmapped_area(struct file *filp, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long area;
+	int rc;
+
+	area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
+	if (!(area & ~PAGE_MASK))
+		return area;
+	if (area != -ENOMEM || is_compat_task() || TASK_SIZE >= (1UL << 53))
+		return area;
+
+	/* Upgrade the page table to 4 levels and retry. */
+	rc = crst_table_upgrade(mm, 1UL << 53);
+	if (rc)
+		return (unsigned long) rc;
+	return arch_get_unmapped_area(filp, addr, len, pgoff, flags);
+}
+
+/*
+ * Top-down get_unmapped_area handler that can grow the address space.
+ *
+ * If the plain search fails with -ENOMEM for a non-compat task whose
+ * TASK_SIZE is below 1UL << 53, the page table is upgraded and the search
+ * is repeated once. A failing upgrade returns its error code.
+ */
+static unsigned long
+s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
+			  const unsigned long len, const unsigned long pgoff,
+			  const unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long area;
+	int rc;
+
+	area = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
+	if (!(area & ~PAGE_MASK))
+		return area;
+	if (area != -ENOMEM || is_compat_task() || TASK_SIZE >= (1UL << 53))
+		return area;
+
+	/* Upgrade the page table to 4 levels and retry. */
+	rc = crst_table_upgrade(mm, 1UL << 53);
+	if (rc)
+		return (unsigned long) rc;
+	return arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
+}
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use.
+ *
+ * The mmap base is randomised only if the current task has PF_RANDOMIZE
+ * set.
+ */
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+	unsigned long rnd;
+
+	rnd = (current->flags & PF_RANDOMIZE) ? arch_mmap_rnd() : 0UL;
+
+	if (!mmap_is_legacy()) {
+		mm->mmap_base = mmap_base(rnd);
+		mm->get_unmapped_area = s390_get_unmapped_area_topdown;
+		return;
+	}
+	/*
+	 * Fall back to the standard layout if the personality
+	 * bit is set, or if the expected stack growth is unlimited:
+	 */
+	mm->mmap_base = mmap_base_legacy(rnd);
+	mm->get_unmapped_area = s390_get_unmapped_area;
+}
+
+/*
+ * Early initcall that selects mmap_rnd_mask and mmap_align_mask from the
+ * machine type reported by get_cpu_id(). The machine types listed
+ * explicitly get an 11-bit random mask and no alignment; 0x2964 (z13) and
+ * any unlisted type get the mask 0x3ff80 together with a 0x7f alignment
+ * mask. Always returns 0.
+ */
+static int __init setup_mmap_rnd(void)
+{
+	struct cpuid cpu_id;
+
+	get_cpu_id(&cpu_id);
+	switch (cpu_id.machine) {
+	case 0x9672:
+	case 0x2064:
+	case 0x2066:
+	case 0x2084:
+	case 0x2086:
+	case 0x2094:
+	case 0x2096:
+	case 0x2097:
+	case 0x2098:
+	case 0x2817:
+	case 0x2818:
+	case 0x2827:
+	case 0x2828:
+		mmap_rnd_mask = 0x7ffUL;
+		mmap_align_mask = 0UL;
+		break;
+	case 0x2964:	/* z13 */
+	default:
+		/* The low 7 bits stay clear, matching the 0x7f alignment mask */
+		mmap_rnd_mask = 0x3ff80UL;
+		mmap_align_mask = 0x7fUL;
+		break;
+	}
+	return 0;
+}
+early_initcall(setup_mmap_rnd);
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
new file mode 100644
index 000000000..a90d45e9d
--- /dev/null
+++ b/arch/s390/mm/page-states.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright IBM Corp. 2008
+ *
+ * Guest page hinting for unused pages.
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+
+/* Operation codes passed as the immediate operand of the page state insn */
+#define ESSA_SET_STABLE		1
+#define ESSA_SET_UNUSED		2
+
+/* Non-zero while guest page hinting is enabled; defaults to on */
+static int cmma_flag = 1;
+
+/*
+ * Parse the "cmma=" kernel parameter.
+ *
+ * "yes" and "on" enable page hinting; any other value disables it.
+ * Returns 1 for the recognised values "yes", "on", "no" and "off", and 0
+ * for everything else.
+ */
+static int __init cmma(char *str)
+{
+	char *val = strstrip(str);
+
+	if (!strcmp(val, "yes") || !strcmp(val, "on")) {
+		cmma_flag = 1;
+		return 1;
+	}
+	cmma_flag = 0;
+	return !strcmp(val, "no") || !strcmp(val, "off");
+}
+__setup("cmma=", cmma);
+
+/*
+ * Probe whether the page state instruction is usable and clear cmma_flag
+ * if it is not. Does nothing when page hinting is already disabled.
+ */
+void __init cmma_init(void)
+{
+	register unsigned long tmp asm("0") = 0;
+	register int rc asm("1") = -EOPNOTSUPP;
+
+	if (!cmma_flag)
+		return;
+	/*
+	 * Issue the instruction once. If it completes, "la" clears rc; if it
+	 * raises an exception, the exception table entry resumes at label 1
+	 * and rc keeps its -EOPNOTSUPP preset.
+	 */
+	asm volatile(
+		"       .insn rrf,0xb9ab0000,%1,%1,0,0\n"
+		"0:     la      %0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "+&d" (rc), "+&d" (tmp));
+	if (rc)
+		cmma_flag = 0;
+}
+
+/*
+ * Issue the page state instruction with ESSA_SET_UNUSED for each of the
+ * 2^order pages starting at @page. The instruction's result is not
+ * evaluated.
+ */
+static inline void set_page_unstable(struct page *page, int order)
+{
+	int i, rc;
+
+	for (i = 0; i < (1 << order); i++)
+		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
+			     : "=&d" (rc)
+			     : "a" (page_to_phys(page + i)),
+			       "i" (ESSA_SET_UNUSED));
+}
+
+/*
+ * Hook for freed pages: with page hinting enabled, mark the 2^order pages
+ * starting at @page as unused.
+ */
+void arch_free_page(struct page *page, int order)
+{
+	if (cmma_flag)
+		set_page_unstable(page, order);
+}
+
+/*
+ * Issue the page state instruction with ESSA_SET_STABLE for each of the
+ * 2^order pages starting at @page. The instruction's result is not
+ * evaluated.
+ */
+static inline void set_page_stable(struct page *page, int order)
+{
+	int i, rc;
+
+	for (i = 0; i < (1 << order); i++)
+		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
+			     : "=&d" (rc)
+			     : "a" (page_to_phys(page + i)),
+			       "i" (ESSA_SET_STABLE));
+}
+
+/*
+ * Hook for allocated pages: with page hinting enabled, mark the 2^order
+ * pages starting at @page as stable.
+ */
+void arch_alloc_page(struct page *page, int order)
+{
+	if (cmma_flag)
+		set_page_stable(page, order);
+}
+
+/*
+ * Set the state of every page that currently sits on a free list.
+ *
+ * With @make_stable non-zero the local per-cpu page lists are drained
+ * first and all free pages are marked stable; otherwise they are marked
+ * unused. Each populated zone is walked under its zone lock with
+ * interrupts disabled. Does nothing when page hinting is disabled.
+ */
+void arch_set_page_states(int make_stable)
+{
+	unsigned long flags, order, t;
+	struct page *page;
+	struct zone *zone;
+
+	if (!cmma_flag)
+		return;
+	if (make_stable)
+		drain_local_pages(NULL);
+	for_each_populated_zone(zone) {
+		spin_lock_irqsave(&zone->lock, flags);
+		for_each_migratetype_order(order, t) {
+			list_for_each_entry(page,
+					    &zone->free_area[order].free_list[t],
+					    lru) {
+				if (make_stable)
+					set_page_stable(page, order);
+				else
+					set_page_unstable(page, order);
+			}
+		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+	}
+}
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
new file mode 100644
index 000000000..749c98407
--- /dev/null
+++ b/arch/s390/mm/pageattr.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright IBM Corp. 2011
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/hugetlb.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <asm/cacheflush.h>
+#include <asm/facility.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+
+#if PAGE_DEFAULT_KEY
+/*
+ * Set the storage key @skey with a single instruction that also updates
+ * @addr; the updated address is returned so the caller can continue from
+ * there.
+ * NOTE(review): opcode 0xb22b with mask 9 is presumably the multi-block
+ * form of SSKE - confirm against the architecture manual.
+ */
+static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
+{
+	asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0"
+		     : [addr] "+a" (addr) : [skey] "d" (skey));
+	return addr;
+}
+
+/*
+ * Set the storage key of every page in [start, end) to PAGE_DEFAULT_KEY.
+ *
+ * On machines with EDAT1 the frame-wise sske_frame() is used whenever the
+ * next 1MB boundary still lies within the range; everything else is done
+ * page by page.
+ */
+void __storage_key_init_range(unsigned long start, unsigned long end)
+{
+	unsigned long boundary, size;
+
+	while (start < end) {
+		if (MACHINE_HAS_EDAT1) {
+			/* set storage keys for a 1MB frame */
+			size = 1UL << 20;
+			/* Next 1MB boundary above start */
+			boundary = (start + size) & ~(size - 1);
+			if (boundary <= end) {
+				/* sske_frame() returns where to continue */
+				do {
+					start = sske_frame(start, PAGE_DEFAULT_KEY);
+				} while (start < boundary);
+				continue;
+			}
+		}
+		page_set_storage_key(start, PAGE_DEFAULT_KEY, 0);
+		start += PAGE_SIZE;
+	}
+}
+#endif
+
+/*
+ * Walk the kernel page table for @addr down to the pte level.
+ *
+ * Returns the pte pointer, or NULL if any level is empty or if the address
+ * is covered by a large pud or pmd entry (there is no pte in that case).
+ */
+static pte_t *walk_page_table(unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset_k(addr);
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	if (pgd_none(*pgd))
+		return NULL;
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud) || pud_large(*pud))
+		return NULL;
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd) || pmd_large(*pmd))
+		return NULL;
+	pte = pte_offset_kernel(pmd, addr);
+	return pte_none(*pte) ? NULL : pte;
+}
+
+/*
+ * Apply @set to the ptes of @numpages consecutive kernel pages starting at
+ * @addr. For each page the new pte value is computed, the old entry is
+ * invalidated with __ptep_ipte() and the new value is stored. Processing
+ * stops with a one-time warning at the first address without a pte.
+ */
+static void change_page_attr(unsigned long addr, int numpages,
+			     pte_t (*set) (pte_t))
+{
+	pte_t *ptep, updated;
+	int done;
+
+	for (done = 0; done < numpages; done++, addr += PAGE_SIZE) {
+		ptep = walk_page_table(addr);
+		if (WARN_ON_ONCE(!ptep))
+			return;
+		updated = set(*ptep);
+		__ptep_ipte(addr, ptep);
+		*ptep = updated;
+	}
+}
+
+/*
+ * Write-protect @numpages kernel pages starting at @addr by applying
+ * pte_wrprotect to each pte. Always returns 0.
+ */
+int set_memory_ro(unsigned long addr, int numpages)
+{
+	change_page_attr(addr, numpages, pte_wrprotect);
+	return 0;
+}
+
+/*
+ * Make @numpages kernel pages starting at @addr writable by applying
+ * pte_mkwrite to each pte. Always returns 0.
+ */
+int set_memory_rw(unsigned long addr, int numpages)
+{
+	change_page_attr(addr, numpages, pte_mkwrite);
+	return 0;
+}
+
+/* not possible: no-op that ignores its arguments and always returns 0 */
+int set_memory_nx(unsigned long addr, int numpages)
+{
+	return 0;
+}
+
+/* No-op that ignores its arguments and always returns 0. */
+int set_memory_x(unsigned long addr, int numpages)
+{
+	return 0;
+}
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+
+/*
+ * Invalidate @nr consecutive ptes starting at @pte, which map @address and
+ * the pages following it. If facility 13 is available the whole range is
+ * handled by one __ptep_ipte_range() call, otherwise entry by entry.
+ */
+static void ipte_range(pte_t *pte, unsigned long address, int nr)
+{
+	int idx;
+
+	if (!test_facility(13)) {
+		for (idx = 0; idx < nr; idx++)
+			__ptep_ipte(address + idx * PAGE_SIZE, pte + idx);
+		return;
+	}
+	__ptep_ipte_range(address, nr - 1, pte);
+}
+
+/*
+ * Map (@enable != 0) or unmap (@enable == 0) @numpages kernel pages
+ * starting at @page.
+ *
+ * The pages are processed in batches that never cross the end of a page
+ * table. When enabling, each pte is rewritten with the physical address of
+ * its page; when disabling, the ptes are invalidated via ipte_range().
+ */
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	unsigned long address;
+	int nr, i, j;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	for (i = 0; i < numpages;) {
+		address = page_to_phys(page + i);
+		pgd = pgd_offset_k(address);
+		pud = pud_offset(pgd, address);
+		pmd = pmd_offset(pud, address);
+		pte = pte_offset_kernel(pmd, address);
+		/* Index of this pte within its page table ... */
+		nr = (unsigned long)pte >> ilog2(sizeof(long));
+		/* ... turned into the number of entries left in the table */
+		nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
+		nr = min(numpages - i, nr);
+		if (enable) {
+			for (j = 0; j < nr; j++) {
+				pte_val(*pte) = __pa(address);
+				address += PAGE_SIZE;
+				pte++;
+			}
+		} else {
+			ipte_range(pte, address, nr);
+		}
+		i += nr;
+	}
+}
+
+#ifdef CONFIG_HIBERNATION
+/*
+ * Returns true if the kernel address of @page can currently be
+ * translated: lra is issued on the address and its condition code is
+ * extracted; a condition code of 0 counts as present.
+ */
+bool kernel_page_present(struct page *page)
+{
+	unsigned long addr;
+	int cc;
+
+	addr = page_to_phys(page);
+	/* ipm + srl 28 move the condition code set by lra into cc */
+	asm volatile(
+		"	lra	%1,0(%1)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28"
+		: "=d" (cc), "+a" (addr) : : "cc");
+	return cc == 0;
+}
+#endif /* CONFIG_HIBERNATION */
+
+#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
new file mode 100644
index 000000000..b33f66110
--- /dev/null
+++ b/arch/s390/mm/pgtable.c
@@ -0,0 +1,1412 @@
+/*
+ *    Copyright IBM Corp. 2007, 2011
+ *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/quicklist.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/swapops.h>
+#include <linux/sysctl.h>
+#include <linux/ksm.h>
+#include <linux/mman.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+
+#define ALLOC_ORDER	2
+#define FRAG_MASK	0x03
+
+unsigned long *crst_table_alloc(struct mm_struct *mm)
+{
+	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+
+	if (!page)
+		return NULL;
+	return (unsigned long *) page_to_phys(page);
+}
+
+void crst_table_free(struct mm_struct *mm, unsigned long *table)
+{
+	free_pages((unsigned long) table, ALLOC_ORDER);
+}
+
+static void __crst_table_upgrade(void *arg)
+{
+	struct mm_struct *mm = arg;
+
+	if (current->active_mm == mm) {
+		clear_user_asce();
+		set_user_asce(mm);
+	}
+	__tlb_flush_local();
+}
+
+int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
+{
+	unsigned long *table, *pgd;
+	unsigned long entry;
+	int flush;
+
+	BUG_ON(limit > (1UL << 53));
+	flush = 0;
+repeat:
+	table = crst_table_alloc(mm);
+	if (!table)
+		return -ENOMEM;
+	spin_lock_bh(&mm->page_table_lock);
+	if (mm->context.asce_limit < limit) {
+		pgd = (unsigned long *) mm->pgd;
+		if (mm->context.asce_limit <= (1UL << 31)) {
+			entry = _REGION3_ENTRY_EMPTY;
+			mm->context.asce_limit = 1UL << 42;
+			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+						_ASCE_USER_BITS |
+						_ASCE_TYPE_REGION3;
+		} else {
+			entry = _REGION2_ENTRY_EMPTY;
+			mm->context.asce_limit = 1UL << 53;
+			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+						_ASCE_USER_BITS |
+						_ASCE_TYPE_REGION2;
+		}
+		crst_table_init(table, entry);
+		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
+		mm->pgd = (pgd_t *) table;
+		mm->task_size = mm->context.asce_limit;
+		table = NULL;
+		flush = 1;
+	}
+	spin_unlock_bh(&mm->page_table_lock);
+	if (table)
+		crst_table_free(mm, table);
+	if (mm->context.asce_limit < limit)
+		goto repeat;
+	if (flush)
+		on_each_cpu(__crst_table_upgrade, mm, 0);
+	return 0;
+}
+
+void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
+{
+	pgd_t *pgd;
+
+	if (current->active_mm == mm) {
+		clear_user_asce();
+		__tlb_flush_mm(mm);
+	}
+	while (mm->context.asce_limit > limit) {
+		pgd = mm->pgd;
+		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
+		case _REGION_ENTRY_TYPE_R2:
+			mm->context.asce_limit = 1UL << 42;
+			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+						_ASCE_USER_BITS |
+						_ASCE_TYPE_REGION3;
+			break;
+		case _REGION_ENTRY_TYPE_R3:
+			mm->context.asce_limit = 1UL << 31;
+			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+						_ASCE_USER_BITS |
+						_ASCE_TYPE_SEGMENT;
+			break;
+		default:
+			BUG();
+		}
+		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
+		mm->task_size = mm->context.asce_limit;
+		crst_table_free(mm, (unsigned long *) pgd);
+	}
+	if (current->active_mm == mm)
+		set_user_asce(mm);
+}
+
+#ifdef CONFIG_PGSTE
+
+/**
+ * gmap_alloc - allocate a guest address space
+ * @mm: pointer to the parent mm_struct
+ * @limit: maximum size of the gmap address space
+ *
+ * Returns a guest address space structure, or NULL if an allocation fails.
+ */
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
+{
+	struct gmap *gmap;
+	struct page *page;
+	unsigned long *table;
+	unsigned long etype, atype;
+
+	if (limit < (1UL << 31)) {
+		limit = (1UL << 31) - 1;
+		atype = _ASCE_TYPE_SEGMENT;
+		etype = _SEGMENT_ENTRY_EMPTY;
+	} else if (limit < (1UL << 42)) {
+		limit = (1UL << 42) - 1;
+		atype = _ASCE_TYPE_REGION3;
+		etype = _REGION3_ENTRY_EMPTY;
+	} else if (limit < (1UL << 53)) {
+		limit = (1UL << 53) - 1;
+		atype = _ASCE_TYPE_REGION2;
+		etype = _REGION2_ENTRY_EMPTY;
+	} else {
+		limit = -1UL;
+		atype = _ASCE_TYPE_REGION1;
+		etype = _REGION1_ENTRY_EMPTY;
+	}
+	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
+	if (!gmap)
+		goto out;
+	INIT_LIST_HEAD(&gmap->crst_list);
+	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
+	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
+	spin_lock_init(&gmap->guest_table_lock);
+	gmap->mm = mm;
+	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	if (!page)
+		goto out_free;
+	page->index = 0;
+	list_add(&page->lru, &gmap->crst_list);
+	table = (unsigned long *) page_to_phys(page);
+	crst_table_init(table, etype);
+	gmap->table = table;
+	gmap->asce = atype | _ASCE_TABLE_LENGTH |
+		_ASCE_USER_BITS | __pa(table);
+	gmap->asce_end = limit;
+	down_write(&mm->mmap_sem);
+	list_add(&gmap->list, &mm->context.gmap_list);
+	up_write(&mm->mmap_sem);
+	return gmap;
+
+out_free:
+	kfree(gmap);
+out:
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(gmap_alloc);
+
+static void gmap_flush_tlb(struct gmap *gmap)
+{
+	if (MACHINE_HAS_IDTE)
+		__tlb_flush_asce(gmap->mm, gmap->asce);
+	else
+		__tlb_flush_global();
+}
+
+static void gmap_radix_tree_free(struct radix_tree_root *root)
+{
+	struct radix_tree_iter iter;
+	unsigned long indices[16];
+	unsigned long index;
+	void **slot;
+	int i, nr;
+
+	/* A radix tree is freed by deleting all of its entries */
+	index = 0;
+	do {
+		nr = 0;
+		radix_tree_for_each_slot(slot, root, &iter, index) {
+			indices[nr] = iter.index;
+			if (++nr == 16)
+				break;
+		}
+		for (i = 0; i < nr; i++) {
+			index = indices[i];
+			radix_tree_delete(root, index);
+		}
+	} while (nr > 0);
+}
+
+/**
+ * gmap_free - free a guest address space
+ * @gmap: pointer to the guest address space structure
+ *
+ * Flushes the TLB, frees the crst table pages and both radix trees, unlinks
+ * the gmap from its mm under mmap_sem and frees the structure itself.
+ */
+void gmap_free(struct gmap *gmap)
+{
+	struct page *page, *next;
+
+	/* Flush tlb (per-asce with IDTE, global otherwise). */
+	gmap_flush_tlb(gmap);
+
+	/* Free all segment & region tables. */
+	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
+		__free_pages(page, ALLOC_ORDER);
+	gmap_radix_tree_free(&gmap->guest_to_host);
+	gmap_radix_tree_free(&gmap->host_to_guest);
+	down_write(&gmap->mm->mmap_sem);
+	list_del(&gmap->list);
+	up_write(&gmap->mm->mmap_sem);
+	kfree(gmap);
+}
+EXPORT_SYMBOL_GPL(gmap_free);
+
+/**
+ * gmap_enable - switch primary space to the guest address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_enable(struct gmap *gmap)
+{
+	S390_lowcore.gmap = (unsigned long) gmap;
+}
+EXPORT_SYMBOL_GPL(gmap_enable);
+
+/**
+ * gmap_disable - switch back to the standard primary address space
+ * @gmap: pointer to the guest address space structure
+ */
+void gmap_disable(struct gmap *gmap)
+{
+	S390_lowcore.gmap = 0UL;
+}
+EXPORT_SYMBOL_GPL(gmap_disable);
+
+/*
+ * gmap_alloc_table is assumed to be called with mmap_sem held
+ */
+static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
+			    unsigned long init, unsigned long gaddr)
+{
+	struct page *page;
+	unsigned long *new;
+
+	/* since we don't free the gmap table until gmap_free we can unlock */
+	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	if (!page)
+		return -ENOMEM;
+	new = (unsigned long *) page_to_phys(page);
+	crst_table_init(new, init);
+	spin_lock(&gmap->mm->page_table_lock);
+	if (*table & _REGION_ENTRY_INVALID) {
+		list_add(&page->lru, &gmap->crst_list);
+		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
+			(*table & _REGION_ENTRY_TYPE_MASK);
+		page->index = gaddr;
+		page = NULL;
+	}
+	spin_unlock(&gmap->mm->page_table_lock);
+	if (page)
+		__free_pages(page, ALLOC_ORDER);
+	return 0;
+}
+
+/**
+ * __gmap_segment_gaddr - find virtual address from segment pointer
+ * @entry: pointer to a segment table entry in the guest address space
+ *
+ * Returns the virtual address in the guest address space for the segment
+ */
+static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+{
+	struct page *page;
+	unsigned long offset, mask;
+
+	offset = (unsigned long) entry / sizeof(unsigned long);
+	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
+	mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
+	page = virt_to_page((void *)((unsigned long) entry & mask));
+	return page->index + offset;
+}
+
+/**
+ * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
+ * @gmap: pointer to the guest address space structure
+ * @vmaddr: address in the host process address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
+{
+	unsigned long *entry;
+	int flush = 0;
+
+	spin_lock(&gmap->guest_table_lock);
+	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+	if (entry) {
+		flush = (*entry != _SEGMENT_ENTRY_INVALID);
+		*entry = _SEGMENT_ENTRY_INVALID;
+	}
+	spin_unlock(&gmap->guest_table_lock);
+	return flush;
+}
+
+/**
+ * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
+ * @gmap: pointer to the guest address space structure
+ * @gaddr: address in the guest address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
+{
+	unsigned long vmaddr;
+
+	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
+						   gaddr >> PMD_SHIFT);
+	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
+}
+
+/**
+ * gmap_unmap_segment - unmap segment from the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @to: address in the guest address space
+ * @len: length of the memory area to unmap
+ *
+ * Returns 0 if the unmap succeeded, -EINVAL if not.
+ */
+int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
+{
+	unsigned long off;
+	int flush;
+
+	if ((to | len) & (PMD_SIZE - 1))
+		return -EINVAL;
+	if (len == 0 || to + len < to)
+		return -EINVAL;
+
+	flush = 0;
+	down_write(&gmap->mm->mmap_sem);
+	for (off = 0; off < len; off += PMD_SIZE)
+		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+	up_write(&gmap->mm->mmap_sem);
+	if (flush)
+		gmap_flush_tlb(gmap);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_unmap_segment);
+
+/**
+ * gmap_map_segment - map a segment to the guest address space
+ * @gmap: pointer to the guest address space structure
+ * @from: source address in the parent address space
+ * @to: target address in the guest address space
+ * @len: length of the memory area to map
+ *
+ * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
+ */
+int gmap_map_segment(struct gmap *gmap, unsigned long from,
+		     unsigned long to, unsigned long len)
+{
+	unsigned long off;
+	int flush = 0;
+
+	if ((from | to | len) & (PMD_SIZE - 1))
+		return -EINVAL;
+	/* asce_end is the last valid guest address, hence "to + len - 1" */
+	if (len == 0 || from + len < from || to + len < to ||
+	    from + len > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end)
+		return -EINVAL;
+
+	down_write(&gmap->mm->mmap_sem);
+	for (off = 0; off < len; off += PMD_SIZE) {
+		/* Remove old translation */
+		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+		/* Store new translation */
+		if (radix_tree_insert(&gmap->guest_to_host,
+				      (to + off) >> PMD_SHIFT,
+				      (void *) from + off))
+			break;
+	}
+	up_write(&gmap->mm->mmap_sem);
+	if (flush)
+		gmap_flush_tlb(gmap);
+	if (off >= len)
+		return 0;
+	gmap_unmap_segment(gmap, to, len);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(gmap_map_segment);
+
+/**
+ * __gmap_translate - translate a guest address to a user space address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+	unsigned long vmaddr;
+
+	vmaddr = (unsigned long)
+		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
+	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
+}
+EXPORT_SYMBOL_GPL(__gmap_translate);
+
+/**
+ * gmap_translate - translate a guest address to a user space address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ */
+unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+	unsigned long rc;
+
+	down_read(&gmap->mm->mmap_sem);
+	rc = __gmap_translate(gmap, gaddr);
+	up_read(&gmap->mm->mmap_sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_translate);
+
+/**
+ * gmap_unlink - disconnect a page table from the gmap shadow tables
+ * @mm: pointer to the parent mm_struct whose gmaps are to be updated
+ * @table: pointer to the host page table
+ * @vmaddr: vm address associated with the host page table
+ */
+static void gmap_unlink(struct mm_struct *mm, unsigned long *table,
+			unsigned long vmaddr)
+{
+	struct gmap *gmap;
+	int flush;
+
+	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
+		if (flush)
+			gmap_flush_tlb(gmap);
+	}
+}
+
+/**
+ * __gmap_link - set up shadow page tables to connect a host to a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @vmaddr: vm address
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, -EFAULT if the
+ * host pmd is a large pmd, or the error code of radix_tree_insert().
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
+{
+	struct mm_struct *mm;
+	unsigned long *table;
+	spinlock_t *ptl;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	int rc;
+
+	/* Create higher level tables in the gmap page table */
+	table = gmap->table;
+	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
+		table += (gaddr >> 53) & 0x7ff;
+		if ((*table & _REGION_ENTRY_INVALID) &&
+		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
+				     gaddr & 0xffe0000000000000UL))
+			return -ENOMEM;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	}
+	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
+		table += (gaddr >> 42) & 0x7ff;
+		if ((*table & _REGION_ENTRY_INVALID) &&
+		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
+				     gaddr & 0xfffffc0000000000UL))
+			return -ENOMEM;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	}
+	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
+		table += (gaddr >> 31) & 0x7ff;
+		if ((*table & _REGION_ENTRY_INVALID) &&
+		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
+				     gaddr & 0xffffffff80000000UL))
+			return -ENOMEM;
+		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	}
+	table += (gaddr >> 20) & 0x7ff;
+	/* Walk the parent mm page table */
+	mm = gmap->mm;
+	pgd = pgd_offset(mm, vmaddr);
+	VM_BUG_ON(pgd_none(*pgd));
+	pud = pud_offset(pgd, vmaddr);
+	VM_BUG_ON(pud_none(*pud));
+	pmd = pmd_offset(pud, vmaddr);
+	VM_BUG_ON(pmd_none(*pmd));
+	/* large pmds cannot yet be handled */
+	if (pmd_large(*pmd))
+		return -EFAULT;
+	/* Link gmap segment table entry location to page table. */
+	rc = radix_tree_preload(GFP_KERNEL);
+	if (rc)
+		return rc;
+	ptl = pmd_lock(mm, pmd);
+	spin_lock(&gmap->guest_table_lock);
+	if (*table == _SEGMENT_ENTRY_INVALID) {
+		rc = radix_tree_insert(&gmap->host_to_guest,
+				       vmaddr >> PMD_SHIFT, table);
+		if (!rc)
+			*table = pmd_val(*pmd);
+	} else
+		rc = 0;
+	spin_unlock(&gmap->guest_table_lock);
+	spin_unlock(ptl);
+	radix_tree_preload_end();
+	return rc;
+}
+
+/**
+ * gmap_fault - resolve a fault on a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @fault_flags: flags to pass down to handle_mm_fault()
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ */
+int gmap_fault(struct gmap *gmap, unsigned long gaddr,
+	       unsigned int fault_flags)
+{
+	unsigned long vmaddr;
+	int rc;
+
+	down_read(&gmap->mm->mmap_sem);
+	vmaddr = __gmap_translate(gmap, gaddr);
+	if (IS_ERR_VALUE(vmaddr)) {
+		rc = vmaddr;
+		goto out_up;
+	}
+	if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) {
+		rc = -EFAULT;
+		goto out_up;
+	}
+	rc = __gmap_link(gmap, gaddr, vmaddr);
+out_up:
+	up_read(&gmap->mm->mmap_sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_fault);
+
+static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
+{
+	if (!non_swap_entry(entry))
+		dec_mm_counter(mm, MM_SWAPENTS);
+	else if (is_migration_entry(entry)) {
+		struct page *page = migration_entry_to_page(entry);
+
+		if (PageAnon(page))
+			dec_mm_counter(mm, MM_ANONPAGES);
+		else
+			dec_mm_counter(mm, MM_FILEPAGES);
+	}
+	free_swap_and_cache(entry);
+}
+
+/*
+ * this function is assumed to be called with mmap_sem held
+ */
+void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
+{
+	unsigned long vmaddr, ptev, pgstev;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+	pgste_t pgste;
+
+	/* Find the vm address for the guest address */
+	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
+						   gaddr >> PMD_SHIFT);
+	if (!vmaddr)
+		return;
+	vmaddr |= gaddr & ~PMD_MASK;
+	/* Get pointer to the page table entry */
+	ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
+	if (unlikely(!ptep))
+		return;
+	pte = *ptep;
+	if (!pte_swap(pte))
+		goto out_pte;
+	/* Zap unused and logically-zero pages */
+	pgste = pgste_get_lock(ptep);
+	pgstev = pgste_val(pgste);
+	ptev = pte_val(pte);
+	if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
+	    ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
+		gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm);
+		pte_clear(gmap->mm, vmaddr, ptep);
+	}
+	pgste_set_unlock(ptep, pgste);
+out_pte:
+	pte_unmap_unlock(ptep, ptl);
+}
+EXPORT_SYMBOL_GPL(__gmap_zap);
+
+void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
+{
+	unsigned long gaddr, vmaddr, size;
+	struct vm_area_struct *vma;
+
+	down_read(&gmap->mm->mmap_sem);
+	for (gaddr = from; gaddr < to; gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
+		/* Find the vm address for the guest address */
+		vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
+							   gaddr >> PMD_SHIFT);
+		if (!vmaddr)
+			continue;
+		vmaddr |= gaddr & ~PMD_MASK;
+		/* find_vma() yields NULL if no vma ends above vmaddr: skip */
+		vma = find_vma(gmap->mm, vmaddr);
+		if (!vma)
+			continue;
+		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
+		zap_page_range(vma, vmaddr, size, NULL);
+	}
+	up_read(&gmap->mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(gmap_discard);
+
+static LIST_HEAD(gmap_notifier_list);
+static DEFINE_SPINLOCK(gmap_notifier_lock);
+
+/**
+ * gmap_register_ipte_notifier - register a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_register_ipte_notifier(struct gmap_notifier *nb)
+{
+	spin_lock(&gmap_notifier_lock);
+	list_add(&nb->list, &gmap_notifier_list);
+	spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);
+
+/**
+ * gmap_unregister_ipte_notifier - remove a pte invalidation callback
+ * @nb: pointer to the gmap notifier block
+ */
+void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
+{
+	spin_lock(&gmap_notifier_lock);
+	list_del_init(&nb->list);
+	spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
+
+/**
+ * gmap_ipte_notify - mark a range of ptes for invalidation notification
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @len: size of area
+ *
+ * Returns 0 if a gmap mapping exists for each page in the range and the
+ * notification could be set, -EINVAL if @gaddr or @len is not page aligned,
+ * -EFAULT if the gmap mapping is missing for one or more pages and -ENOMEM
+ * if no memory could be allocated. Establishes missing page table entries.
+ */
+int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
+{
+	unsigned long addr;
+	spinlock_t *ptl;
+	pte_t *ptep, entry;
+	pgste_t pgste;
+	int rc = 0;
+
+	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
+		return -EINVAL;
+	down_read(&gmap->mm->mmap_sem);
+	while (len) {
+		/* Convert gmap address and connect the page tables */
+		addr = __gmap_translate(gmap, gaddr);
+		if (IS_ERR_VALUE(addr)) {
+			rc = addr;
+			break;
+		}
+		/* Get the page mapped */
+		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) {
+			rc = -EFAULT;
+			break;
+		}
+		rc = __gmap_link(gmap, gaddr, addr);
+		if (rc)
+			break;
+		/* Walk the process page table, lock and get pte pointer */
+		ptep = get_locked_pte(gmap->mm, addr, &ptl);
+		VM_BUG_ON(!ptep);
+		/* Set notification bit in the pgste of the pte */
+		entry = *ptep;
+		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
+			pgste = pgste_get_lock(ptep);
+			pgste_val(pgste) |= PGSTE_IN_BIT;
+			pgste_set_unlock(ptep, pgste);
+			gaddr += PAGE_SIZE;
+			len -= PAGE_SIZE;
+		}
+		pte_unmap_unlock(ptep, ptl);
+	}
+	up_read(&gmap->mm->mmap_sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_ipte_notify);
+
+/**
+ * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ * @pte: pointer to the page table entry
+ *
+ * This function is assumed to be called with the page table lock held
+ * for the pte to notify.
+ */
+void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
+{
+	unsigned long offset, gaddr;
+	unsigned long *table;
+	struct gmap_notifier *nb;
+	struct gmap *gmap;
+
+	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
+	offset = offset * (4096 / sizeof(pte_t));
+	spin_lock(&gmap_notifier_lock);
+	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+		table = radix_tree_lookup(&gmap->host_to_guest,
+					  vmaddr >> PMD_SHIFT);
+		if (!table)
+			continue;
+		gaddr = __gmap_segment_gaddr(table) + offset;
+		list_for_each_entry(nb, &gmap_notifier_list, list)
+			nb->notifier_call(gmap, gaddr);
+	}
+	spin_unlock(&gmap_notifier_lock);
+}
+EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
+
+static inline int page_table_with_pgste(struct page *page)
+{
+	return atomic_read(&page->_mapcount) == 0;
+}
+
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
+{
+	struct page *page;
+	unsigned long *table;
+
+	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	if (!page)
+		return NULL;
+	if (!pgtable_page_ctor(page)) {
+		__free_page(page);
+		return NULL;
+	}
+	atomic_set(&page->_mapcount, 0);
+	table = (unsigned long *) page_to_phys(page);
+	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+	return table;
+}
+
+static inline void page_table_free_pgste(unsigned long *table)
+{
+	struct page *page;
+
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	pgtable_page_dtor(page);
+	atomic_set(&page->_mapcount, -1);
+	__free_page(page);
+}
+
+int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+			  unsigned long key, bool nq)
+{
+	spinlock_t *ptl;
+	pgste_t old, new;
+	pte_t *ptep;
+
+	down_read(&mm->mmap_sem);
+retry:
+	ptep = get_locked_pte(mm, addr, &ptl);
+	if (unlikely(!ptep)) {
+		up_read(&mm->mmap_sem);
+		return -EFAULT;
+	}
+	if (!(pte_val(*ptep) & _PAGE_INVALID) &&
+	     (pte_val(*ptep) & _PAGE_PROTECT)) {
+		pte_unmap_unlock(ptep, ptl);
+		if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
+			up_read(&mm->mmap_sem);
+			return -EFAULT;
+		}
+		goto retry;
+	}
+
+	new = old = pgste_get_lock(ptep);
+	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
+			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
+	pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
+	pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
+	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+		unsigned long address, bits, skey;
+
+		address = pte_val(*ptep) & PAGE_MASK;
+		skey = (unsigned long) page_get_storage_key(address);
+		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
+		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
+		/* Set storage key ACC and FP */
+		page_set_storage_key(address, skey, !nq);
+		/* Merge host changed & referenced into pgste  */
+		pgste_val(new) |= bits << 52;
+	}
+	/* changing the guest storage key is considered a change of the page */
+	if ((pgste_val(new) ^ pgste_val(old)) &
+	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
+		pgste_val(new) |= PGSTE_UC_BIT;
+
+	pgste_set_unlock(ptep, new);
+	pte_unmap_unlock(ptep, ptl);
+	up_read(&mm->mmap_sem);
+	return 0;
+}
+EXPORT_SYMBOL(set_guest_storage_key);
+
+unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
+{
+	spinlock_t *ptl;
+	pgste_t pgste;
+	pte_t *ptep;
+	uint64_t physaddr;
+	unsigned long key = 0;
+
+	down_read(&mm->mmap_sem);
+	ptep = get_locked_pte(mm, addr, &ptl);
+	if (unlikely(!ptep)) {
+		up_read(&mm->mmap_sem);
+		return -EFAULT;
+	}
+	pgste = pgste_get_lock(ptep);
+
+	if (pte_val(*ptep) & _PAGE_INVALID) {
+		key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
+		key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
+		key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
+		key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
+	} else {
+		physaddr = pte_val(*ptep) & PAGE_MASK;
+		key = page_get_storage_key(physaddr);
+
+		/* Reflect guest's logical view, not physical */
+		if (pgste_val(pgste) & PGSTE_GR_BIT)
+			key |= _PAGE_REFERENCED;
+		if (pgste_val(pgste) & PGSTE_GC_BIT)
+			key |= _PAGE_CHANGED;
+	}
+
+	pgste_set_unlock(ptep, pgste);
+	pte_unmap_unlock(ptep, ptl);
+	up_read(&mm->mmap_sem);
+	return key;
+}
+EXPORT_SYMBOL(get_guest_storage_key);
+
+static int page_table_allocate_pgste_min = 0;
+static int page_table_allocate_pgste_max = 1;
+int page_table_allocate_pgste = 0;
+EXPORT_SYMBOL(page_table_allocate_pgste);
+
+static struct ctl_table page_table_sysctl[] = {
+	{
+		.procname	= "allocate_pgste",
+		.data		= &page_table_allocate_pgste,
+		.maxlen		= sizeof(int),
+		.mode		= S_IRUGO | S_IWUSR,
+		.proc_handler	= proc_dointvec_minmax,	/* honours extra1/2 */
+		.extra1		= &page_table_allocate_pgste_min,
+		.extra2		= &page_table_allocate_pgste_max,
+	},
+	{ }
+};
+
+static struct ctl_table page_table_sysctl_dir[] = {
+	{
+		.procname	= "vm",
+		.maxlen		= 0,
+		.mode		= 0555,
+		.child		= page_table_sysctl,
+	},
+	{ }
+};
+
+static int __init page_table_register_sysctl(void)
+{
+	return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM;
+}
+__initcall(page_table_register_sysctl);
+
+#else /* CONFIG_PGSTE */
+
+static inline int page_table_with_pgste(struct page *page)
+{
+	return 0;
+}
+
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
+{
+	return NULL;
+}
+
+static inline void page_table_free_pgste(unsigned long *table)
+{
+}
+
+static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
+			unsigned long vmaddr)
+{
+}
+
+#endif /* CONFIG_PGSTE */
+
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+	unsigned int old, new;
+
+	do {
+		old = atomic_read(v);
+		new = old ^ bits;
+	} while (atomic_cmpxchg(v, old, new) != old);
+	return new;
+}
+
+/*
+ * page table entry allocation/free routines.
+ */
+unsigned long *page_table_alloc(struct mm_struct *mm)
+{
+	unsigned long *uninitialized_var(table);
+	struct page *uninitialized_var(page);
+	unsigned int mask, bit;
+
+	if (mm_alloc_pgste(mm))
+		return page_table_alloc_pgste(mm);
+	/* Allocate fragments of a 4K page as 1K/2K page table */
+	spin_lock_bh(&mm->context.list_lock);
+	mask = FRAG_MASK;
+	if (!list_empty(&mm->context.pgtable_list)) {
+		page = list_first_entry(&mm->context.pgtable_list,
+					struct page, lru);
+		table = (unsigned long *) page_to_phys(page);
+		mask = atomic_read(&page->_mapcount);
+		mask = mask | (mask >> 4);
+	}
+	if ((mask & FRAG_MASK) == FRAG_MASK) {
+		spin_unlock_bh(&mm->context.list_lock);
+		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+		if (!page)
+			return NULL;
+		if (!pgtable_page_ctor(page)) {
+			__free_page(page);
+			return NULL;
+		}
+		atomic_set(&page->_mapcount, 1);
+		table = (unsigned long *) page_to_phys(page);
+		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
+		spin_lock_bh(&mm->context.list_lock);
+		list_add(&page->lru, &mm->context.pgtable_list);
+	} else {
+		for (bit = 1; mask & bit; bit <<= 1)
+			table += PTRS_PER_PTE;
+		mask = atomic_xor_bits(&page->_mapcount, bit);
+		if ((mask & FRAG_MASK) == FRAG_MASK)
+			list_del(&page->lru);
+	}
+	spin_unlock_bh(&mm->context.list_lock);
+	return table;
+}
+
+void page_table_free(struct mm_struct *mm, unsigned long *table)
+{
+	struct page *page;
+	unsigned int bit, mask;
+
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	if (page_table_with_pgste(page))
+		return page_table_free_pgste(table);
+	/* Free 1K/2K page table fragment of a 4K page */
+	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
+	spin_lock_bh(&mm->context.list_lock);
+	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
+		list_del(&page->lru);
+	mask = atomic_xor_bits(&page->_mapcount, bit);
+	if (mask & FRAG_MASK)
+		list_add(&page->lru, &mm->context.pgtable_list);
+	spin_unlock_bh(&mm->context.list_lock);
+	if (mask == 0) {
+		pgtable_page_dtor(page);
+		atomic_set(&page->_mapcount, -1);
+		__free_page(page);
+	}
+}
+
+static void __page_table_free_rcu(void *table, unsigned bit)
+{
+	struct page *page;
+
+	if (bit == FRAG_MASK)
+		return page_table_free_pgste(table);
+	/* Free 1K/2K page table fragment of a 4K page */
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
+		pgtable_page_dtor(page);
+		atomic_set(&page->_mapcount, -1);
+		__free_page(page);
+	}
+}
+
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
+			 unsigned long vmaddr)
+{
+	struct mm_struct *mm;
+	struct page *page;
+	unsigned int bit, mask;
+
+	mm = tlb->mm;
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	if (page_table_with_pgste(page)) {
+		gmap_unlink(mm, table, vmaddr);
+		table = (unsigned long *) (__pa(table) | FRAG_MASK);
+		tlb_remove_table(tlb, table);
+		return;
+	}
+	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
+	spin_lock_bh(&mm->context.list_lock);
+	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
+		list_del(&page->lru);
+	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
+	if (mask & FRAG_MASK)
+		list_add_tail(&page->lru, &mm->context.pgtable_list);
+	spin_unlock_bh(&mm->context.list_lock);
+	table = (unsigned long *) (__pa(table) | (bit << 4));
+	tlb_remove_table(tlb, table);
+}
+
+static void __tlb_remove_table(void *_table)
+{
+	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
+	void *table = (void *)((unsigned long) _table & ~mask);
+	unsigned type = (unsigned long) _table & mask;
+
+	if (type)
+		__page_table_free_rcu(table, type);
+	else
+		free_pages((unsigned long) table, ALLOC_ORDER);
+}
+
+static void tlb_remove_table_smp_sync(void *arg)
+{
+	/* Simply deliver the interrupt */
+}
+
+static void tlb_remove_table_one(void *table)
+{
+	/*
+	 * This isn't an RCU grace period and hence the page-tables cannot be
+	 * assumed to be actually RCU-freed.
+	 *
+	 * It is however sufficient for software page-table walkers that rely
+	 * on IRQ disabling. See the comment near struct mmu_table_batch.
+	 */
+	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+	__tlb_remove_table(table);
+}
+
+static void tlb_remove_table_rcu(struct rcu_head *head)
+{
+	struct mmu_table_batch *done;
+	int idx = 0;
+
+	/* RCU callback: release every queued table, then the batch page */
+	done = container_of(head, struct mmu_table_batch, rcu);
+	while (idx < done->nr)
+		__tlb_remove_table(done->tables[idx++]);
+
+	free_page((unsigned long)done);
+}
+
+void tlb_table_flush(struct mmu_gather *tlb)
+{
+	struct mmu_table_batch *pending = tlb->batch;
+
+	if (!pending)	/* nothing queued */
+		return;
+	call_rcu_sched(&pending->rcu, tlb_remove_table_rcu);
+	tlb->batch = NULL;	/* the RCU callback frees the batch */
+}
+
+void tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+	struct mmu_table_batch **batch = &tlb->batch;
+
+	tlb->mm->context.flush_mm = 1;
+	if (*batch == NULL) {	/* no batch page yet: try to get one */
+		*batch = (struct mmu_table_batch *)
+			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
+		if (*batch == NULL) {	/* no memory: free without batching */
+			__tlb_flush_mm_lazy(tlb->mm);
+			tlb_remove_table_one(table);
+			return;
+		}
+		(*batch)->nr = 0;	/* a fresh batch starts empty */
+	}
+	(*batch)->tables[(*batch)->nr++] = table;
+	if ((*batch)->nr == MAX_TABLE_BATCH)	/* batch is full */
+		tlb_flush_mmu(tlb);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/* Run a FOLL_SPLIT lookup on every page of the given vma */
+static inline void thp_split_vma(struct vm_area_struct *vma)
+{
+	unsigned long pos = vma->vm_start;
+
+	for (; pos < vma->vm_end; pos += PAGE_SIZE)
+		follow_page(vma, pos, FOLL_SPLIT);
+}
+
+/* Split thp mappings in every vma of the mm and disable thp for it */
+static inline void thp_split_mm(struct mm_struct *mm)
+{
+	struct vm_area_struct *cur = mm->mmap;
+
+	for (; cur; cur = cur->vm_next) {
+		thp_split_vma(cur);
+		cur->vm_flags = (cur->vm_flags & ~VM_HUGEPAGE) | VM_NOHUGEPAGE;
+	}
+	mm->def_flags |= VM_NOHUGEPAGE;
+}
+#else
+/* Without CONFIG_TRANSPARENT_HUGEPAGE this is a no-op */
+static inline void thp_split_mm(struct mm_struct *mm) { }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/* Switch on pgstes for the mm of the current process (for kvm).
+ * Returns 0 if pgstes are enabled (or already were), -EINVAL if
+ * mm_alloc_pgste() is not set for the mm (2K page tables). */
+int s390_enable_sie(void)
+{
+	struct mm_struct *mm = current->mm;
+
+	/* Do we have pgstes? if yes, we are done */
+	if (mm_has_pgste(mm))
+		return 0;
+	/* Fail if the page tables are 2K */
+	if (!mm_alloc_pgste(mm))
+		return -EINVAL;
+	down_write(&mm->mmap_sem);
+	mm->context.has_pgste = 1;
+	/* split thp mappings and disable thp for future mappings */
+	thp_split_mm(mm);
+	up_write(&mm->mmap_sem);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
+
+/*
+ * Enable storage key handling from now on and initialize the storage
+ * keys with the default key. This is the per-pte part of the page walk.
+ */
+static int __s390_enable_skey(pte_t *pte, unsigned long addr,
+			      unsigned long next, struct mm_walk *walk)
+{
+	unsigned long ptev;
+	pgste_t pgste;
+
+	pgste = pgste_get_lock(pte);
+	/*
+	 * Remove all zero page mappings. With the policy that forbids
+	 * zero page mappings already established, a following fault on
+	 * such a page will get a fresh anonymous page.
+	 */
+	if (is_zero_pfn(pte_pfn(*pte))) {
+		ptep_flush_direct(walk->mm, addr, pte);
+		pte_val(*pte) = _PAGE_INVALID;
+	}
+	/* Clear storage key */
+	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
+			      PGSTE_GR_BIT | PGSTE_GC_BIT);
+	ptev = pte_val(*pte);
+	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))	/* valid + writable */
+		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
+	pgste_set_unlock(pte, pgste);
+	return 0;	/* always 0, the walk is never aborted from here */
+}
+
+int s390_enable_skey(void)
+{
+	struct mm_walk walk = { .pte_entry = __s390_enable_skey };
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int rc = 0;
+
+	down_write(&mm->mmap_sem);
+	if (mm_use_skey(mm))	/* already enabled: nothing to do, rc == 0 */
+		goto out_up;
+
+	mm->context.use_skey = 1;
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
+				MADV_UNMERGEABLE, &vma->vm_flags)) {
+			mm->context.use_skey = 0;	/* undo on failure */
+			rc = -ENOMEM;
+			goto out_up;
+		}
+	}
+	mm->def_flags &= ~VM_MERGEABLE;	/* default flags lose VM_MERGEABLE */
+
+	walk.mm = mm;
+	walk_page_range(0, TASK_SIZE, &walk);	/* __s390_enable_skey per pte */
+
+out_up:
+	up_write(&mm->mmap_sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_skey);
+
+/*
+ * Reset CMMA state, make all pages stable again (per-pte walk callback).
+ */
+static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
+			     unsigned long next, struct mm_walk *walk)
+{
+	pgste_t pgste;
+
+	pgste = pgste_get_lock(pte);
+	pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;	/* clear usage bits */
+	pgste_set_unlock(pte, pgste);
+	return 0;
+}
+
+void s390_reset_cmma(struct mm_struct *mm)
+{
+	struct mm_walk walk = { .pte_entry = __s390_reset_cmma, .mm = mm };
+
+	/* visit every pte below TASK_SIZE with mmap_sem held for writing */
+	down_write(&mm->mmap_sem);
+	walk_page_range(0, TASK_SIZE, &walk);
+	up_write(&mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(s390_reset_cmma);
+
+/*
+ * Test and reset the dirty state of a guest page; true if it was dirty
+ */
+bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
+{
+	spinlock_t *ptl;
+	pte_t *ptep;
+	bool was_dirty;
+
+	ptep = get_locked_pte(gmap->mm, address, &ptl);
+	if (unlikely(!ptep))
+		return false;
+
+	/* a non-zero result of the pte primitive means "was dirty" */
+	was_dirty = !!ptep_test_and_clear_user_dirty(gmap->mm, address, ptep);
+
+	spin_unlock(ptl);
+	return was_dirty;
+}
+EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
+			   pmd_t *pmdp)
+{
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);	/* must be huge page aligned */
+	/* No need to flush the TLB: on s390 the reference bits are kept
+	 * in the storage key and never in the TLB */
+	return pmdp_test_and_clear_young(vma, address, pmdp);
+}
+
+int pmdp_set_access_flags(struct vm_area_struct *vma,
+			  unsigned long address, pmd_t *pmdp,
+			  pmd_t entry, int dirty)
+{
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);	/* must be huge page aligned */
+
+	entry = pmd_mkyoung(entry);
+	if (dirty)
+		entry = pmd_mkdirty(entry);
+	if (pmd_same(*pmdp, entry))
+		return 0;	/* entry already up to date */
+	pmdp_invalidate(vma, address, pmdp);	/* invalidate, then replace */
+	set_pmd_at(vma->vm_mm, address, pmdp, entry);
+	return 1;	/* entry was changed */
+}
+
+static void pmdp_splitting_flush_sync(void *arg)
+{
+	/* Empty on purpose: simply delivering the interrupt is enough */
+}
+
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp)
+{
+	VM_BUG_ON(address & ~HPAGE_PMD_MASK);	/* must be huge page aligned */
+	if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
+			      (unsigned long *) pmdp)) {
+		/* bit newly set: serialize against gup-fast (IRQ disabled) */
+		smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
+	}
+}
+
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pgtable)
+{
+	struct list_head *lh = (struct list_head *) pgtable;
+
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+	/* FIFO; the list_head is stored inside the deposited table itself */
+	if (!pmd_huge_pte(mm, pmdp))
+		INIT_LIST_HEAD(lh);
+	else
+		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+	pmd_huge_pte(mm, pmdp) = pgtable;	/* newest deposit becomes head */
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+	struct list_head *lh;
+	pgtable_t pgtable;
+	pte_t *ptep;
+
+	assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+	/* FIFO; the list_head is stored inside the deposited table itself */
+	pgtable = pmd_huge_pte(mm, pmdp);
+	lh = (struct list_head *) pgtable;
+	if (list_empty(lh))
+		pmd_huge_pte(mm, pmdp) = NULL;	/* that was the last one */
+	else {
+		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+		list_del(lh);
+	}
+	ptep = (pte_t *) pgtable;	/* reset the first two ptes of the table */
+	pte_val(*ptep) = _PAGE_INVALID;
+	ptep++;
+	pte_val(*ptep) = _PAGE_INVALID;
+	return pgtable;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
new file mode 100644
index 000000000..ef7d6c8fe
--- /dev/null
+++ b/arch/s390/mm/vmem.c
@@ -0,0 +1,416 @@
+/*
+ *    Copyright IBM Corp. 2006
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/bootmem.h>
+#include <linux/pfn.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/hugetlb.h>
+#include <linux/slab.h>
+#include <linux/memblock.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+
+static DEFINE_MUTEX(vmem_mutex);
+
+struct memory_segment {
+	struct list_head list;
+	unsigned long start;
+	unsigned long size;
+};
+
+static LIST_HEAD(mem_segs);
+
+static void __ref *vmem_alloc_pages(unsigned int order)
+{
+	if (!slab_is_available())	/* no slab yet: use bootmem */
+		return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
+	return (void *)__get_free_pages(GFP_KERNEL, order);
+}
+
+static inline pud_t *vmem_pud_alloc(void)
+{
+	/* order 2 allocation: four pages, the size that is cleared below */
+	pud_t *table = vmem_alloc_pages(2);
+
+	if (table)
+		clear_table((unsigned long *) table, _REGION3_ENTRY_EMPTY,
+			    PAGE_SIZE * 4);
+	return table;
+}
+
+static inline pmd_t *vmem_pmd_alloc(void)
+{
+	/* order 2 allocation: four pages, the size that is cleared below */
+	pmd_t *table = vmem_alloc_pages(2);
+
+	if (table)
+		clear_table((unsigned long *) table, _SEGMENT_ENTRY_EMPTY,
+			    PAGE_SIZE * 4);
+	return table;
+}
+
+static pte_t __ref *vmem_pte_alloc(unsigned long address)
+{
+	const unsigned long table_size = PTRS_PER_PTE * sizeof(pte_t);
+	pte_t *table;
+
+	/* note: the address argument is not used */
+	if (!slab_is_available())
+		table = alloc_bootmem_align(table_size, table_size);
+	else
+		table = (pte_t *) page_table_alloc(&init_mm);
+	if (table)
+		clear_table((unsigned long *) table, _PAGE_INVALID,
+			    table_size);
+	return table;
+}
+
+/* Add a physical memory range to the 1:1 mapping, read-only if ro is set.
+ * Returns 0 on success, -ENOMEM if a page table could not be allocated;
+ * entries written before the failure are left in place. */
+static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
+{
+	unsigned long end = start + size;
+	unsigned long address = start;
+	pgd_t *pg_dir;
+	pud_t *pu_dir;
+	pmd_t *pm_dir;
+	pte_t *pt_dir;
+	int ret = -ENOMEM;
+
+	while (address < end) {
+		pg_dir = pgd_offset_k(address);
+		if (pgd_none(*pg_dir)) {
+			pu_dir = vmem_pud_alloc();
+			if (!pu_dir)
+				goto out;
+			pgd_populate(&init_mm, pg_dir, pu_dir);
+		}
+		pu_dir = pud_offset(pg_dir, address);
+#ifndef CONFIG_DEBUG_PAGEALLOC
+		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
+		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
+			pud_val(*pu_dir) = __pa(address) |
+				_REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE |
+				(ro ? _REGION_ENTRY_PROTECT : 0);
+			address += PUD_SIZE;	/* one large region entry */
+			continue;
+		}
+#endif
+		if (pud_none(*pu_dir)) {
+			pm_dir = vmem_pmd_alloc();
+			if (!pm_dir)
+				goto out;
+			pud_populate(&init_mm, pu_dir, pm_dir);
+		}
+		pm_dir = pmd_offset(pu_dir, address);
+#ifndef CONFIG_DEBUG_PAGEALLOC
+		if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
+		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
+			pmd_val(*pm_dir) = __pa(address) |
+				_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
+				_SEGMENT_ENTRY_YOUNG |
+				(ro ? _SEGMENT_ENTRY_PROTECT : 0);
+			address += PMD_SIZE;	/* one large segment entry */
+			continue;
+		}
+#endif
+		if (pmd_none(*pm_dir)) {
+			pt_dir = vmem_pte_alloc(address);
+			if (!pt_dir)
+				goto out;
+			pmd_populate(&init_mm, pm_dir, pt_dir);
+		}
+
+		pt_dir = pte_offset_kernel(pm_dir, address);
+		pte_val(*pt_dir) = __pa(address) |
+			pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
+		address += PAGE_SIZE;	/* one 4K page table entry */
+	}
+	ret = 0;
+out:
+	return ret;
+}
+
+/*
+ * Remove a physical memory range from the 1:1 mapping: entries are only
+ * invalidated (page tables stay allocated), then the TLB range is flushed.
+ */
+static void vmem_remove_range(unsigned long start, unsigned long size)
+{
+	unsigned long end = start + size;
+	unsigned long address = start;
+	pgd_t *pg_dir;
+	pud_t *pu_dir;
+	pmd_t *pm_dir;
+	pte_t *pt_dir;
+	pte_t  pte;
+
+	pte_val(pte) = _PAGE_INVALID;	/* template for invalidated ptes */
+	while (address < end) {
+		pg_dir = pgd_offset_k(address);
+		if (pgd_none(*pg_dir)) {
+			address += PGDIR_SIZE;
+			continue;
+		}
+		pu_dir = pud_offset(pg_dir, address);
+		if (pud_none(*pu_dir)) {
+			address += PUD_SIZE;
+			continue;
+		}
+		if (pud_large(*pu_dir)) {	/* large region entry */
+			pud_clear(pu_dir);
+			address += PUD_SIZE;
+			continue;
+		}
+		pm_dir = pmd_offset(pu_dir, address);
+		if (pmd_none(*pm_dir)) {
+			address += PMD_SIZE;
+			continue;
+		}
+		if (pmd_large(*pm_dir)) {	/* large segment entry */
+			pmd_clear(pm_dir);
+			address += PMD_SIZE;
+			continue;
+		}
+		pt_dir = pte_offset_kernel(pm_dir, address);
+		*pt_dir = pte;
+		address += PAGE_SIZE;
+	}
+	flush_tlb_kernel_range(start, end);
+}
+
+/* Add a backed mem_map array to the virtual mem_map array: back the
+ * range start..end with memory from vmemmap_alloc_block(). Returns 0 on
+ * success or -ENOMEM if an allocation failed. */
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+{
+	unsigned long address = start;
+	pgd_t *pg_dir;
+	pud_t *pu_dir;
+	pmd_t *pm_dir;
+	pte_t *pt_dir;
+	int ret = -ENOMEM;
+
+	for (address = start; address < end;) {
+		pg_dir = pgd_offset_k(address);
+		if (pgd_none(*pg_dir)) {
+			pu_dir = vmem_pud_alloc();
+			if (!pu_dir)
+				goto out;
+			pgd_populate(&init_mm, pg_dir, pu_dir);
+		}
+
+		pu_dir = pud_offset(pg_dir, address);
+		if (pud_none(*pu_dir)) {
+			pm_dir = vmem_pmd_alloc();
+			if (!pm_dir)
+				goto out;
+			pud_populate(&init_mm, pu_dir, pm_dir);
+		}
+
+		pm_dir = pmd_offset(pu_dir, address);
+		if (pmd_none(*pm_dir)) {
+			/* Use 1MB frames for vmemmap if available. We always
+			 * use large frames even if they are only partially
+			 * used.
+			 * Otherwise we would end up with page tables as
+			 * well, since vmemmap_populate gets called for each
+			 * section separately. */
+			if (MACHINE_HAS_EDAT1) {
+				void *new_page;
+
+				new_page = vmemmap_alloc_block(PMD_SIZE, node);
+				if (!new_page)
+					goto out;
+				pmd_val(*pm_dir) = __pa(new_page) |
+					_SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE;
+				address = (address + PMD_SIZE) & PMD_MASK;
+				continue;
+			}
+			pt_dir = vmem_pte_alloc(address);
+			if (!pt_dir)
+				goto out;
+			pmd_populate(&init_mm, pm_dir, pt_dir);
+		} else if (pmd_large(*pm_dir)) {	/* already backed */
+			address = (address + PMD_SIZE) & PMD_MASK;
+			continue;
+		}
+
+		pt_dir = pte_offset_kernel(pm_dir, address);
+		if (pte_none(*pt_dir)) {	/* back this page only once */
+			void *new_page;
+
+			new_page = vmemmap_alloc_block(PAGE_SIZE, node);
+			if (!new_page)
+				goto out;
+			pte_val(*pt_dir) =
+				__pa(new_page) | pgprot_val(PAGE_KERNEL);
+		}
+		address += PAGE_SIZE;
+	}
+	ret = 0;
+out:
+	return ret;
+}
+
+void vmemmap_free(unsigned long start, unsigned long end)
+{
+}
+
+/*
+ * Add a memory segment to the segment list unless it exceeds
+ * VMEM_MAX_PHYS, wraps around, or overlaps an already present segment.
+ */
+static int insert_memory_segment(struct memory_segment *seg)
+{
+	unsigned long seg_end = seg->start + seg->size;
+	struct memory_segment *cur;
+
+	/* seg_end < start means the addition wrapped around */
+	if (seg_end > VMEM_MAX_PHYS || seg_end < seg->start)
+		return -ERANGE;
+
+	list_for_each_entry(cur, &mem_segs, list) {
+		/* two ranges overlap if each starts before the other ends */
+		if (seg->start < cur->start + cur->size &&
+		    seg_end > cur->start)
+			return -ENOSPC;
+	}
+	list_add(&seg->list, &mem_segs);
+	return 0;
+}
+
+/*
+ * Remove memory segment from the segment list; it is not freed here.
+ */
+static void remove_memory_segment(struct memory_segment *seg)
+{
+	list_del(&seg->list);
+}
+
+static void __remove_shared_memory(struct memory_segment *seg)
+{
+	remove_memory_segment(seg);	/* unlink from mem_segs */
+	vmem_remove_range(seg->start, seg->size);	/* drop its 1:1 mapping */
+}
+
+int vmem_remove_mapping(unsigned long start, unsigned long size)
+{
+	struct memory_segment *seg, *found = NULL;
+	int ret = -ENOENT;
+
+	mutex_lock(&vmem_mutex);
+	/* Look for the segment that exactly matches start and size. After a
+	 * full pass the iterator does not point to a real segment, so the
+	 * match is remembered in "found" instead of inspecting seg later. */
+	list_for_each_entry(seg, &mem_segs, list) {
+		if (seg->start == start && seg->size == size) {
+			found = seg;
+			break;
+		}
+	}
+	if (found) {
+		ret = 0;
+		__remove_shared_memory(found);
+		kfree(found);
+	}
+	mutex_unlock(&vmem_mutex);
+	return ret;
+}
+
+int vmem_add_mapping(unsigned long start, unsigned long size)
+{
+	struct memory_segment *seg;
+	int ret;
+
+	mutex_lock(&vmem_mutex);
+	ret = -ENOMEM;
+	seg = kzalloc(sizeof(*seg), GFP_KERNEL);
+	if (!seg)
+		goto out;
+	seg->start = start;
+	seg->size = size;
+
+	ret = insert_memory_segment(seg);
+	if (ret)	/* -ERANGE or -ENOSPC: seg is not on the list */
+		goto out_free;
+
+	ret = vmem_add_mem(start, size, 0);	/* 0: not read-only */
+	if (ret)
+		goto out_remove;
+	goto out;	/* success: seg stays on the list */
+
+out_remove:
+	__remove_shared_memory(seg);	/* unlink and invalidate the range */
+out_free:
+	kfree(seg);
+out:
+	mutex_unlock(&vmem_mutex);
+	return ret;
+}
+
+/*
+ * Map whole physical memory to virtual memory (identity mapping), with
+ * _stext.._eshared read-only. Enough space is reserved in the vmalloc
+ * area for vmemmap to hotplug additional memory segments.
+ */
+void __init vmem_map_init(void)
+{
+	unsigned long ro_start, ro_end;
+	struct memblock_region *reg;
+	phys_addr_t start, end;
+
+	ro_start = PFN_ALIGN((unsigned long)&_stext);
+	ro_end = (unsigned long)&_eshared & PAGE_MASK;
+	for_each_memblock(memory, reg) {
+		start = reg->base;
+		end = reg->base + reg->size;	/* exclusive: sizes are exact */
+		if (start >= ro_end || end <= ro_start)	/* no overlap */
+			vmem_add_mem(start, end - start, 0);
+		else if (start >= ro_start && end <= ro_end)	/* inside */
+			vmem_add_mem(start, end - start, 1);
+		else if (start >= ro_start) {	/* starts inside, ends after */
+			vmem_add_mem(start, ro_end - start, 1);
+			vmem_add_mem(ro_end, end - ro_end, 0);
+		} else if (end < ro_end) {	/* starts before, ends inside */
+			vmem_add_mem(start, ro_start - start, 0);
+			vmem_add_mem(ro_start, end - ro_start, 1);
+		} else {	/* covers the whole read-only range */
+			vmem_add_mem(start, ro_start - start, 0);
+			vmem_add_mem(ro_start, ro_end - ro_start, 1);
+			vmem_add_mem(ro_end, end - ro_end, 0);
+		}
+	}
+}
+
+/*
+ * Convert memblock.memory to a memory segment list so there is a single
+ * list that contains all memory segments. Runs as a core_initcall.
+ */
+static int __init vmem_convert_memory_chunk(void)
+{
+	struct memblock_region *reg;
+	struct memory_segment *seg;
+
+	mutex_lock(&vmem_mutex);
+	for_each_memblock(memory, reg) {
+		seg = kzalloc(sizeof(*seg), GFP_KERNEL);
+		if (!seg)
+			panic("Out of memory...\n");
+		seg->start = reg->base;
+		seg->size = reg->size;
+		insert_memory_segment(seg);	/* NOTE(review): result ignored */
+	}
+	mutex_unlock(&vmem_mutex);
+	return 0;
+}
+
+core_initcall(vmem_convert_memory_chunk);
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
new file mode 100644
index 000000000..90568c33d
--- /dev/null
+++ b/arch/s390/net/Makefile
@@ -0,0 +1,4 @@
+#
+# Arch-specific network modules
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S
new file mode 100644
index 000000000..a1c917d88
--- /dev/null
+++ b/arch/s390/net/bpf_jit.S
@@ -0,0 +1,115 @@
+/*
+ * BPF Jit compiler for s390, help functions.
+ *
+ * Copyright IBM Corp. 2012,2015
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include "bpf_jit.h"
+
+/*
+ * Calling convention:
+ * registers %r7-%r10, %r11,%r13, and %r15 are call saved
+ *
+ * Input (64 bit):
+ *   %r3 (%b2) = offset into skb data
+ *   %r6 (%b5) = return address
+ *   %r7 (%b6) = skb pointer
+ *   %r12      = skb data pointer
+ *
+ * Output:
+ *   %r14= %b0 = return value (read skb value)
+ *
+ * Work registers: %r2,%r4,%r5,%r14
+ *
+ * skb_copy_bits takes 4 parameters:
+ *   %r2 = skb pointer
+ *   %r3 = offset into skb data
+ *   %r4 = pointer to temp buffer
+ *   %r5 = length to copy
+ *   Return value in %r2: 0 = ok
+ *
+ * bpf_internal_load_pointer_neg_helper takes 3 parameters:
+ *   %r2 = skb pointer
+ *   %r3 = offset into data
+ *   %r4 = length to copy
+ *   Return value in %r2: Pointer to data
+ */
+
+#define SKF_MAX_NEG_OFF	-0x200000	/* SKF_LL_OFF from filter.h */
+
+/*
+ * Load SIZE bytes from SKB
+ */
+#define sk_load_common(NAME, SIZE, LOAD)				\
+ENTRY(sk_load_##NAME);							\
+	ltgr	%r3,%r3;		/* Is offset negative? */	\
+	jl	sk_load_##NAME##_slow_neg;				\
+ENTRY(sk_load_##NAME##_pos);						\
+	aghi	%r3,SIZE;		/* Offset + SIZE */		\
+	clg	%r3,STK_OFF_HLEN(%r15);	/* Offset + SIZE > hlen? */	\
+	jh	sk_load_##NAME##_slow;					\
+	LOAD	%r14,-SIZE(%r3,%r12);	/* Get data from skb */		\
+	b	OFF_OK(%r6);		/* Return, skip cc check */	\
+									\
+sk_load_##NAME##_slow:;							\
+	lgr	%r2,%r7;		/* Arg1 = skb pointer */	\
+	aghi	%r3,-SIZE;		/* Arg2 = offset */		\
+	la	%r4,STK_OFF_TMP(%r15);	/* Arg3 = temp buffer */	\
+	lghi	%r5,SIZE;		/* Arg4 = size */		\
+	brasl	%r14,skb_copy_bits;	/* Get data from skb */		\
+	LOAD	%r14,STK_OFF_TMP(%r15);	/* Load from temp buffer */	\
+	ltgr	%r2,%r2;		/* Set cc to (%r2 != 0) */	\
+	br	%r6;			/* Return */
+
+sk_load_common(word, 4, llgf)	/* r14 = *(u32 *) (skb->data+offset) */
+sk_load_common(half, 2, llgh)	/* r14 = *(u16 *) (skb->data+offset) */
+
+/*
+ * Load 1 byte from SKB (optimized version)
+ */
+	/* r14 = *(u8 *) (skb->data+offset) */
+ENTRY(sk_load_byte)
+	ltgr	%r3,%r3			# Is offset negative?
+	jl	sk_load_byte_slow_neg
+ENTRY(sk_load_byte_pos)
+	clg	%r3,STK_OFF_HLEN(%r15)	# Offset >= hlen?
+	jnl	sk_load_byte_slow	# Yes: go through skb_copy_bits
+	llgc	%r14,0(%r3,%r12)	# Get byte from skb
+	b	OFF_OK(%r6)		# Return OK
+
+sk_load_byte_slow:
+	lgr	%r2,%r7			# Arg1 = skb pointer
+					# Arg2 = offset (already in %r3)
+	la	%r4,STK_OFF_TMP(%r15)	# Arg3 = pointer to temp buffer
+	lghi	%r5,1			# Arg4 = size (1 byte)
+	brasl	%r14,skb_copy_bits	# Get data from skb
+	llgc	%r14,STK_OFF_TMP(%r15)	# Load result from temp buffer
+	ltgr	%r2,%r2			# Set cc to (%r2 != 0)
+	br	%r6			# Return cc
+
+#define sk_negative_common(NAME, SIZE, LOAD)				\
+sk_load_##NAME##_slow_neg:;						\
+	cgfi	%r3,SKF_MAX_NEG_OFF;	/* Offset below limit? */	\
+	jl	bpf_error;						\
+	lgr	%r2,%r7;		/* Arg1 = skb pointer */	\
+					/* Arg2 = offset */		\
+	lghi	%r4,SIZE;		/* Arg3 = size */		\
+	brasl	%r14,bpf_internal_load_pointer_neg_helper;		\
+	ltgr	%r2,%r2;		/* NULL pointer returned? */	\
+	jz	bpf_error;						\
+	LOAD	%r14,0(%r2);		/* Get data from pointer */	\
+	xr	%r3,%r3;		/* Set cc to zero */		\
+	br	%r6;			/* Return cc */
+
+sk_negative_common(word, 4, llgf)
+sk_negative_common(half, 2, llgh)
+sk_negative_common(byte, 1, llgc)
+
+bpf_error:
+# force a return 0 from jit handler
+	ltgr	%r15,%r15	# Set cc != 0 (%r15, the stack pointer, is non-zero)
+	br	%r6		# Return cc
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
new file mode 100644
index 000000000..de156ba3b
--- /dev/null
+++ b/arch/s390/net/bpf_jit.h
@@ -0,0 +1,60 @@
+/*
+ * BPF Jit compiler defines
+ *
+ * Copyright IBM Corp. 2012,2015
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#ifndef __ARCH_S390_NET_BPF_JIT_H
+#define __ARCH_S390_NET_BPF_JIT_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/filter.h>
+#include <linux/types.h>
+
+extern u8 sk_load_word_pos[], sk_load_half_pos[], sk_load_byte_pos[];
+extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Stackframe layout (packed stack):
+ *
+ *				    ^ high
+ *	      +---------------+     |
+ *	      | old backchain |     |
+ *	      +---------------+     |
+ *	      |   r15 - r6    |     |
+ * BFP	   -> +===============+     |
+ *	      |		      |     |
+ *	      |   BPF stack   |     |
+ *	      |		      |     |
+ *	      +---------------+     |
+ *	      | 8 byte hlen   |     |
+ * R15+168 -> +---------------+     |
+ *	      | 4 byte align  |     |
+ *	      +---------------+     |
+ *	      | 4 byte temp   |     |
+ *	      | for bpf_jit.S |     |
+ * R15+160 -> +---------------+     |
+ *	      | new backchain |     |
+ * R15+152 -> +---------------+     |
+ *	      | + 152 byte SA |     |
+ * R15	   -> +---------------+     + low
+ *
+ * We get 160 bytes stack space from calling function, but only use
+ * 11 * 8 byte (old backchain + r15 - r6) for storing registers.
+ */
+#define STK_SPACE	(MAX_BPF_STACK + 8 + 4 + 4 + 160)	/* R15 up to BFP */
+#define STK_160_UNUSED	(160 - 11 * 8)	/* Caller's 160 bytes minus 11 slots */
+#define STK_OFF		(STK_SPACE - STK_160_UNUSED)	/* Frame size */
+#define STK_OFF_TMP	160	/* Offset of tmp buffer on stack */
+#define STK_OFF_HLEN	168	/* Offset of SKB header length on stack */
+
+/* Offset to skip condition code check */
+#define OFF_OK		4
+
+#endif /* __ARCH_S390_NET_BPF_JIT_H */
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
new file mode 100644
index 000000000..9afb9d602
--- /dev/null
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -0,0 +1,1224 @@
+/*
+ * BPF Jit compiler for s390.
+ *
+ * Minimum build requirements:
+ *
+ *  - HAVE_MARCH_Z196_FEATURES: laal, laalg
+ *  - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj
+ *  - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf
+ *  - PACK_STACK
+ *  - 64BIT
+ *
+ * Copyright IBM Corp. 2012,2015
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "bpf_jit"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/init.h>
+#include <asm/cacheflush.h>
+#include <asm/dis.h>
+#include "bpf_jit.h"
+
+int bpf_jit_enable __read_mostly;
+
+struct bpf_jit {
+	u32 seen;		/* Flags to remember seen eBPF instructions */
+	u32 seen_reg[16];	/* Used s390 registers, indexed by number */
+	u32 *addrs;		/* Array with relative instruction addresses */
+	u8 *prg_buf;		/* Start of program, NULL: only count sizes */
+	int size;		/* Size of program and literal pool */
+	int size_prg;		/* Size of program */
+	int prg;		/* Current position in program */
+	int lit_start;		/* Start of literal pool */
+	int lit;		/* Current position in literal pool */
+	int base_ip;		/* Base address for literal pool */
+	int ret0_ip;		/* Address of return 0 */
+	int exit_ip;		/* Address of exit */
+};
+
+#define BPF_SIZE_MAX	4096	/* Max size for program */
+
+#define SEEN_SKB	1	/* skb access */
+#define SEEN_MEM	2	/* use mem[] for temporary storage */
+#define SEEN_RET0	4	/* ret0_ip points to a valid return 0 */
+#define SEEN_LITERAL	8	/* code uses literals */
+#define SEEN_FUNC	16	/* calls C functions */
+#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM | SEEN_SKB)
+
+/*
+ * s390 registers
+ */
+#define REG_W0		(__MAX_BPF_REG+0)	/* Work register 1 (even) */
+#define REG_W1		(__MAX_BPF_REG+1)	/* Work register 2 (odd) */
+#define REG_SKB_DATA	(__MAX_BPF_REG+2)	/* SKB data register */
+#define REG_L		(__MAX_BPF_REG+3)	/* Literal pool register */
+#define REG_15		(__MAX_BPF_REG+4)	/* Register 15 */
+#define REG_0		REG_W0			/* Register 0 */
+#define REG_2		BPF_REG_1		/* Register 2 */
+#define REG_14		BPF_REG_0		/* Register 14 */
+
+/*
+ * Mapping of BPF registers to s390 registers
+ */
+static const int reg2hex[] = {
+	/* Return code */
+	[BPF_REG_0]	= 14,
+	/* Function parameters */
+	[BPF_REG_1]	= 2,
+	[BPF_REG_2]	= 3,
+	[BPF_REG_3]	= 4,
+	[BPF_REG_4]	= 5,
+	[BPF_REG_5]	= 6,
+	/* Call saved registers */
+	[BPF_REG_6]	= 7,
+	[BPF_REG_7]	= 8,
+	[BPF_REG_8]	= 9,
+	[BPF_REG_9]	= 10,
+	/* BPF stack pointer */
+	[BPF_REG_FP]	= 13,
+	/* SKB data pointer */
+	[REG_SKB_DATA]	= 12,
+	/* Work registers for s390x backend */
+	[REG_W0]	= 0,
+	[REG_W1]	= 1,
+	[REG_L]		= 11,
+	[REG_15]	= 15,
+};
+
+static inline u32 reg_high(u32 reg)
+{
+	return reg2hex[reg] << 4;	/* s390 register number in bits 4-7 */
+}
+
+static inline u32 reg(u32 dst_reg, u32 src_reg)
+{
+	return reg_high(dst_reg) | reg2hex[src_reg];	/* dst: 4-7, src: 0-3 */
+}
+
+static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
+{
+	u32 hw = reg2hex[b1];
+
+	if (hw >= 6 && hw <= 15 && !jit->seen_reg[hw])
+		jit->seen_reg[hw] = 1;	/* only %r6-%r15 are tracked */
+}
+
+#define REG_SET_SEEN(b1)					\
+({								\
+	reg_set_seen(jit, b1);					\
+})
+
+#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]]
+
+/*
+ * EMIT macros for code generation
+ */
+
+#define _EMIT2(op)						\
+({								\
+	if (jit->prg_buf)					\
+		*(u16 *) (jit->prg_buf + jit->prg) = op;	\
+	jit->prg += 2;						\
+})
+
+#define EMIT2(op, b1, b2)					\
+({								\
+	_EMIT2(op | reg(b1, b2));				\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+})
+
+#define _EMIT4(op)						\
+({								\
+	if (jit->prg_buf)					\
+		*(u32 *) (jit->prg_buf + jit->prg) = op;	\
+	jit->prg += 4;						\
+})
+
+#define EMIT4(op, b1, b2)					\
+({								\
+	_EMIT4(op | reg(b1, b2));				\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+})
+
+#define EMIT4_RRF(op, b1, b2, b3)				\
+({								\
+	_EMIT4(op | reg_high(b3) << 8 | reg(b1, b2));		\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+	REG_SET_SEEN(b3);					\
+})
+
+#define _EMIT4_DISP(op, disp)					\
+({								\
+	unsigned int __disp = (disp) & 0xfff;			\
+	_EMIT4(op | __disp);					\
+})
+
+#define EMIT4_DISP(op, b1, b2, disp)				\
+({								\
+	_EMIT4_DISP(op | reg_high(b1) << 16 |			\
+		    reg_high(b2) << 8, disp);			\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+})
+
+#define EMIT4_IMM(op, b1, imm)					\
+({								\
+	unsigned int __imm = (imm) & 0xffff;			\
+	_EMIT4(op | reg_high(b1) << 16 | __imm);		\
+	REG_SET_SEEN(b1);					\
+})
+
+#define EMIT4_PCREL(op, pcrel)					\
+({								\
+	long __pcrel = ((pcrel) >> 1) & 0xffff;			\
+	_EMIT4(op | __pcrel);					\
+})
+
+#define _EMIT6(op1, op2)					\
+({								\
+	if (jit->prg_buf) {					\
+		*(u32 *) (jit->prg_buf + jit->prg) = op1;	\
+		*(u16 *) (jit->prg_buf + jit->prg + 4) = op2;	\
+	}							\
+	jit->prg += 6;						\
+})
+
+#define _EMIT6_DISP(op1, op2, disp)				\
+({								\
+	unsigned int __disp = (disp) & 0xfff;			\
+	_EMIT6(op1 | __disp, op2);				\
+})
+
+#define EMIT6_DISP(op1, op2, b1, b2, b3, disp)			\
+({								\
+	_EMIT6_DISP(op1 | reg(b1, b2) << 16 |			\
+		    reg_high(b3) << 8, op2, disp);		\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+	REG_SET_SEEN(b3);					\
+})
+
+#define _EMIT6_DISP_LH(op1, op2, disp)	/* 20 bit displacement */	\
+({									\
+	unsigned int __disp_h = ((u32)(disp)) & 0xff000; /* high 8 */	\
+	unsigned int __disp_l = ((u32)(disp)) & 0x00fff; /* low 12 */	\
+	_EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4);		\
+})
+
+#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp)		\
+({								\
+	_EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 |		\
+		       reg_high(b3) << 8, op2, disp);		\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+	REG_SET_SEEN(b3);					\
+})
+
+#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask)		\
+({								\
+	/* 6 byte branch; uses the caller's addrs[] */		\
+	int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
+	_EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask);	\
+	REG_SET_SEEN(b1);					\
+	REG_SET_SEEN(b2);					\
+})
+
+#define _EMIT6_IMM(op, imm)					\
+({								\
+	unsigned int __imm = (imm);				\
+	_EMIT6(op | (__imm >> 16), __imm & 0xffff);		\
+})
+
+#define EMIT6_IMM(op, b1, imm)					\
+({								\
+	_EMIT6_IMM(op | reg_high(b1) << 16, imm);		\
+	REG_SET_SEEN(b1);					\
+})
+
+#define EMIT_CONST_U32(val)					\
+({								\
+	unsigned int ret;					\
+	ret = jit->lit - jit->base_ip;				\
+	jit->seen |= SEEN_LITERAL;				\
+	if (jit->prg_buf)					\
+		*(u32 *) (jit->prg_buf + jit->lit) = (u32) val;	\
+	jit->lit += 4;						\
+	ret;							\
+})
+
+#define EMIT_CONST_U64(val)					\
+({								\
+	unsigned int ret;					\
+	ret = jit->lit - jit->base_ip;				\
+	jit->seen |= SEEN_LITERAL;				\
+	if (jit->prg_buf)					\
+		*(u64 *) (jit->prg_buf + jit->lit) = (u64) val;	\
+	jit->lit += 8;						\
+	ret;							\
+})
+
+#define EMIT_ZERO(b1)						\
+({								\
+	/* llgfr %dst,%dst (zero extend to 64 bit) */		\
+	EMIT4(0xb9160000, b1, b1);				\
+	REG_SET_SEEN(b1);					\
+})
+
+/*
+ * Fill whole space with illegal instructions (all bytes are set to zero)
+ */
+static void jit_fill_hole(void *area, unsigned int size)
+{
+	memset(area, 0, size);
+}
+
+/*
+ * Emit code that stores registers %rs through %re into their save slots
+ */
+static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
+{
+	u32 slot = STK_160_UNUSED + 8 * (rs - 6);
+
+	if (rs != re)
+		/* stmg %rs,%re,slot(%r15) */
+		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0024, slot);
+	else
+		/* stg %rs,slot(%r15) */
+		_EMIT6(0xe300f000 | rs << 20 | slot, 0x0024);
+}
+
+/*
+ * Emit code that reloads registers %rs through %re from their save slots
+ */
+static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re)
+{
+	/* With a stack frame the prologue lowered %r15 by STK_OFF, so the
+	 * save slots are found that much higher relative to %r15 */
+	u32 extra = (jit->seen & SEEN_STACK) ? STK_OFF : 0;
+	u32 slot = STK_160_UNUSED + 8 * (rs - 6) + extra;
+
+	if (rs != re)
+		/* lmg %rs,%re,slot(%r15) */
+		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0004, slot);
+	else
+		/* lg %rs,slot(%r15) */
+		_EMIT6(0xe300f000 | rs << 20 | slot, 0x0004);
+}
+
+/*
+ * Return the first seen register at or above "start", 0 if there is none
+ */
+static int get_start(struct bpf_jit *jit, int start)
+{
+	int r = start;
+
+	while (r <= 15 && !jit->seen_reg[r])
+		r++;
+	if (r > 15)
+		return 0;	/* ran past %r15 without a hit */
+	return r;
+}
+
+/* Return the last register of the chunk that is continued at "start": the
+ * chunk ends right before the first pair of adjacent unseen registers. */
+static int get_end(struct bpf_jit *jit, int start)
+{
+	int r = start;
+
+	/* keep going while r or r + 1 has been seen (gap < 2) */
+	while (r < 15 && (jit->seen_reg[r] || jit->seen_reg[r + 1]))
+		r++;
+	if (r < 15)
+		return r - 1;
+	return jit->seen_reg[15] ? 15 : 14;
+}
+
+#define REGS_SAVE	1
+#define REGS_RESTORE	0
+/*
+ * Save (op == REGS_SAVE) or restore clobbered registers (6-15) on stack.
+ * We save/restore registers in chunks with gap >= 2 registers.
+ */
+static void save_restore_regs(struct bpf_jit *jit, int op)
+{
+	/* rs/re: first and last register of the current chunk */
+	int re = 6, rs;
+
+	do {
+		rs = get_start(jit, re);
+		if (!rs)
+			break;	/* no further seen register */
+		re = get_end(jit, rs + 1);
+		if (op == REGS_SAVE)
+			save_regs(jit, rs, re);
+		else
+			restore_regs(jit, rs, re);
+		re++;	/* continue the search behind this chunk */
+	} while (re <= 15);
+}
+
+/*
+ * Emit function prologue
+ *
+ * Save registers and create stack frame if necessary.
+ * See stack frame layout description in "bpf_jit.h"!
+ */
+static void bpf_jit_prologue(struct bpf_jit *jit)
+{
+	/* Save registers */
+	save_restore_regs(jit, REGS_SAVE);
+	/* Setup literal pool */
+	if (jit->seen & SEEN_LITERAL) {
+		/* basr %l,0 */
+		EMIT2(0x0d00, REG_L, REG_0);
+		jit->base_ip = jit->prg;
+	}
+	/* Setup stack and backchain */
+	if (jit->seen & SEEN_STACK) {
+		if (jit->seen & SEEN_FUNC)
+			/* lgr %w1,%r15 (backchain) */
+			EMIT4(0xb9040000, REG_W1, REG_15);
+		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
+		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
+		/* aghi %r15,-STK_OFF */
+		EMIT4_IMM(0xa70b0000, REG_15, -STK_OFF);
+		if (jit->seen & SEEN_FUNC)
+			/* stg %w1,152(%r15) (backchain) */
+			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+				      REG_15, 152);
+	}
+	/*
+	 * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"
+	 * we store the SKB header length on the stack and the SKB data
+	 * pointer in REG_SKB_DATA.
+	 */
+	if (jit->seen & SEEN_SKB) {
+		/* Header length: llgf %w1,<len>(%b1) */
+		EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1,
+			      offsetof(struct sk_buff, len));
+		/* s %w1,<data_len>(%b1) */
+		EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1,
+			   offsetof(struct sk_buff, data_len));
+		/* stg %w1,STK_OFF_HLEN(%r0,%r15) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15,
+			      STK_OFF_HLEN);
+		/* lg %skb_data,data_off(%b1) */
+		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
+			      BPF_REG_1, offsetof(struct sk_buff, data));
+	}
+	/* BPF compatibility: clear A (%b7) and X (%b8) registers */
+	if (REG_SEEN(BPF_REG_7))
+		/* lghi %b7,0 */
+		EMIT4_IMM(0xa7090000, BPF_REG_7, 0);
+	if (REG_SEEN(BPF_REG_8))
+		/* lghi %b8,0 */
+		EMIT4_IMM(0xa7090000, BPF_REG_8, 0);
+}
+
+/*
+ * Emit function epilogue: "return 0" entry (if used), exit code and return
+ */
+static void bpf_jit_epilogue(struct bpf_jit *jit)
+{
+	/* Return 0: entry point for the "jz/jnz <ret0>" branches */
+	if (jit->seen & SEEN_RET0) {
+		jit->ret0_ip = jit->prg;
+		/* lghi %b0,0 */
+		EMIT4_IMM(0xa7090000, BPF_REG_0, 0);
+	}
+	jit->exit_ip = jit->prg;	/* target of "j <exit>" */
+	/* Load exit code: lgr %r2,%b0 */
+	EMIT4(0xb9040000, REG_2, BPF_REG_0);
+	/* Restore registers */
+	save_restore_regs(jit, REGS_RESTORE);
+	/* br %r14 */
+	_EMIT2(0x07fe);
+}
+
+/*
+ * Compile eBPF instruction "i" of "fp" into s390x code. Returns the number
+ * of 'struct bpf_insn' slots consumed (2 for BPF_LD | BPF_IMM | BPF_DW, else
+ * 1) or -1 for an unknown opcode. NOTE: Use noinline because for gcov
+ * (-fprofile-arcs) gcc allocates a lot of stack space for the large switch.
+ */
+static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
+{
+	struct bpf_insn *insn = &fp->insnsi[i];
+	int jmp_off, last, insn_count = 1;
+	unsigned int func_addr, mask;
+	u32 dst_reg = insn->dst_reg;
+	u32 src_reg = insn->src_reg;
+	u32 *addrs = jit->addrs;
+	s32 imm = insn->imm;
+	s16 off = insn->off;
+
+	switch (insn->code) {
+	/*
+	 * BPF_MOV
+	 */
+	case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
+		/* llgfr %dst,%src */
+		EMIT4(0xb9160000, dst_reg, src_reg);
+		break;
+	case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+		/* lgr %dst,%src */
+		EMIT4(0xb9040000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
+		/* llilf %dst,imm */
+		EMIT6_IMM(0xc00f0000, dst_reg, imm);
+		break;
+	case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
+		/* lgfi %dst,imm */
+		EMIT6_IMM(0xc0010000, dst_reg, imm);
+		break;
+	/*
+	 * BPF_LD 64
+	 */
+	case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+	{
+		/* 16 byte instruction that uses two 'struct bpf_insn' */
+		u64 imm64;
+
+		imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
+		/* lg %dst,<d(imm)>(%l) */
+		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
+			      EMIT_CONST_U64(imm64));
+		insn_count = 2;
+		break;
+	}
+	/*
+	 * BPF_ADD
+	 */
+	case BPF_ALU | BPF_ADD | BPF_X: /* dst = (u32) dst + (u32) src */
+		/* ar %dst,%src */
+		EMIT2(0x1a00, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_ADD | BPF_X: /* dst = dst + src */
+		/* agr %dst,%src */
+		EMIT4(0xb9080000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */
+		if (imm) {
+			/* alfi %dst,imm */
+			EMIT6_IMM(0xc20b0000, dst_reg, imm);
+		}
+		EMIT_ZERO(dst_reg); /* also needed for imm == 0 */
+		break;
+	case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */
+		if (!imm)
+			break;
+		/* agfi %dst,imm */
+		EMIT6_IMM(0xc2080000, dst_reg, imm);
+		break;
+	/*
+	 * BPF_SUB
+	 */
+	case BPF_ALU | BPF_SUB | BPF_X: /* dst = (u32) dst - (u32) src */
+		/* sr %dst,%src */
+		EMIT2(0x1b00, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_SUB | BPF_X: /* dst = dst - src */
+		/* sgr %dst,%src */
+		EMIT4(0xb9090000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */
+		if (imm) {
+			/* alfi %dst,-imm */
+			EMIT6_IMM(0xc20b0000, dst_reg, -imm);
+		}
+		EMIT_ZERO(dst_reg); /* also needed for imm == 0 */
+		break;
+	case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */
+		if (imm == -0x80000000) {
+			/* -imm is not an s32: algfi %dst,0x80000000 */
+			EMIT6_IMM(0xc20a0000, dst_reg, 0x80000000);
+		} else if (imm) {
+			/* agfi %dst,-imm */
+			EMIT6_IMM(0xc2080000, dst_reg, -imm);
+		}
+		break;
+	/*
+	 * BPF_MUL
+	 */
+	case BPF_ALU | BPF_MUL | BPF_X: /* dst = (u32) dst * (u32) src */
+		/* msr %dst,%src */
+		EMIT4(0xb2520000, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_MUL | BPF_X: /* dst = dst * src */
+		/* msgr %dst,%src */
+		EMIT4(0xb90c0000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */
+		if (imm != 1) {
+			/* msfi %dst,imm */
+			EMIT6_IMM(0xc2010000, dst_reg, imm);
+		}
+		EMIT_ZERO(dst_reg); /* also needed for imm == 1 */
+		break;
+	case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */
+		if (imm == 1)
+			break;
+		/* msgfi %dst,imm */
+		EMIT6_IMM(0xc2000000, dst_reg, imm);
+		break;
+	/*
+	 * BPF_DIV / BPF_MOD
+	 */
+	case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */
+	case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */
+	{
+		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+		jit->seen |= SEEN_RET0;
+		/* ltr %src,%src (if src == 0 goto fail) */
+		EMIT2(0x1200, src_reg, src_reg);
+		/* jz <ret0> */
+		EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
+		/* lhi %w0,0 */
+		EMIT4_IMM(0xa7080000, REG_W0, 0);
+		/* lr %w1,%dst */
+		EMIT2(0x1800, REG_W1, dst_reg);
+		/* dlr %w0,%src */
+		EMIT4(0xb9970000, REG_W0, src_reg);
+		/* llgfr %dst,%rc */
+		EMIT4(0xb9160000, dst_reg, rc_reg);
+		break;
+	}
+	case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
+	case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
+	{
+		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+		jit->seen |= SEEN_RET0;
+		/* ltgr %src,%src (if src == 0 goto fail) */
+		EMIT4(0xb9020000, src_reg, src_reg);
+		/* jz <ret0> */
+		EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
+		/* lghi %w0,0 */
+		EMIT4_IMM(0xa7090000, REG_W0, 0);
+		/* lgr %w1,%dst */
+		EMIT4(0xb9040000, REG_W1, dst_reg);
+		/* dlgr %w0,%src */
+		EMIT4(0xb9870000, REG_W0, src_reg);
+		/* lgr %dst,%rc */
+		EMIT4(0xb9040000, dst_reg, rc_reg);
+		break;
+	}
+	case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */
+	case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */
+	{
+		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+		if (imm == 1) {
+			if (BPF_OP(insn->code) == BPF_MOD)
+				/* lghi %dst,0 */
+				EMIT4_IMM(0xa7090000, dst_reg, 0);
+			else
+				EMIT_ZERO(dst_reg);
+			break;
+		}
+		/* lhi %w0,0 */
+		EMIT4_IMM(0xa7080000, REG_W0, 0);
+		/* lr %w1,%dst */
+		EMIT2(0x1800, REG_W1, dst_reg);
+		/* dl %w0,<d(imm)>(%l) */
+		EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
+			      EMIT_CONST_U32(imm));
+		/* llgfr %dst,%rc */
+		EMIT4(0xb9160000, dst_reg, rc_reg);
+		break;
+	}
+	case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
+	case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
+	{
+		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
+
+		if (imm == 1) {
+			if (BPF_OP(insn->code) == BPF_MOD)
+				/* lghi %dst,0 */
+				EMIT4_IMM(0xa7090000, dst_reg, 0);
+			break;
+		}
+		/* lghi %w0,0 */
+		EMIT4_IMM(0xa7090000, REG_W0, 0);
+		/* lgr %w1,%dst */
+		EMIT4(0xb9040000, REG_W1, dst_reg);
+		/* dlg %w0,<d(imm)>(%l) */
+		EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
+			      EMIT_CONST_U64(imm));
+		/* lgr %dst,%rc */
+		EMIT4(0xb9040000, dst_reg, rc_reg);
+		break;
+	}
+	/*
+	 * BPF_AND
+	 */
+	case BPF_ALU | BPF_AND | BPF_X: /* dst = (u32) dst & (u32) src */
+		/* nr %dst,%src */
+		EMIT2(0x1400, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
+		/* ngr %dst,%src */
+		EMIT4(0xb9800000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_AND | BPF_K: /* dst = (u32) dst & (u32) imm */
+		/* nilf %dst,imm */
+		EMIT6_IMM(0xc00b0000, dst_reg, imm);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
+		/* ng %dst,<d(imm)>(%l) */
+		EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L,
+			      EMIT_CONST_U64(imm));
+		break;
+	/*
+	 * BPF_OR
+	 */
+	case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
+		/* or %dst,%src */
+		EMIT2(0x1600, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
+		/* ogr %dst,%src */
+		EMIT4(0xb9810000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
+		/* oilf %dst,imm */
+		EMIT6_IMM(0xc00d0000, dst_reg, imm);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
+		/* og %dst,<d(imm)>(%l) */
+		EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L,
+			      EMIT_CONST_U64(imm));
+		break;
+	/*
+	 * BPF_XOR
+	 */
+	case BPF_ALU | BPF_XOR | BPF_X: /* dst = (u32) dst ^ (u32) src */
+		/* xr %dst,%src */
+		EMIT2(0x1700, dst_reg, src_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_XOR | BPF_X: /* dst = dst ^ src */
+		/* xgr %dst,%src */
+		EMIT4(0xb9820000, dst_reg, src_reg);
+		break;
+	case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */
+		if (imm) {
+			/* xilf %dst,imm */
+			EMIT6_IMM(0xc0070000, dst_reg, imm);
+		}
+		EMIT_ZERO(dst_reg); /* also needed for imm == 0 */
+		break;
+	case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
+		/* xg %dst,<d(imm)>(%l) */
+		EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
+			      EMIT_CONST_U64(imm));
+		break;
+	/*
+	 * BPF_LSH
+	 */
+	case BPF_ALU | BPF_LSH | BPF_X: /* dst = (u32) dst << (u32) src */
+		/* sll %dst,0(%src) */
+		EMIT4_DISP(0x89000000, dst_reg, src_reg, 0);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_LSH | BPF_X: /* dst = dst << src */
+		/* sllg %dst,%dst,0(%src) */
+		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0);
+		break;
+	case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */
+		if (imm != 0) {
+			/* sll %dst,imm(%r0) */
+			EMIT4_DISP(0x89000000, dst_reg, REG_0, imm);
+		}
+		EMIT_ZERO(dst_reg); /* also needed for imm == 0 */
+		break;
+	case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */
+		if (imm == 0)
+			break;
+		/* sllg %dst,%dst,imm(%r0) */
+		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, REG_0, imm);
+		break;
+	/*
+	 * BPF_RSH
+	 */
+	case BPF_ALU | BPF_RSH | BPF_X: /* dst = (u32) dst >> (u32) src */
+		/* srl %dst,0(%src) */
+		EMIT4_DISP(0x88000000, dst_reg, src_reg, 0);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_RSH | BPF_X: /* dst = dst >> src */
+		/* srlg %dst,%dst,0(%src) */
+		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0);
+		break;
+	case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */
+		if (imm != 0) {
+			/* srl %dst,imm(%r0) */
+			EMIT4_DISP(0x88000000, dst_reg, REG_0, imm);
+		}
+		EMIT_ZERO(dst_reg); /* also needed for imm == 0 */
+		break;
+	case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */
+		if (imm == 0)
+			break;
+		/* srlg %dst,%dst,imm(%r0) */
+		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, REG_0, imm);
+		break;
+	/*
+	 * BPF_ARSH
+	 */
+	case BPF_ALU64 | BPF_ARSH | BPF_X: /* ((s64) dst) >>= src */
+		/* srag %dst,%dst,0(%src) */
+		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0);
+		break;
+	case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */
+		if (imm == 0)
+			break;
+		/* srag %dst,%dst,imm(%r0) */
+		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, REG_0, imm);
+		break;
+	/*
+	 * BPF_NEG
+	 */
+	case BPF_ALU | BPF_NEG: /* dst = (u32) -dst */
+		/* lcr %dst,%dst */
+		EMIT2(0x1300, dst_reg, dst_reg);
+		EMIT_ZERO(dst_reg);
+		break;
+	case BPF_ALU64 | BPF_NEG: /* dst = -dst */
+		/* lcgr %dst,%dst */
+		EMIT4(0xb9130000, dst_reg, dst_reg);
+		break;
+	/*
+	 * BPF_FROM_BE/LE
+	 */
+	case BPF_ALU | BPF_END | BPF_FROM_BE:
+		/* s390 is big endian, therefore only clear high order bytes */
+		switch (imm) {
+		case 16: /* dst = (u16) cpu_to_be16(dst) */
+			/* llghr %dst,%dst */
+			EMIT4(0xb9850000, dst_reg, dst_reg);
+			break;
+		case 32: /* dst = (u32) cpu_to_be32(dst) */
+			/* llgfr %dst,%dst */
+			EMIT4(0xb9160000, dst_reg, dst_reg);
+			break;
+		case 64: /* dst = (u64) cpu_to_be64(dst) */
+			break;
+		}
+		break;
+	case BPF_ALU | BPF_END | BPF_FROM_LE:
+		switch (imm) {
+		case 16: /* dst = (u16) cpu_to_le16(dst) */
+			/* lrvr %dst,%dst */
+			EMIT4(0xb91f0000, dst_reg, dst_reg);
+			/* srl %dst,16(%r0) */
+			EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
+			/* llghr %dst,%dst */
+			EMIT4(0xb9850000, dst_reg, dst_reg);
+			break;
+		case 32: /* dst = (u32) cpu_to_le32(dst) */
+			/* lrvr %dst,%dst */
+			EMIT4(0xb91f0000, dst_reg, dst_reg);
+			/* llgfr %dst,%dst */
+			EMIT4(0xb9160000, dst_reg, dst_reg);
+			break;
+		case 64: /* dst = (u64) cpu_to_le64(dst) */
+			/* lrvgr %dst,%dst */
+			EMIT4(0xb90f0000, dst_reg, dst_reg);
+			break;
+		}
+		break;
+	/*
+	 * BPF_ST(X)
+	 */
+	case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
+		/* stcy %src,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
+		/* sthy %src,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
+		/* sty %src,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
+		/* stg %src,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+		/* lhi %w0,imm */
+		EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
+		/* stcy %w0,off(dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
+		/* lhi %w0,imm */
+		EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
+		/* sthy %w0,off(dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+		/* llilf %w0,imm  */
+		EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
+		/* sty %w0,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+		/* lgfi %w0,imm */
+		EMIT6_IMM(0xc0010000, REG_W0, imm);
+		/* stg %w0,off(%dst) */
+		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	/*
+	 * BPF_STX XADD (atomic_add)
+	 */
+	case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */
+		/* laal %w0,%src,off(%dst) */
+		EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W0, src_reg,
+			      dst_reg, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */
+		/* laalg %w0,%src,off(%dst) */
+		EMIT6_DISP_LH(0xeb000000, 0x00ea, REG_W0, src_reg,
+			      dst_reg, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	/*
+	 * BPF_LDX
+	 */
+	case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+		/* llgc %dst,off(%src) */
+		EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+		/* llgh %dst,off(%src) */
+		EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
+		jit->seen |= SEEN_MEM;
+		break;
+	case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+		/* llgf %dst,off(%src) */
+		jit->seen |= SEEN_MEM;
+		EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
+		break;
+	case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+		/* lg %dst,off(%src) */
+		jit->seen |= SEEN_MEM;
+		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
+		break;
+	/*
+	 * BPF_JMP / CALL
+	 */
+	case BPF_JMP | BPF_CALL:
+	{
+		/*
+		 * b0 = (__bpf_call_base + imm)(b1, b2, b3, b4, b5)
+		 */
+		const u64 func = (u64)__bpf_call_base + imm;
+
+		REG_SET_SEEN(BPF_REG_5);
+		jit->seen |= SEEN_FUNC;
+		/* lg %w1,<d(func)>(%l) */
+		EMIT6_DISP(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
+			   EMIT_CONST_U64(func));
+		/* basr %r14,%w1 */
+		EMIT2(0x0d00, REG_14, REG_W1);
+		/* lgr %b0,%r2: load return value into %b0 */
+		EMIT4(0xb9040000, BPF_REG_0, REG_2);
+		break;
+	}
+	case BPF_JMP | BPF_EXIT: /* return b0 */
+		last = (i == fp->len - 1) ? 1 : 0;
+		if (last && !(jit->seen & SEEN_RET0))
+			break;
+		/* j <exit> */
+		EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
+		break;
+	/*
+	 * Branch relative (number of skipped instructions) to offset on
+	 * condition.
+	 *
+	 * Condition code to mask mapping:
+	 *
+	 * CC | Description	   | Mask
+	 * ------------------------------
+	 * 0  | Operands equal	   |	8
+	 * 1  | First operand low  |	4
+	 * 2  | First operand high |	2
+	 * 3  | Unused		   |	1
+	 *
+	 * For s390x relative branches: ip = ip + off_bytes
+	 * For BPF relative branches:	insn = insn + off_insns + 1
+	 *
+	 * For example for s390x with offset 0 we jump to the branch
+	 * instruction itself (loop) and for BPF with offset 0 we
+	 * branch to the instruction behind the branch.
+	 */
+	case BPF_JMP | BPF_JA: /* if (true) */
+		mask = 0xf000; /* j */
+		goto branch_oc;
+	case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
+		mask = 0x2000; /* jh */
+		goto branch_ks;
+	case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
+		mask = 0xa000; /* jhe */
+		goto branch_ks;
+	case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
+		mask = 0x2000; /* jh */
+		goto branch_ku;
+	case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
+		mask = 0xa000; /* jhe */
+		goto branch_ku;
+	case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
+		mask = 0x7000; /* jne */
+		goto branch_ku;
+	case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
+		mask = 0x8000; /* je */
+		goto branch_ku;
+	case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
+		mask = 0x7000; /* jnz */
+		/* lgfi %w1,imm (load sign extend imm) */
+		EMIT6_IMM(0xc0010000, REG_W1, imm);
+		/* ngr %w1,%dst */
+		EMIT4(0xb9800000, REG_W1, dst_reg);
+		goto branch_oc;
+	case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
+		mask = 0x2000; /* jh */
+		goto branch_xs;
+	case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
+		mask = 0xa000; /* jhe */
+		goto branch_xs;
+	case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
+		mask = 0x2000; /* jh */
+		goto branch_xu;
+	case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
+		mask = 0xa000; /* jhe */
+		goto branch_xu;
+	case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
+		mask = 0x7000; /* jne */
+		goto branch_xu;
+	case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
+		mask = 0x8000; /* je */
+		goto branch_xu;
+	case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
+		mask = 0x7000; /* jnz */
+		/* ngrk %w1,%dst,%src */
+		EMIT4_RRF(0xb9e40000, REG_W1, dst_reg, src_reg);
+		goto branch_oc;
+branch_ks:
+		/* lgfi %w1,imm (load sign extend imm) */
+		EMIT6_IMM(0xc0010000, REG_W1, imm);
+		/* cgrj %dst,%w1,mask,off */
+		EMIT6_PCREL(0xec000000, 0x0064, dst_reg, REG_W1, i, off, mask);
+		break;
+branch_ku:
+		/* lgfi %w1,imm (load sign extend imm) */
+		EMIT6_IMM(0xc0010000, REG_W1, imm);
+		/* clgrj %dst,%w1,mask,off */
+		EMIT6_PCREL(0xec000000, 0x0065, dst_reg, REG_W1, i, off, mask);
+		break;
+branch_xs:
+		/* cgrj %dst,%src,mask,off */
+		EMIT6_PCREL(0xec000000, 0x0064, dst_reg, src_reg, i, off, mask);
+		break;
+branch_xu:
+		/* clgrj %dst,%src,mask,off */
+		EMIT6_PCREL(0xec000000, 0x0065, dst_reg, src_reg, i, off, mask);
+		break;
+branch_oc:
+		/* brc mask,jmp_off (branch instruction needs 4 bytes) */
+		jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
+		EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
+		break;
+	/*
+	 * BPF_LD
+	 */
+	case BPF_LD | BPF_ABS | BPF_B: /* b0 = *(u8 *) (skb->data+imm) */
+	case BPF_LD | BPF_IND | BPF_B: /* b0 = *(u8 *) (skb->data+imm+src) */
+		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
+			func_addr = __pa(sk_load_byte_pos);
+		else
+			func_addr = __pa(sk_load_byte);
+		goto call_fn;
+	case BPF_LD | BPF_ABS | BPF_H: /* b0 = *(u16 *) (skb->data+imm) */
+	case BPF_LD | BPF_IND | BPF_H: /* b0 = *(u16 *) (skb->data+imm+src) */
+		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
+			func_addr = __pa(sk_load_half_pos);
+		else
+			func_addr = __pa(sk_load_half);
+		goto call_fn;
+	case BPF_LD | BPF_ABS | BPF_W: /* b0 = *(u32 *) (skb->data+imm) */
+	case BPF_LD | BPF_IND | BPF_W: /* b0 = *(u32 *) (skb->data+imm+src) */
+		if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
+			func_addr = __pa(sk_load_word_pos);
+		else
+			func_addr = __pa(sk_load_word);
+		goto call_fn;
+call_fn:
+		jit->seen |= SEEN_SKB | SEEN_RET0 | SEEN_FUNC;
+		REG_SET_SEEN(REG_14); /* Return address of possible func call */
+
+		/*
+		 * Implicit input:
+		 *  BPF_REG_6	 (R7) : skb pointer
+		 *  REG_SKB_DATA (R12): skb data pointer
+		 *
+		 * Calculated input:
+		 *  BPF_REG_2	 (R3) : offset of byte(s) to fetch in skb
+		 *  BPF_REG_5	 (R6) : return address
+		 *
+		 * Output:
+		 *  BPF_REG_0	 (R14): data read from skb
+		 *
+		 * Scratch registers (BPF_REG_1-5)
+		 */
+		/* Call function: llilf %w1,func_addr  */
+		EMIT6_IMM(0xc00f0000, REG_W1, func_addr);
+		/* Offset: lgfi %b2,imm */
+		EMIT6_IMM(0xc0010000, BPF_REG_2, imm);
+		if (BPF_MODE(insn->code) == BPF_IND)
+			/* agfr %b2,%src (%src is s32 here) */
+			EMIT4(0xb9180000, BPF_REG_2, src_reg);
+		/* basr %b5,%w1 (%b5 is call saved) */
+		EMIT2(0x0d00, BPF_REG_5, REG_W1);
+		/*
+		 * Note: For fast access we jump directly after the
+		 * jnz instruction from bpf_jit.S
+		 */
+		/* jnz <ret0> */
+		EMIT4_PCREL(0xa7740000, jit->ret0_ip - jit->prg);
+		break;
+	default: /* too complex, give up */
+		pr_err("Unknown opcode %02x\n", insn->code);
+		return -1;
+	}
+	return insn_count;
+}
+
+/*
+ * One pass over eBPF program "fp": emit s390x code (0 on success, else -1)
+ */
+static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp)
+{
+	int i, insn_count;
+
+	jit->lit = jit->lit_start;
+	jit->prg = 0;
+	bpf_jit_prologue(jit);
+	for (i = 0; i < fp->len; i += insn_count) {
+		insn_count = bpf_jit_insn(jit, fp, i);
+		if (insn_count < 0)
+			return -1;
+		/* Start of next insn; a 16 byte insn occupies two slots */
+		jit->addrs[i + insn_count] = jit->prg;
+	}
+	bpf_jit_epilogue(jit);
+
+	jit->lit_start = jit->prg;
+	jit->size = jit->lit;
+	jit->size_prg = jit->prg;
+	return 0;
+}
+
+/*
+ * Classic BPF function stub, intentionally empty. BPF programs will be
+ * converted into eBPF and then bpf_int_jit_compile() will be called.
+ */
+void bpf_jit_compile(struct bpf_prog *fp)
+{
+}
+
+/*
+ * Compile eBPF program "fp"; on any failure "fp" is simply left un-jited
+ */
+void bpf_int_jit_compile(struct bpf_prog *fp)
+{
+	struct bpf_binary_header *header;
+	struct bpf_jit jit;
+	int pass;
+
+	if (!bpf_jit_enable)
+		return;
+	memset(&jit, 0, sizeof(jit));
+	jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
+	if (jit.addrs == NULL)
+		return;
+	/*
+	 * Three initial passes:
+	 *   - 1/2: Determine clobbered registers
+	 *   - 3:   Calculate program size and addrs array
+	 */
+	for (pass = 1; pass <= 3; pass++) {
+		if (bpf_jit_prog(&jit, fp))
+			goto free_addrs;
+	}
+	/* Final pass: Allocate and generate program, free the image on error */
+	if (jit.size >= BPF_SIZE_MAX)
+		goto free_addrs;
+	header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
+	if (!header)
+		goto free_addrs;
+	if (bpf_jit_prog(&jit, fp)) {
+		bpf_jit_binary_free(header);
+		goto free_addrs;
+	}
+	if (bpf_jit_enable > 1) {
+		bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
+		if (jit.prg_buf)
+			print_fn_code(jit.prg_buf, jit.size_prg);
+	}
+	if (jit.prg_buf) {
+		set_memory_ro((unsigned long)header, header->pages);
+		fp->bpf_func = (void *) jit.prg_buf;
+		fp->jited = true;
+	}
+free_addrs:
+	kfree(jit.addrs);
+}
+
+/*
+ * Free eBPF program together with its JIT image (if it has one)
+ */
+void bpf_jit_free(struct bpf_prog *fp)
+{
+	if (fp->jited) {
+		/* Header address: image address rounded down to its page */
+		unsigned long image = (unsigned long)fp->bpf_func & PAGE_MASK;
+		struct bpf_binary_header *hdr = (void *)image;
+
+		/* Undo set_memory_ro() before the pages are handed back */
+		set_memory_rw(image, hdr->pages);
+		bpf_jit_binary_free(hdr);
+	}
+
+	bpf_prog_unlock_free(fp);
+}
diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile
new file mode 100644
index 000000000..1bd230171
--- /dev/null
+++ b/arch/s390/oprofile/Makefile
@@ -0,0 +1,10 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+		oprof.o cpu_buffer.o buffer_sync.o \
+		event_buffer.o oprofile_files.o \
+		oprofilefs.o oprofile_stats.o  \
+		timer_int.o )
+
+oprofile-y :=	$(DRIVER_OBJS) init.o backtrace.o
+oprofile-y +=	hwsampler.o
diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c
new file mode 100644
index 000000000..8a6811b2c
--- /dev/null
+++ b/arch/s390/oprofile/backtrace.c
@@ -0,0 +1,77 @@
+/*
+ * S390 Version
+ *   Copyright IBM Corp. 2005
+ *   Author(s): Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
+ */
+
+#include <linux/oprofile.h>
+
+#include <asm/processor.h> /* for struct stack_frame */
+
+/* Trace up to *depth frames from "sp" (low..high); returns the final sp */
+static unsigned long
+__show_trace(unsigned int *depth, unsigned long sp,
+	     unsigned long low, unsigned long high)
+{
+	struct stack_frame *sf;
+	struct pt_regs *regs;
+
+	while (*depth) {
+		sp = sp & PSW_ADDR_INSN;
+		if (sp < low || sp > high - sizeof(*sf))
+			return sp;
+		sf = (struct stack_frame *) sp;
+		(*depth)--;
+		oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+
+		/* Follow the backchain; it must lead to higher addresses. */
+		while (*depth) {
+			low = sp;
+			sp = sf->back_chain & PSW_ADDR_INSN;
+			if (!sp)
+				break;
+			if (sp <= low || sp > high - sizeof(*sf))
+				return sp;
+			sf = (struct stack_frame *) sp;
+			(*depth)--;
+			oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+		}
+
+		if (*depth == 0)
+			break;
+
+		/* Zero backchain detected, check for interrupt frame.  */
+		sp = (unsigned long) (sf + 1);
+		if (sp <= low || sp > high - sizeof(*regs))
+			return sp;
+		regs = (struct pt_regs *) sp;
+		(*depth)--;
+		oprofile_add_trace(sf->gprs[8] & PSW_ADDR_INSN);
+		low = sp;
+		sp = regs->gprs[15];
+	}
+	return sp;
+}
+
+/* Add up to "depth" kernel stack trace entries for "regs" to oprofile */
+void s390_backtrace(struct pt_regs * const regs, unsigned int depth)
+{
+	struct stack_frame *frame;
+	unsigned long sp;
+
+	if (user_mode(regs))
+		return;
+
+	/* Start one frame up the backchain; nothing to do if it is zero */
+	frame = (struct stack_frame *)regs->gprs[15];
+	sp = frame->back_chain;
+	if (!sp)
+		return;
+
+	/* Walk the async stack first, then carry on with the thread stack */
+	sp = __show_trace(&depth, sp, S390_lowcore.async_stack - ASYNC_SIZE,
+			  S390_lowcore.async_stack);
+
+	__show_trace(&depth, sp, S390_lowcore.thread_info,
+		     S390_lowcore.thread_info + THREAD_SIZE);
+}
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
new file mode 100644
index 000000000..ff9b4eb34
--- /dev/null
+++ b/arch/s390/oprofile/hwsampler.c
@@ -0,0 +1,1178 @@
+/*
+ * Copyright IBM Corp. 2010
+ * Author: Heinz Graalfs <graalfs@de.ibm.com>
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/semaphore.h>
+#include <linux/oom.h>
+#include <linux/oprofile.h>
+
+#include <asm/facility.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+
+#include "hwsampler.h"
+#include "op_counter.h"
+
+/* accepted range for the number of SDBs per SDBT, see hwsampler_allocate() */
+#define MAX_NUM_SDB 511
+#define MIN_NUM_SDB 1
+
+DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
+
+/*
+ * Argument block for the execute_*() helpers that are run on a target CPU
+ * through smp_call_function_single(): @buffer is handed to qsi()/lsctl(),
+ * @rc receives the return value of that call.
+ */
+struct hws_execute_parms {
+	void *buffer;
+	signed int rc;
+};
+
+/* per-CPU sampler state: control blocks, work item and error counters */
+DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
+EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer);
+
+/*
+ * Both are mutexes despite the "sem" in their names.
+ * hws_sem is taken by the hwsampler_*() entry points around hws_state
+ * changes, and around each sample handed to oprofile;
+ * hws_sem_oom is shared by allocate_sdbt() and hws_oom_callback().
+ */
+static DEFINE_MUTEX(hws_sem);
+static DEFINE_MUTEX(hws_sem_oom);
+
+/* set by hwsampler_deactivate(): the worker also drains SDBs that are not full */
+static unsigned char hws_flush_all;
+/* 1 while hwsampler_start_all() has the OOM notifier registered */
+static unsigned int hws_oom;
+/* gate for hws_ext_handler(): measurement alerts are ignored while 0 */
+static unsigned int hws_alert;
+/* workqueue running the per-CPU worker(); NULL before setup / after shutdown */
+static struct workqueue_struct *hws_wq;
+
+/* life cycle state of the sampler, 0 until hwsampler_setup() has been called */
+static unsigned int hws_state;
+enum {
+	HWS_INIT = 1,
+	HWS_DEALLOCATED,
+	HWS_STOPPED,
+	HWS_STARTED,
+	HWS_STOPPING };
+
+/* set to 1 if called by kernel during memory allocation */
+static unsigned char oom_killer_was_active;
+/* size of SDBT and SDB as of allocate API */
+static unsigned long num_sdbt = 100;
+static unsigned long num_sdb = 511;
+/* sampling interval (machine cycles) */
+static unsigned long interval;
+
+/* interval limits collected from the QSI blocks in hwsampler_setup() */
+static unsigned long min_sampler_rate;
+static unsigned long max_sampler_rate;
+
+/*
+ * execute_qsi() - smp_call_function_single() callback issuing qsi()
+ * @parms: struct hws_execute_parms; ->buffer is passed to qsi() and ->rc
+ *         receives its return value
+ */
+static void execute_qsi(void *parms)
+{
+	struct hws_execute_parms *req;
+
+	req = parms;
+	req->rc = qsi(req->buffer);
+}
+
+/*
+ * execute_ssctl() - smp_call_function_single() callback issuing lsctl()
+ * @parms: struct hws_execute_parms; ->buffer is passed to lsctl() and ->rc
+ *         receives its return value
+ */
+static void execute_ssctl(void *parms)
+{
+	struct hws_execute_parms *req;
+
+	req = parms;
+	req->rc = lsctl(req->buffer);
+}
+
+/*
+ * smp_ctl_ssctl_stop() - disable and deactivate sampling on one CPU
+ * @cpu: CPU whose sampling controls are changed
+ *
+ * Clears the enable (es) and activation (cs) controls, loads the request on
+ * @cpu and afterwards refreshes the QSI block to verify the outcome.
+ * Returns the return code of the SSCTL call.
+ */
+static int smp_ctl_ssctl_stop(int cpu)
+{
+	struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+	struct hws_execute_parms req;
+	int rc;
+
+	buf->ssctl.es = 0;
+	buf->ssctl.cs = 0;
+
+	/* load the controls on the target CPU and wait for completion */
+	req.buffer = &buf->ssctl;
+	smp_call_function_single(cpu, execute_ssctl, &req, 1);
+	rc = req.rc;
+	if (rc) {
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+		dump_stack();
+	}
+
+	/* re-read the sampling state; the rc of this query is not evaluated */
+	req.buffer = &buf->qsi;
+	smp_call_function_single(cpu, execute_qsi, &req, 1);
+
+	if (buf->qsi.es || buf->qsi.cs) {
+		printk(KERN_EMERG "CPUMF sampling did not stop properly.\n");
+		dump_stack();
+	}
+
+	return rc;
+}
+
+/*
+ * smp_ctl_ssctl_deactivate() - set sampling inactive but keep it enabled
+ * @cpu: CPU whose sampling controls are changed
+ *
+ * Requests es=1/cs=0 on @cpu and refreshes the QSI block afterwards.
+ * Returns the return code of the SSCTL call.
+ */
+static int smp_ctl_ssctl_deactivate(int cpu)
+{
+	struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+	struct hws_execute_parms req;
+	int rc;
+
+	buf->ssctl.es = 1;
+	buf->ssctl.cs = 0;
+
+	req.buffer = &buf->ssctl;
+	smp_call_function_single(cpu, execute_ssctl, &req, 1);
+	rc = req.rc;
+	if (rc)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+
+	/* re-read the sampling state; the rc of this query is not evaluated */
+	req.buffer = &buf->qsi;
+	smp_call_function_single(cpu, execute_qsi, &req, 1);
+
+	if (buf->qsi.cs)
+		printk(KERN_EMERG "CPUMF sampling was not set inactive.\n");
+
+	return rc;
+}
+
+/*
+ * smp_ctl_ssctl_enable_activate() - enable and activate sampling on one CPU
+ * @cpu:      CPU whose sampling controls are changed
+ * @interval: sampling interval to program
+ *
+ * Points TEAR at the first SDBT of @cpu and DEAR at the first entry stored
+ * in that table, requests es=1/cs=1 and refreshes the QSI block afterwards.
+ * Returns the return code of the SSCTL call; a failing QSI is only logged.
+ */
+static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval)
+{
+	struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+	struct hws_execute_parms req;
+	int rc;
+
+	buf->ssctl.h = 1;
+	buf->ssctl.tear = buf->first_sdbt;
+	buf->ssctl.dear = *(unsigned long *) buf->first_sdbt;
+	buf->ssctl.interval = interval;
+	buf->ssctl.es = 1;
+	buf->ssctl.cs = 1;
+
+	req.buffer = &buf->ssctl;
+	smp_call_function_single(cpu, execute_ssctl, &req, 1);
+	rc = req.rc;
+	if (rc)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+
+	req.buffer = &buf->qsi;
+	smp_call_function_single(cpu, execute_qsi, &req, 1);
+	if (req.rc)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu);
+
+	return rc;
+}
+
+/*
+ * smp_ctl_qsi() - refresh the QSI block of one CPU
+ * @cpu: CPU to query
+ *
+ * Runs execute_qsi() on @cpu, storing the result in that CPU's qsi block.
+ * Returns the return code of the qsi() call.
+ */
+static int smp_ctl_qsi(int cpu)
+{
+	struct hws_execute_parms req;
+
+	req.buffer = &per_cpu(sampler_cpu_buffer, cpu).qsi;
+	smp_call_function_single(cpu, execute_qsi, &req, 1);
+
+	return req.rc;
+}
+
+/*
+ * hws_ext_handler() - external interrupt handler for measurement alerts
+ * @ext_code: unused
+ * @param32:  alert bits delivered with the interrupt
+ * @param64:  unused
+ *
+ * Alerts without a sampling facility bit, or arriving while hws_alert is
+ * off, are ignored. Otherwise the bits are accumulated in the buffer of the
+ * current CPU and its worker is queued to evaluate them.
+ */
+static void hws_ext_handler(struct ext_code ext_code,
+			    unsigned int param32, unsigned long param64)
+{
+	struct hws_cpu_buffer *buf = this_cpu_ptr(&sampler_cpu_buffer);
+	unsigned int pending;
+
+	if (!(param32 & CPU_MF_INT_SF_MASK) || !hws_alert)
+		return;
+
+	inc_irq_stat(IRQEXT_CMS);
+
+	/* merge the new alert bits into those not yet fetched by the worker */
+	pending = atomic_read(&buf->ext_params) | param32;
+	atomic_xchg(&buf->ext_params, pending);
+
+	if (hws_wq)
+		queue_work(hws_wq, &buf->worker);
+}
+
+/* work function of the per-CPU work item, installed in hwsampler_setup() */
+static void worker(struct work_struct *work);
+
+/* hands the samples of one SDB to oprofile; defined after its first user */
+static void add_samples_to_oprofile(unsigned cpu, unsigned long *,
+				unsigned long *dear);
+
+/* init_all_cpu_buffers() - zero the sampler buffer of every online CPU */
+static void init_all_cpu_buffers(void)
+{
+	struct hws_cpu_buffer *buf;
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		buf = &per_cpu(sampler_cpu_buffer, cpu);
+		memset(buf, 0, sizeof(*buf));
+	}
+}
+
+/*
+ * prepare_cpu_buffers() - reset the per-run state of every online CPU
+ *
+ * Clears pending alert bits, the worker position, all statistics counters
+ * and the finish/oom/stop_mode flags. The SDBT chain (first_sdbt) and the
+ * control blocks are left untouched.
+ */
+static void prepare_cpu_buffers(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+
+		/* worker bookkeeping */
+		atomic_set(&buf->ext_params, 0);
+		buf->worker_entry = 0;
+
+		/* statistics */
+		buf->sample_overflow = 0;
+		buf->req_alert = 0;
+		buf->incorrect_sdbt_entry = 0;
+		buf->invalid_entry_address = 0;
+		buf->loss_of_sample_data = 0;
+		buf->sample_auth_change_alert = 0;
+
+		/* control flags */
+		buf->finish = 0;
+		buf->oom = 0;
+		buf->stop_mode = 0;
+	}
+}
+
+/*
+ * allocate_sdbt() - allocate sampler memory
+ * @cpu: the cpu for which sampler memory is allocated
+ *
+ * A 4K page is allocated for each requested SDBT.
+ * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs.
+ * Set ALERT_REQ mask in each SDBs trailer.
+ * Returns zero if successful, <0 otherwise.
+ *
+ * Every page is allocated outside of hws_sem_oom and only linked into the
+ * chain under that mutex, after checking that hws_oom_callback() has not
+ * run in the meantime. On failure the pages linked so far are not released
+ * by this function.
+ */
+static int allocate_sdbt(int cpu)
+{
+	int j, k, rc;
+	unsigned long *sdbt;
+	unsigned long  sdb;
+	unsigned long *tail;
+	unsigned long *trailer;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	/* refuse to allocate on top of an existing chain */
+	if (cb->first_sdbt)
+		return -EINVAL;
+
+	sdbt = NULL;
+	tail = sdbt;
+
+	for (j = 0; j < num_sdbt; j++) {
+		sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+
+		mutex_lock(&hws_sem_oom);
+		/* OOM killer might have been activated */
+		barrier();
+		if (oom_killer_was_active || !sdbt) {
+			if (sdbt)
+				free_page((unsigned long)sdbt);
+
+			/* jumps with hws_sem_oom held; released at the exit label */
+			goto allocate_sdbt_error;
+		}
+		if (cb->first_sdbt == 0)
+			cb->first_sdbt = (unsigned long)sdbt;
+
+		/* link current page to tail of chain */
+		if (tail)
+			*tail = (unsigned long)(void *)sdbt + 1;
+
+		mutex_unlock(&hws_sem_oom);
+
+		for (k = 0; k < num_sdb; k++) {
+			/* get and set SDB page */
+			sdb = get_zeroed_page(GFP_KERNEL);
+
+			mutex_lock(&hws_sem_oom);
+			/* OOM killer might have been activated */
+			barrier();
+			if (oom_killer_was_active || !sdb) {
+				if (sdb)
+					free_page(sdb);
+
+				/* jumps with hws_sem_oom held, as above */
+				goto allocate_sdbt_error;
+			}
+			*sdbt = sdb;
+			trailer = trailer_entry_ptr(*sdbt);
+			*trailer = SDB_TE_ALERT_REQ_MASK;
+			sdbt++;
+			mutex_unlock(&hws_sem_oom);
+		}
+		/* slot following the last SDB pointer will hold the link entry */
+		tail = sdbt;
+	}
+	mutex_lock(&hws_sem_oom);
+	if (oom_killer_was_active)
+		goto allocate_sdbt_error;
+
+	rc = 0;
+	/* close the ring: link the last SDBT back to the first one */
+	if (tail)
+		*tail = (unsigned long)
+			((void *)cb->first_sdbt) + 1;
+
+allocate_sdbt_exit:
+	mutex_unlock(&hws_sem_oom);
+	return rc;
+
+allocate_sdbt_error:
+	rc = -ENOMEM;
+	goto allocate_sdbt_exit;
+}
+
+/*
+ * deallocate_sdbt() - deallocate all sampler memory
+ *
+ * For each online CPU all SDBT trees are deallocated: every SDB referenced
+ * from an SDBT is freed, and each SDBT page is freed once its link entry
+ * has been followed. A chain that was never completed (allocation aborted
+ * half way) ends in an empty slot instead of a link back to the start; the
+ * SDBT page containing that slot is freed as well.
+ * Returns the number of freed pages.
+ */
+static int deallocate_sdbt(void)
+{
+	int cpu;
+	int counter = 0;
+
+	for_each_online_cpu(cpu) {
+		struct hws_cpu_buffer *cb = &per_cpu(sampler_cpu_buffer, cpu);
+		unsigned long start = cb->first_sdbt;
+		unsigned long sdbt = start;	/* SDBT page holding curr */
+		unsigned long *curr = (unsigned long *) start;
+
+		if (!start)
+			continue;
+
+		/* we'll free the SDBT after all SDBs are processed... */
+		while (1) {
+			if (!*curr) {
+				/*
+				 * Empty slot: the chain is incomplete. The
+				 * current SDBT page has not been freed yet as
+				 * no link entry was seen in it, so do it now.
+				 */
+				free_page(sdbt);
+				counter++;
+				break;
+			}
+
+			if (is_link_entry(curr)) {
+				/* read the link before its page goes away */
+				curr = get_next_sdbt(curr);
+				free_page(sdbt);
+				counter++;
+
+				/* we are done if we reach the start */
+				if ((unsigned long) curr == start)
+					break;
+				sdbt = (unsigned long) curr;
+			} else {
+				/* process SDB pointer */
+				free_page(*curr);
+				counter++;
+				curr++;
+			}
+		}
+		cb->first_sdbt = 0;
+	}
+	return counter;
+}
+
+/*
+ * start_sampling() - enable and activate sampling on one CPU
+ * @cpu: CPU to start
+ *
+ * Uses the global sampling interval. After the controls were loaded the
+ * refreshed QSI block must report sampling as both enabled and active.
+ * Returns 0 on success, the SSCTL return code or -EINVAL otherwise.
+ */
+static int start_sampling(int cpu)
+{
+	struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+	int rc;
+
+	rc = smp_ctl_ssctl_enable_activate(cpu, interval);
+	if (rc) {
+		printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu);
+		return rc;
+	}
+
+	if (!buf->qsi.es) {
+		printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu);
+		return -EINVAL;
+	}
+
+	if (!buf->qsi.cs) {
+		printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu);
+		return -EINVAL;
+	}
+
+	printk(KERN_INFO
+		"hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n",
+		cpu, interval);
+
+	return 0;
+}
+
+/*
+ * stop_sampling() - stop sampling on one CPU and report its error counters
+ * @cpu: CPU to stop
+ *
+ * The non-zero alert counters collected by the worker are logged whether
+ * or not stopping succeeded.
+ * Returns the return code of smp_ctl_ssctl_stop().
+ */
+static int stop_sampling(int cpu)
+{
+	struct hws_cpu_buffer *buf;
+	unsigned long count;
+	int rc;
+
+	rc = smp_ctl_qsi(cpu);
+	WARN_ON(rc);
+
+	buf = &per_cpu(sampler_cpu_buffer, cpu);
+	if (!rc && !buf->qsi.es)
+		printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu);
+
+	rc = smp_ctl_ssctl_stop(cpu);
+	if (rc)
+		printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n",
+				cpu, rc);
+	else
+		printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu);
+
+	count = buf->req_alert;
+	if (count)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert,"
+				" count=%lu.\n", cpu, count);
+
+	count = buf->loss_of_sample_data;
+	if (count)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data,"
+				" count=%lu.\n", cpu, count);
+
+	count = buf->invalid_entry_address;
+	if (count)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address,"
+				" count=%lu.\n", cpu, count);
+
+	count = buf->incorrect_sdbt_entry;
+	if (count)
+		printk(KERN_ERR
+				"hwsampler: CPU %d CPUMF Incorrect SDBT address,"
+				" count=%lu.\n", cpu, count);
+
+	count = buf->sample_auth_change_alert;
+	if (count)
+		printk(KERN_ERR
+				"hwsampler: CPU %d CPUMF Sample authorization change,"
+				" count=%lu.\n", cpu, count);
+
+	return rc;
+}
+
+/*
+ * check_hardware_prerequisites() - check for the required facility bit
+ *
+ * Returns 0 if facility 68 is installed, -EOPNOTSUPP otherwise.
+ */
+static int check_hardware_prerequisites(void)
+{
+	return test_facility(68) ? 0 : -EOPNOTSUPP;
+}
+/*
+ * hws_oom_callback() - the OOM callback function
+ * @nfb:   unused
+ * @dummy: unused
+ * @parm:  points to the counter of freed pages (unsigned long)
+ *
+ * In case the callback is invoked during memory allocation for the
+ *  hw sampler, all obtained memory is deallocated and a flag is set
+ *  so main sampler memory allocation can exit with a failure code.
+ * In case the callback is invoked during sampling the hw sampler
+ *  is deactivated for all CPUs.
+ *
+ * Always returns NOTIFY_OK.
+ */
+static int hws_oom_callback(struct notifier_block *nfb,
+	unsigned long dummy, void *parm)
+{
+	unsigned long *freed = parm;
+
+	mutex_lock(&hws_sem_oom);
+
+	if (hws_state == HWS_DEALLOCATED) {
+		/* during memory allocation */
+		if (oom_killer_was_active == 0) {
+			oom_killer_was_active = 1;
+			*freed += deallocate_sdbt();
+		}
+	} else {
+		struct hws_cpu_buffer *cb;
+		int cpu, i;
+
+		cpu = get_cpu();
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+		/* the oom flag of this CPU's buffer makes this a one-shot */
+		if (!cb->oom) {
+			for_each_online_cpu(i)
+				smp_ctl_ssctl_deactivate(i);
+			cb->oom = 1;
+			cb->finish = 1;
+
+			printk(KERN_INFO
+				"hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n",
+				cpu);
+		}
+		/* balance get_cpu(), otherwise preemption stays disabled */
+		put_cpu();
+	}
+
+	mutex_unlock(&hws_sem_oom);
+
+	return NOTIFY_OK;
+}
+
+/* registered while memory is being allocated and while sampling is running */
+static struct notifier_block hws_oom_notifier = {
+	.notifier_call = hws_oom_callback
+};
+
+/*
+ * hws_cpu_callback() - CPU notifier callback
+ *
+ * We do not have sampler space available for all possible CPUs.
+ * All CPUs should be online when hw sampling is activated, so the event is
+ * answered with NOTIFY_BAD as soon as sampler memory has been allocated
+ * (state beyond HWS_DEALLOCATED) and with NOTIFY_OK before that.
+ */
+static int hws_cpu_callback(struct notifier_block *nfb,
+	unsigned long action, void *hcpu)
+{
+	if (hws_state <= HWS_DEALLOCATED)
+		return NOTIFY_OK;
+
+	return NOTIFY_BAD;
+}
+
+/* registered in hwsampler_setup(), unregistered in hwsampler_shutdown() */
+static struct notifier_block hws_cpu_notifier = {
+	.notifier_call = hws_cpu_callback
+};
+
+/**
+ * hwsampler_deactivate() - set hardware sampling temporarily inactive
+ * @cpu:  specifies the CPU to be set inactive.
+ *
+ * Deactivates hw sampling temporarily and flushes the buffer by pushing all
+ * pending samples to the oprofile buffer. Intended to be called on memory
+ * unmap and when a task is exiting. Nothing is changed unless the sampler
+ * is in state HWS_STARTED and sampling is currently active on @cpu.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_deactivate(unsigned int cpu)
+{
+	struct hws_cpu_buffer *buf;
+	int rc = 0;
+
+	mutex_lock(&hws_sem);
+
+	buf = &per_cpu(sampler_cpu_buffer, cpu);
+	if (hws_state != HWS_STARTED)
+		goto deactivate_unlock;
+
+	rc = smp_ctl_qsi(cpu);
+	WARN_ON(rc);
+	if (!buf->qsi.cs)
+		goto deactivate_unlock;
+
+	rc = smp_ctl_ssctl_deactivate(cpu);
+	if (rc) {
+		printk(KERN_INFO
+		"hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu);
+		buf->finish = 1;
+		hws_state = HWS_STOPPING;
+	} else {
+		hws_flush_all = 1;
+		/* Add work to queue to read pending samples.*/
+		queue_work_on(cpu, hws_wq, &buf->worker);
+	}
+
+deactivate_unlock:
+	mutex_unlock(&hws_sem);
+
+	/* wait for queued workers without holding hws_sem, which they take */
+	if (hws_wq)
+		flush_workqueue(hws_wq);
+
+	return rc;
+}
+
+/**
+ * hwsampler_activate() - activate/resume hardware sampling which was deactivated
+ * @cpu:  specifies the CPU to be set active.
+ *
+ * Re-activates hw sampling; this should be called in pair with
+ * hwsampler_deactivate(). Nothing is changed unless the sampler is in state
+ * HWS_STARTED and sampling is currently inactive on @cpu.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_activate(unsigned int cpu)
+{
+	struct hws_cpu_buffer *buf;
+	int rc = 0;
+
+	mutex_lock(&hws_sem);
+
+	buf = &per_cpu(sampler_cpu_buffer, cpu);
+	if (hws_state != HWS_STARTED)
+		goto activate_unlock;
+
+	rc = smp_ctl_qsi(cpu);
+	WARN_ON(rc);
+	if (buf->qsi.cs)
+		goto activate_unlock;
+
+	/* back to normal operation: only full SDBs are drained again */
+	hws_flush_all = 0;
+	rc = smp_ctl_ssctl_enable_activate(cpu, interval);
+	if (rc) {
+		printk(KERN_ERR
+		"CPU %d, CPUMF activate sampling failed.\n",
+			 cpu);
+	}
+
+activate_unlock:
+	mutex_unlock(&hws_sem);
+
+	return rc;
+}
+
+/*
+ * check_qsi_on_setup() - verify the sampling state of all online CPUs
+ *
+ * Each CPU must answer the QSI query and report sampling as authorized.
+ * Sampling that is still enabled is stopped.
+ * Returns 0 on success, -EOPNOTSUPP if QSI failed, -EINVAL if sampling is
+ * not authorized or could not be stopped.
+ */
+static int check_qsi_on_setup(void)
+{
+	unsigned int cpu;
+	int rc;
+
+	for_each_online_cpu(cpu) {
+		struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+
+		rc = smp_ctl_qsi(cpu);
+		if (WARN_ON(rc))
+			return -EOPNOTSUPP;
+
+		if (!buf->qsi.as) {
+			printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n");
+			return -EINVAL;
+		}
+
+		if (!buf->qsi.es)
+			continue;
+
+		printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n");
+		rc = smp_ctl_ssctl_stop(cpu);
+		if (rc)
+			return -EINVAL;
+
+		printk(KERN_INFO
+			"CPU %d, CPUMF Sampling stopped now.\n", cpu);
+	}
+	return 0;
+}
+
+/*
+ * check_qsi_on_start() - check that sampling can be started everywhere
+ *
+ * On every online CPU sampling has to be authorized and must be neither
+ * enabled nor active. A failing QSI query only triggers a warning.
+ * Returns 0 if all CPUs qualify, -EINVAL otherwise.
+ */
+static int check_qsi_on_start(void)
+{
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu) {
+		struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+		int rc = smp_ctl_qsi(cpu);
+
+		WARN_ON(rc);
+
+		if (!buf->qsi.as || buf->qsi.es || buf->qsi.cs)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/* worker_on_start() - let the worker of @cpu begin at the first SDBT entry */
+static void worker_on_start(unsigned int cpu)
+{
+	struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+
+	buf->worker_entry = buf->first_sdbt;
+}
+
+/*
+ * worker_check_error() - evaluate the alert bits fetched by the worker
+ * @cpu:        CPU whose buffer is updated
+ * @ext_params: accumulated alert bits of that CPU
+ *
+ * Bumps the counter belonging to each alert bit that is set.
+ * Returns -EINVAL if the worker has no valid SDBT entry to work on or if
+ * an invalid entry address, incorrect SDBT entry or sample authorization
+ * change alert was raised; 0 otherwise.
+ */
+static int worker_check_error(unsigned int cpu, int ext_params)
+{
+	struct hws_cpu_buffer *buf = &per_cpu(sampler_cpu_buffer, cpu);
+	unsigned long *entry = (unsigned long *) buf->worker_entry;
+
+	if (!entry)
+		return -EINVAL;
+	if (!*entry)
+		return -EINVAL;
+
+	/* alerts that are only counted */
+	if (ext_params & CPU_MF_INT_SF_PRA)
+		buf->req_alert++;
+	if (ext_params & CPU_MF_INT_SF_LSDA)
+		buf->loss_of_sample_data++;
+
+	/* alerts that additionally make the worker bail out */
+	if (ext_params & CPU_MF_INT_SF_IAE)
+		buf->invalid_entry_address++;
+	if (ext_params & CPU_MF_INT_SF_ISE)
+		buf->incorrect_sdbt_entry++;
+	if (ext_params & CPU_MF_INT_SF_SACA)
+		buf->sample_auth_change_alert++;
+
+	if (ext_params & (CPU_MF_INT_SF_IAE | CPU_MF_INT_SF_ISE |
+			  CPU_MF_INT_SF_SACA))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * worker_on_finish() - stop sampling on @cpu and pass the request on
+ * @cpu: CPU the worker is running for
+ *
+ * Acts only if the finish flag of @cpu is set and sampling is still enabled
+ * there. After stopping sampling on @cpu, every other online CPU that has
+ * not been asked to finish yet gets its own finish flag set and its own
+ * worker queued, so that it stops sampling as well.
+ */
+static void worker_on_finish(unsigned int cpu)
+{
+	struct hws_cpu_buffer *cb = &per_cpu(sampler_cpu_buffer, cpu);
+	int rc, i;
+
+	if (!cb->finish)
+		return;
+
+	rc = smp_ctl_qsi(cpu);
+	WARN_ON(rc);
+	if (!cb->qsi.es)
+		return;
+
+	printk(KERN_INFO
+		"hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n",
+		cpu);
+	rc = smp_ctl_ssctl_stop(cpu);
+	if (rc)
+		printk(KERN_INFO
+			"hwsampler: CPU %d, CPUMF Deactivation failed.\n",
+			cpu);
+
+	for_each_online_cpu(i) {
+		struct hws_cpu_buffer *other;
+
+		if (i == cpu)
+			continue;
+
+		/*
+		 * Look at the buffer of CPU i, not at our own one: our finish
+		 * flag is already set, so testing it would never queue any
+		 * work for the other CPUs.
+		 */
+		other = &per_cpu(sampler_cpu_buffer, i);
+		if (!other->finish) {
+			other->finish = 1;
+			queue_work_on(i, hws_wq, &other->worker);
+		}
+	}
+}
+
+/*
+ * worker_on_interrupt() - drain the sample data blocks of one CPU
+ * @cpu: CPU whose buffer is processed
+ *
+ * Starting at the SDBT entry remembered in worker_entry, every SDB whose
+ * trailer has the buffer-full bit set is handed to oprofile and its trailer
+ * is reset. With hws_flush_all set, the first SDB that is not full is
+ * processed as well before the loop ends. worker_entry is advanced after
+ * each SDB so that the next invocation continues where this one stopped.
+ */
+static void worker_on_interrupt(unsigned int cpu)
+{
+	unsigned long *sdbt;
+	unsigned char done;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	sdbt = (unsigned long *) cb->worker_entry;
+
+	done = 0;
+	/* do not proceed if stop was entered,
+	 * forget the buffers not yet processed */
+	while (!done && !cb->stop_mode) {
+		unsigned long *trailer;
+		struct hws_trailer_entry *te;
+		/* always NULL here, so the whole SDB up to its trailer is scanned */
+		unsigned long *dear = 0;
+
+		trailer = trailer_entry_ptr(*sdbt);
+		/* leave loop if no more work to do */
+		if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) {
+			done = 1;
+			/* when flushing, this partly filled SDB is still drained */
+			if (!hws_flush_all)
+				continue;
+		}
+
+		te = (struct hws_trailer_entry *)trailer;
+		cb->sample_overflow += te->overflow;
+
+		add_samples_to_oprofile(cpu, sdbt, dear);
+
+		/* reset trailer */
+		/*
+		 * NOTE(review): 0x40 in the first trailer byte presumably keeps
+		 * the alert request bit while clearing buffer-full; confirm
+		 * against the SDB_TE_* mask definitions.
+		 */
+		xchg((unsigned char *) te, 0x40);
+
+		/* advance to next sdb slot in current sdbt */
+		sdbt++;
+		/* in case link bit is set use address w/o link bit */
+		if (is_link_entry(sdbt))
+			sdbt = get_next_sdbt(sdbt);
+
+		cb->worker_entry = (unsigned long)sdbt;
+	}
+}
+
+/*
+ * add_samples_to_oprofile() - hand the samples of one SDB to oprofile
+ * @cpu:  unused
+ * @sdbt: SDBT entry holding the address of the SDB to process
+ * @dear: optional end of the valid sample data; if NULL the SDB is scanned
+ *        up to its trailer, if it lies beyond the trailer nothing is done
+ *
+ * Scanning stops at the first entry that is not marked as basic sampling
+ * data. Processed entries get their mark cleared so they are not reported
+ * twice. User and kernel samples are dropped unless enabled in
+ * counter_config.
+ */
+static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
+		unsigned long *dear)
+{
+	struct hws_basic_entry *sample;
+	unsigned long *limit;
+
+	limit = trailer_entry_ptr(*sdbt);
+	if (dear) {
+		if (dear > limit)
+			return;
+		limit = dear;
+	}
+
+	for (sample = (struct hws_basic_entry *)(*sdbt);
+	     (unsigned long *)sample < limit;
+	     sample++) {
+		struct pt_regs *regs = NULL;
+		struct task_struct *tsk = NULL;
+
+		/*
+		 * Check sampling mode, 1 indicates basic (=customer) sampling
+		 * mode. Anything else means the slot is not yet written.
+		 */
+		if (sample->def != 1)
+			break;
+
+		/* make sure we don't use it twice,
+		 * the next time the sampler will set it again */
+		sample->def = 0;
+
+		/* Get pt_regs. */
+		if (sample->P == 1) {
+			/* userspace sample */
+			unsigned int pid = sample->prim_asn;
+
+			if (!counter_config.user)
+				continue;
+			rcu_read_lock();
+			tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
+			if (tsk)
+				regs = task_pt_regs(tsk);
+			rcu_read_unlock();
+			/*
+			 * NOTE(review): tsk and regs are used below, after
+			 * the RCU read section has ended - confirm that the
+			 * task cannot go away in between.
+			 */
+		} else {
+			/* kernelspace sample */
+			if (!counter_config.kernel)
+				continue;
+			regs = task_pt_regs(current);
+		}
+
+		mutex_lock(&hws_sem);
+		oprofile_add_ext_hw_sample(sample->ia, regs, 0,
+				!sample->P, tsk);
+		mutex_unlock(&hws_sem);
+	}
+}
+
+/*
+ * worker() - work function of the per-CPU sampler work item
+ * @work: the worker member embedded in a struct hws_cpu_buffer
+ *
+ * Fetches and clears the accumulated alert bits, evaluates them and, unless
+ * an error was found, drains the full SDBs. The finish flag is tested twice
+ * on purpose: it is re-read after the drain step has run.
+ */
+static void worker(struct work_struct *work)
+{
+	struct hws_cpu_buffer *buf =
+		container_of(work, struct hws_cpu_buffer, worker);
+	unsigned int cpu = smp_processor_id();
+	int alerts = atomic_xchg(&buf->ext_params, 0);
+
+	/* first run after (re)start: begin at the head of the SDBT chain */
+	if (!buf->worker_entry)
+		worker_on_start(cpu);
+
+	if (worker_check_error(cpu, alerts))
+		return;
+
+	if (!buf->finish)
+		worker_on_interrupt(cpu);
+
+	if (buf->finish)
+		worker_on_finish(cpu);
+}
+
+/**
+ * hwsampler_allocate() - allocate memory for the hardware sampler
+ * @sdbt:  number of SDBTs per online CPU (must be > 0)
+ * @sdb:   number of SDBs per SDBT (minimum 1, maximum 511)
+ *
+ * Only allowed in state HWS_DEALLOCATED; on success the state changes to
+ * HWS_STOPPED. The OOM notifier is registered for the duration of the
+ * allocation. If the allocation fails for any CPU, everything allocated so
+ * far is released again so that a later call can start from scratch.
+ *
+ * Returns 0 on success, !0 on failure (-EINVAL for a wrong state or
+ * invalid parameters, -ENOMEM if the allocation failed).
+ */
+int hwsampler_allocate(unsigned long sdbt, unsigned long sdb)
+{
+	int cpu, rc;
+
+	mutex_lock(&hws_sem);
+
+	rc = -EINVAL;
+	if (hws_state != HWS_DEALLOCATED)
+		goto allocate_exit;
+
+	if (sdbt < 1)
+		goto allocate_exit;
+
+	if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB)
+		goto allocate_exit;
+
+	num_sdbt = sdbt;
+	num_sdb = sdb;
+
+	oom_killer_was_active = 0;
+	register_oom_notifier(&hws_oom_notifier);
+
+	rc = 0;
+	for_each_online_cpu(cpu) {
+		rc = allocate_sdbt(cpu);
+		if (rc)
+			break;
+	}
+	unregister_oom_notifier(&hws_oom_notifier);
+	if (rc || oom_killer_was_active)
+		goto allocate_error;
+
+	hws_state = HWS_STOPPED;
+	rc = 0;
+
+allocate_exit:
+	mutex_unlock(&hws_sem);
+	return rc;
+
+allocate_error:
+	/*
+	 * Release the trees built so far. The OOM notifier is unregistered
+	 * at this point, so nobody else touches the chains; if the OOM
+	 * callback already freed them, first_sdbt is 0 everywhere and this
+	 * is a no-op. Without it the pages would leak and first_sdbt would
+	 * stay set, making every later allocation attempt fail.
+	 */
+	deallocate_sdbt();
+	rc = -ENOMEM;
+	printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n");
+	goto allocate_exit;
+}
+
+/**
+ * hwsampler_deallocate() - deallocate hardware sampler memory
+ *
+ * Only allowed in state HWS_STOPPED. Measurement alert interrupts are
+ * switched off before the sampler memory is released; the state changes to
+ * HWS_DEALLOCATED.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_deallocate(void)
+{
+	int rc = -EINVAL;
+
+	mutex_lock(&hws_sem);
+
+	if (hws_state == HWS_STOPPED) {
+		irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+		hws_alert = 0;
+		deallocate_sdbt();
+
+		hws_state = HWS_DEALLOCATED;
+		rc = 0;
+	}
+
+	mutex_unlock(&hws_sem);
+
+	return rc;
+}
+
+/*
+ * hwsampler_query_min_interval() - smallest accepted sampling interval
+ *
+ * Returns the value collected by hwsampler_setup(); 0 before that.
+ */
+unsigned long hwsampler_query_min_interval(void)
+{
+	return min_sampler_rate;
+}
+
+/*
+ * hwsampler_query_max_interval() - largest accepted sampling interval
+ *
+ * Returns the value collected by hwsampler_setup(); 0 before that.
+ */
+unsigned long hwsampler_query_max_interval(void)
+{
+	return max_sampler_rate;
+}
+
+/*
+ * hwsampler_get_sample_overflow_count() - overflow count of one CPU
+ * @cpu: CPU to query
+ *
+ * Returns the sum of the overflow values the worker took from the trailers
+ * of the SDBs it processed for @cpu.
+ */
+unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu)
+{
+	return per_cpu(sampler_cpu_buffer, cpu).sample_overflow;
+}
+
+/*
+ * hwsampler_setup() - one-time initialization of the hardware sampler
+ *
+ * Only possible while hws_state is still 0. Checks the hardware, creates
+ * the workqueue, prepares the per-CPU work items, derives the usable range
+ * of sampling intervals from the QSI blocks of all online CPUs and
+ * registers the external interrupt handler. On success the state is
+ * HWS_DEALLOCATED; a failure after the initial state check leaves it at
+ * HWS_INIT.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_setup(void)
+{
+	struct hws_cpu_buffer *buf;
+	int cpu;
+	int rc = -EINVAL;
+
+	mutex_lock(&hws_sem);
+
+	if (hws_state)
+		goto setup_exit;
+
+	hws_state = HWS_INIT;
+
+	init_all_cpu_buffers();
+
+	rc = check_hardware_prerequisites();
+	if (rc)
+		goto setup_exit;
+
+	rc = check_qsi_on_setup();
+	if (rc)
+		goto setup_exit;
+
+	hws_wq = create_workqueue("hwsampler");
+	if (!hws_wq) {
+		rc = -EINVAL;
+		goto setup_exit;
+	}
+
+	register_cpu_notifier(&hws_cpu_notifier);
+
+	for_each_online_cpu(cpu) {
+		buf = &per_cpu(sampler_cpu_buffer, cpu);
+		INIT_WORK(&buf->worker, worker);
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+
+		/* use the largest of the minimum rates reported by the CPUs */
+		if (!min_sampler_rate) {
+			min_sampler_rate = buf->qsi.min_sampl_rate;
+		} else if (min_sampler_rate != buf->qsi.min_sampl_rate) {
+			printk(KERN_WARNING
+				"hwsampler: different min sampler rate values.\n");
+			if (min_sampler_rate < buf->qsi.min_sampl_rate)
+				min_sampler_rate = buf->qsi.min_sampl_rate;
+		}
+
+		/* use the smallest of the maximum rates reported by the CPUs */
+		if (!max_sampler_rate) {
+			max_sampler_rate = buf->qsi.max_sampl_rate;
+		} else if (max_sampler_rate != buf->qsi.max_sampl_rate) {
+			printk(KERN_WARNING
+				"hwsampler: different max sampler rate values.\n");
+			if (max_sampler_rate > buf->qsi.max_sampl_rate)
+				max_sampler_rate = buf->qsi.max_sampl_rate;
+		}
+	}
+	register_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler);
+
+	hws_state = HWS_DEALLOCATED;
+	rc = 0;
+
+setup_exit:
+	mutex_unlock(&hws_sem);
+	return rc;
+}
+
+/*
+ * hwsampler_shutdown() - tear down the hardware sampler
+ *
+ * Only acts in state HWS_DEALLOCATED or HWS_STOPPED: pending work is
+ * flushed, sampler memory still allocated is released, the workqueue is
+ * destroyed and the external interrupt handler is unregistered. The state
+ * is set to HWS_INIT afterwards. The CPU notifier is unregistered in any
+ * case.
+ *
+ * Returns 0 on success, -EINVAL if called in any other state.
+ */
+int hwsampler_shutdown(void)
+{
+	int rc = -EINVAL;
+
+	mutex_lock(&hws_sem);
+
+	if (hws_state != HWS_DEALLOCATED && hws_state != HWS_STOPPED)
+		goto shutdown_exit;
+
+	/* the worker path takes hws_sem, so drop it while flushing */
+	mutex_unlock(&hws_sem);
+
+	if (hws_wq)
+		flush_workqueue(hws_wq);
+
+	mutex_lock(&hws_sem);
+
+	/* the state is checked again after hws_sem was dropped */
+	if (hws_state == HWS_STOPPED) {
+		irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+		hws_alert = 0;
+		deallocate_sdbt();
+	}
+	if (hws_wq) {
+		destroy_workqueue(hws_wq);
+		hws_wq = NULL;
+	}
+
+	unregister_external_irq(EXT_IRQ_MEASURE_ALERT, hws_ext_handler);
+	hws_state = HWS_INIT;
+	rc = 0;
+
+shutdown_exit:
+	mutex_unlock(&hws_sem);
+
+	unregister_cpu_notifier(&hws_cpu_notifier);
+
+	return rc;
+}
+
+/**
+ * hwsampler_start_all() - start hardware sampling on all online CPUs
+ * @rate:  specifies the used interval when samples are taken
+ *
+ * Only allowed in state HWS_STOPPED and with @rate inside the range
+ * collected by hwsampler_setup(). If any CPU fails to start, sampling is
+ * stopped on all CPUs again. On success the state becomes HWS_STARTED, the
+ * OOM notifier is registered and measurement alert interrupts are let in.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_start_all(unsigned long rate)
+{
+	int cpu;
+	int rc = -EINVAL;
+
+	mutex_lock(&hws_sem);
+
+	hws_oom = 0;
+
+	if (hws_state != HWS_STOPPED)
+		goto start_all_exit;
+
+	/* the interval is stored even if it is rejected right below */
+	interval = rate;
+
+	/* fail if rate is not valid */
+	if (interval < min_sampler_rate || interval > max_sampler_rate)
+		goto start_all_exit;
+
+	rc = check_qsi_on_start();
+	if (rc)
+		goto start_all_exit;
+
+	prepare_cpu_buffers();
+
+	for_each_online_cpu(cpu) {
+		rc = start_sampling(cpu);
+		if (rc)
+			break;
+	}
+	if (rc) {
+		/* roll back: stop every CPU, started or not */
+		for_each_online_cpu(cpu)
+			stop_sampling(cpu);
+		goto start_all_exit;
+	}
+	hws_state = HWS_STARTED;
+
+start_all_exit:
+	mutex_unlock(&hws_sem);
+
+	if (rc)
+		return rc;
+
+	register_oom_notifier(&hws_oom_notifier);
+	hws_oom = 1;
+	hws_flush_all = 0;
+	/* now let them in, 1407 CPUMF external interrupts */
+	hws_alert = 1;
+	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+	return 0;
+}
+
+/**
+ * hwsampler_stop_all() - stop hardware sampling on all online CPUs
+ *
+ * Does nothing in state HWS_INIT. Otherwise the state is HWS_STOPPING while
+ * the CPUs are stopped and pending work is flushed, and HWS_STOPPED at the
+ * end. The OOM notifier is unregistered if hwsampler_start_all() had
+ * registered it.
+ *
+ * Returns 0 on success, !0 on failure (the error of the last CPU that
+ * failed to stop).
+ */
+int hwsampler_stop_all(void)
+{
+	int cpu, cpu_rc;
+	int rc = 0;
+
+	mutex_lock(&hws_sem);
+	if (hws_state == HWS_INIT) {
+		mutex_unlock(&hws_sem);
+		return 0;
+	}
+	hws_state = HWS_STOPPING;
+	mutex_unlock(&hws_sem);
+
+	for_each_online_cpu(cpu) {
+		/* tell a running worker to leave the remaining SDBs alone */
+		per_cpu(sampler_cpu_buffer, cpu).stop_mode = 1;
+		cpu_rc = stop_sampling(cpu);
+		if (cpu_rc)
+			rc = cpu_rc;
+	}
+
+	if (hws_wq)
+		flush_workqueue(hws_wq);
+
+	mutex_lock(&hws_sem);
+	if (hws_oom) {
+		unregister_oom_notifier(&hws_oom_notifier);
+		hws_oom = 0;
+	}
+	hws_state = HWS_STOPPED;
+	mutex_unlock(&hws_sem);
+
+	return rc;
+}
diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h
new file mode 100644
index 000000000..a483d06f2
--- /dev/null
+++ b/arch/s390/oprofile/hwsampler.h
@@ -0,0 +1,63 @@
+/*
+ * CPUMF HW sampler functions and internal structures
+ *
+ *    Copyright IBM Corp. 2010
+ *    Author(s): Heinz Graalfs <graalfs@de.ibm.com>
+ */
+
+#ifndef HWSAMPLER_H_
+#define HWSAMPLER_H_
+
+#include <linux/workqueue.h>
+#include <asm/cpu_mf.h>
+
+/*
+ * Request block handed to lsctl() (see execute_ssctl() in hwsampler.c).
+ * The bit numbers in the comments count from the most significant bit of
+ * the first eight bytes.
+ */
+struct hws_ssctl_request_block     /* SET SAMPLING CONTROLS req block   */
+{ /* bytes 0 - 7  Bit(s) */
+	unsigned int s:1;           /* 0: maximum buffer indicator       */
+	unsigned int h:1;           /* 1: part. level reserved for VM use*/
+	unsigned long b2_53:52;     /* 2-53: zeros                       */
+	unsigned int es:1;          /* 54: sampling enable control       */
+	unsigned int b55_61:7;      /* 55-61: - zeros                    */
+	unsigned int cs:1;          /* 62: sampling activation control   */
+	unsigned int b63:1;         /* 63: zero                          */
+	unsigned long interval;     /* 8-15: sampling interval           */
+	unsigned long tear;         /* 16-23: TEAR contents              */
+	unsigned long dear;         /* 24-31: DEAR contents              */
+	/* 32-63:                                                        */
+	unsigned long rsvrd1;       /* reserved                          */
+	unsigned long rsvrd2;       /* reserved                          */
+	unsigned long rsvrd3;       /* reserved                          */
+	unsigned long rsvrd4;       /* reserved                          */
+};
+
+/* Per-CPU state of the hardware sampler (sampler_cpu_buffer in hwsampler.c) */
+struct hws_cpu_buffer {
+	unsigned long first_sdbt;       /* @ of 1st SDB-Table for this CP*/
+	unsigned long worker_entry;     /* SDBT entry the worker continues at */
+	unsigned long sample_overflow;  /* taken from SDB ...            */
+	struct hws_qsi_info_block qsi;  /* result of the last QSI query  */
+	struct hws_ssctl_request_block ssctl; /* controls loaded via SSCTL */
+	struct work_struct worker;      /* drains the SDBs of this CPU   */
+	atomic_t ext_params;            /* alert bits not yet evaluated  */
+	/* counters of the alerts seen by the worker */
+	unsigned long req_alert;
+	unsigned long loss_of_sample_data;
+	unsigned long invalid_entry_address;
+	unsigned long incorrect_sdbt_entry;
+	unsigned long sample_auth_change_alert;
+	unsigned int finish:1;          /* worker shall stop sampling    */
+	unsigned int oom:1;             /* OOM callback already handled  */
+	unsigned int stop_mode:1;       /* stop entered, skip pending SDBs */
+};
+
+/*
+ * Interface of the hardware sampler, implemented in hwsampler.c.
+ * The int returning functions return 0 on success and !0 on failure.
+ */
+int hwsampler_setup(void);
+int hwsampler_shutdown(void);
+int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
+int hwsampler_deallocate(void);
+unsigned long hwsampler_query_min_interval(void);
+unsigned long hwsampler_query_max_interval(void);
+int hwsampler_start_all(unsigned long interval);
+int hwsampler_stop_all(void);
+int hwsampler_deactivate(unsigned int cpu);
+int hwsampler_activate(unsigned int cpu);
+unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu);
+
+#endif /*HWSAMPLER_H_*/
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
new file mode 100644
index 000000000..bc927a09a
--- /dev/null
+++ b/arch/s390/oprofile/init.c
@@ -0,0 +1,513 @@
+/*
+ * S390 Version
+ *   Copyright IBM Corp. 2002, 2011
+ *   Author(s): Thomas Spatzier (tspat@de.ibm.com)
+ *   Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
+ *   Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
+ *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
+ *
+ * @remark Copyright 2002-2011 OProfile authors
+ */
+
+#include <linux/oprofile.h>
+#include <linux/perf_event.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <asm/processor.h>
+
+#include "../../../drivers/oprofile/oprof.h"
+
+extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
+
+#include "hwsampler.h"
+#include "op_counter.h"
+
+#define DEFAULT_INTERVAL	4127518
+
+#define DEFAULT_SDBT_BLOCKS	1
+#define DEFAULT_SDB_BLOCKS	511
+
+static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
+static unsigned long oprofile_min_interval;
+static unsigned long oprofile_max_interval;
+
+static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
+static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
+
+static int hwsampler_enabled;
+static int hwsampler_running;	/* start_mutex must be held to change */
+static int hwsampler_available;
+
+static struct oprofile_operations timer_ops;
+
+struct op_counter_config counter_config;
+
+enum __force_cpu_type {
+	reserved = 0,		/* do not force */
+	timer,
+};
+static int force_cpu_type;
+
+static int set_cpu_type(const char *str, struct kernel_param *kp)
+{
+	/* Any value but "timer" clears the override ("do not force"). */
+	force_cpu_type = strcmp(str, "timer") ? reserved : timer;
+
+	if (force_cpu_type == timer)
+		printk(KERN_INFO "oprofile: forcing timer to be returned "
+		                 "as cpu type\n");
+
+	/* The parameter value is never rejected. */
+	return 0;
+}
+module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
+MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling "
+		           "(report cpu_type \"timer\")");
+
+static int __oprofile_hwsampler_start(void)
+{
+	int rc = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+
+	if (rc)
+		return rc;
+
+	/* Buffers exist now; give them back if sampling cannot be started. */
+	rc = hwsampler_start_all(oprofile_hw_interval);
+	if (!rc)
+		return 0;
+	hwsampler_deallocate();
+	return rc;
+}
+
+static int oprofile_hwsampler_start(void)
+{
+	int rc;
+
+	/* Latch the mode: stop() looks at hwsampler_running, not _enabled. */
+	hwsampler_running = hwsampler_enabled;
+	if (!hwsampler_running)
+		return timer_ops.start();
+
+	/* Hardware mode: reserve sampling, release it again on failure. */
+	rc = perf_reserve_sampling();
+	if (rc)
+		return rc;
+
+	rc = __oprofile_hwsampler_start();
+	if (rc)
+		perf_release_sampling();
+	return rc;
+}
+
+static void oprofile_hwsampler_stop(void)
+{
+	/* Undo whichever mode oprofile_hwsampler_start() latched. */
+	if (hwsampler_running) {
+		hwsampler_stop_all();
+		hwsampler_deallocate();
+		perf_release_sampling();
+	} else {
+		/* Timer mode was started; hand over to the timer ops. */
+		timer_ops.stop();
+	}
+}
+
+/*
+ * File ops used for:
+ * /dev/oprofile/0/enabled
+ * /dev/oprofile/hwsampling/hwsampler  (cpu_type = timer)
+ */
+
+static ssize_t hwsampler_read(struct file *file, char __user *buf,
+		size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
+}
+
+static ssize_t hwsampler_write(struct file *file, char const __user *buf,
+		size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+
+	retval = oprofilefs_ulong_from_user(&val, buf, count);
+	if (retval <= 0)
+		return retval;
+
+	if (val != 0 && val != 1)
+		return -EINVAL;
+
+	if (oprofile_started)
+		/*
+		 * Mode changes are refused while profiling runs.  Safe
+		 * to do without locking as we set hwsampler_running in
+		 * start() when start_mutex is held.
+		 */
+		return -EBUSY;
+
+	hwsampler_enabled = val;
+
+	return count;
+}
+
+static const struct file_operations hwsampler_fops = {
+	.read		= hwsampler_read,
+	.write		= hwsampler_write,
+};
+
+/*
+ * File ops used for:
+ * /dev/oprofile/0/count
+ * /dev/oprofile/hwsampling/hw_interval  (cpu_type = timer)
+ *
+ * Make sure that the value is within the hardware range.
+ */
+
+static ssize_t hw_interval_read(struct file *file, char __user *buf,
+				size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
+					count, offset);
+}
+
+static ssize_t hw_interval_write(struct file *file, char const __user *buf,
+				 size_t count, loff_t *offset)
+{
+	unsigned long interval;
+	int rc;
+
+	if (*offset)
+		return -EINVAL;
+	rc = oprofilefs_ulong_from_user(&interval, buf, count);
+	if (rc <= 0)
+		return rc;
+
+	/* Out-of-range requests are clamped, not rejected. */
+	if (interval < oprofile_min_interval)
+		interval = oprofile_min_interval;
+	else if (interval > oprofile_max_interval)
+		interval = oprofile_max_interval;
+	oprofile_hw_interval = interval;
+	return count;
+}
+
+static const struct file_operations hw_interval_fops = {
+	.read		= hw_interval_read,
+	.write		= hw_interval_write,
+};
+
+/*
+ * File ops used for:
+ * /dev/oprofile/0/event
+ * Only a single event with number 0 is supported with this counter.
+ *
+ * /dev/oprofile/0/unit_mask
+ * This is a dummy file needed by the user space tools.
+ * No value other than 0 is accepted or returned.
+ */
+
+static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
+				    size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(0, buf, count, offset);
+}
+
+static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
+				     size_t count, loff_t *offset)
+{
+	unsigned long parsed;
+	int rc;
+
+	if (*offset)
+		return -EINVAL;
+
+	rc = oprofilefs_ulong_from_user(&parsed, buf, count);
+	if (rc <= 0)
+		return rc;
+	/* Nothing is stored: 0 is accepted, every other value is refused. */
+	if (parsed)
+		return -EINVAL;
+	return count;
+}
+
+static const struct file_operations zero_fops = {
+	.read		= hwsampler_zero_read,
+	.write		= hwsampler_zero_write,
+};
+
+/* /dev/oprofile/0/kernel file ops.  */
+
+static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
+				     size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(counter_config.kernel,
+					buf, count, offset);
+}
+
+static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
+				      size_t count, loff_t *offset)
+{
+	unsigned long flag;
+	int rc;
+
+	/* Only a write starting at offset 0 is accepted. */
+	if (*offset)
+		return -EINVAL;
+
+	rc = oprofilefs_ulong_from_user(&flag, buf, count);
+	if (rc <= 0)
+		return rc;
+
+	/* The file is a boolean switch: 0 or 1, nothing else. */
+	if (flag > 1)
+		return -EINVAL;
+	counter_config.kernel = flag;
+	return count;
+}
+
+static const struct file_operations kernel_fops = {
+	.read		= hwsampler_kernel_read,
+	.write		= hwsampler_kernel_write,
+};
+
+/* /dev/oprofile/0/user file ops. */
+
+static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
+				   size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(counter_config.user,
+					buf, count, offset);
+}
+
+static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
+				    size_t count, loff_t *offset)
+{
+	unsigned long flag;
+	int rc;
+
+	/* Only a write starting at offset 0 is accepted. */
+	if (*offset)
+		return -EINVAL;
+
+	rc = oprofilefs_ulong_from_user(&flag, buf, count);
+	if (rc <= 0)
+		return rc;
+
+	/* Boolean switch: any value above 1 is refused. */
+	if (flag > 1)
+		return -EINVAL;
+	counter_config.user = flag;
+	return count;
+}
+
+static const struct file_operations user_fops = {
+	.read		= hwsampler_user_read,
+	.write		= hwsampler_user_write,
+};
+
+
+/*
+ * File ops used for: /dev/oprofile/timer/enabled
+ * The value always has to be the inverted value of hwsampler_enabled. So
+ * no separate variable is created. That way we do not need locking.
+ */
+
+static ssize_t timer_enabled_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
+}
+
+static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
+				   size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+
+	retval = oprofilefs_ulong_from_user(&val, buf, count);
+	if (retval <= 0)
+		return retval;
+
+	if (val != 0 && val != 1)
+		return -EINVAL;
+
+	/* Timer cannot be disabled without having hardware sampling.  */
+	if (val == 0 && !hwsampler_available)
+		return -EINVAL;
+
+	if (oprofile_started)
+		/*
+		 * Mode changes are refused while profiling runs.  Safe
+		 * to do without locking as we set hwsampler_running in
+		 * start() when start_mutex is held.
+		 */
+		return -EBUSY;
+
+	hwsampler_enabled = !val;
+
+	return count;
+}
+
+static const struct file_operations timer_enabled_fops = {
+	.read		= timer_enabled_read,
+	.write		= timer_enabled_write,
+};
+
+
+static int oprofile_create_hwsampling_files(struct dentry *root)
+{
+	struct dentry *dir;
+
+	dir = oprofilefs_mkdir(root, "timer");
+	if (!dir)
+		return -EINVAL;
+
+	oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
+
+	if (!hwsampler_available)
+		return 0;
+
+	/* reinitialize default values */
+	hwsampler_enabled = 1;
+	counter_config.kernel = 1;
+	counter_config.user = 1;
+
+	if (!force_cpu_type) {
+		/*
+		 * Create the counter file system.  A single virtual
+		 * counter is created which can be used to
+		 * enable/disable hardware sampling dynamically from
+		 * user space.  The user space will configure a single
+		 * counter with a single event.  The value of 'event'
+		 * and 'unit_mask' are not evaluated by the kernel code
+		 * and can only be set to 0.
+		 */
+
+		dir = oprofilefs_mkdir(root, "0");
+		if (!dir)
+			return -EINVAL;
+
+		oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
+		oprofilefs_create_file(dir, "event", &zero_fops);
+		oprofilefs_create_file(dir, "count", &hw_interval_fops);
+		oprofilefs_create_file(dir, "unit_mask", &zero_fops);
+		oprofilefs_create_file(dir, "kernel", &kernel_fops);
+		oprofilefs_create_file(dir, "user", &user_fops);
+		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
+					&oprofile_sdbt_blocks);
+
+	} else {
+		/*
+		 * Hardware sampling can be used but the cpu_type is
+		 * forced to timer in order to deal with legacy user
+		 * space tools.  The /dev/oprofile/hwsampling fs is
+		 * provided in that case.
+		 */
+		dir = oprofilefs_mkdir(root, "hwsampling");
+		if (!dir)
+			return -EINVAL;
+
+		oprofilefs_create_file(dir, "hwsampler",
+				       &hwsampler_fops);
+		oprofilefs_create_file(dir, "hw_interval",
+				       &hw_interval_fops);
+		oprofilefs_create_ro_ulong(dir, "hw_min_interval",
+					   &oprofile_min_interval);
+		oprofilefs_create_ro_ulong(dir, "hw_max_interval",
+					   &oprofile_max_interval);
+		oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
+					&oprofile_sdbt_blocks);
+	}
+	return 0;
+}
+
+static int oprofile_hwsampler_init(struct oprofile_operations *ops)
+{
+	/*
+	 * Initialize the timer mode infrastructure as well in order
+	 * to be able to switch back dynamically.  oprofile_timer_init
+	 * is not supposed to fail.
+	 */
+	if (oprofile_timer_init(ops))
+		BUG();
+
+	memcpy(&timer_ops, ops, sizeof(timer_ops));
+	ops->create_files = oprofile_create_hwsampling_files;
+
+	/*
+	 * If the user space tools do not support newer cpu types,
+	 * the cpu_type module parameter (stored in force_cpu_type)
+	 * can be used to always return "timer" as cpu type.
+	 */
+	if (force_cpu_type != timer) {
+		struct cpuid id;
+
+		get_cpu_id (&id);
+
+		switch (id.machine) {
+		case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
+		case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
+		case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
+		default: return -ENODEV;
+		}
+	}
+
+	if (hwsampler_setup())
+		return -ENODEV;
+
+	/*
+	 * Query the range for the sampling interval from the
+	 * hardware.  A limit of 0 is treated as "no usable facility".
+	 */
+	oprofile_min_interval = hwsampler_query_min_interval();
+	if (oprofile_min_interval == 0)
+		return -ENODEV;
+	oprofile_max_interval = hwsampler_query_max_interval();
+	if (oprofile_max_interval == 0)
+		return -ENODEV;
+
+	/* Clamp the initial interval into the range just queried */
+	if (oprofile_hw_interval < oprofile_min_interval)
+		oprofile_hw_interval = oprofile_min_interval;
+	if (oprofile_hw_interval > oprofile_max_interval)
+		oprofile_hw_interval = oprofile_max_interval;
+
+	printk(KERN_INFO "oprofile: System z hardware sampling "
+	       "facility found.\n");
+
+	ops->start = oprofile_hwsampler_start;
+	ops->stop = oprofile_hwsampler_stop;
+
+	return 0;
+}
+
+static void oprofile_hwsampler_exit(void)
+{
+	hwsampler_shutdown();
+}
+
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	ops->backtrace = s390_backtrace;
+
+	/*
+	 * -ENODEV is not reported to the caller.  The module itself
+	 * will use the timer mode sampling as fallback and this is
+	 * always available.
+	 */
+	hwsampler_available = oprofile_hwsampler_init(ops) == 0;
+
+	return 0;
+}
+
+void oprofile_arch_exit(void)
+{
+	oprofile_hwsampler_exit();
+}
diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h
new file mode 100644
index 000000000..61b2531ee
--- /dev/null
+++ b/arch/s390/oprofile/op_counter.h
@@ -0,0 +1,21 @@
+/*
+ *   Copyright IBM Corp. 2011
+ *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
+ *
+ * @remark Copyright 2011 OProfile authors
+ */
+
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+struct op_counter_config {
+	/* `enabled' maps to the hwsampler_enabled variable.  */
+	/* `count' maps to the oprofile_hw_interval variable.  */
+	/* `event' and `unit_mask' are unused. */
+	unsigned long kernel;	/* 0 or 1, written via the `kernel' file */
+	unsigned long user;	/* 0 or 1, written via the `user' file */
+};
+
+extern struct op_counter_config counter_config;
+
+#endif /* OP_COUNTER_H */
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
new file mode 100644
index 000000000..805d8b291
--- /dev/null
+++ b/arch/s390/pci/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for the s390 PCI subsystem.
+#
+
+obj-$(CONFIG_PCI)	+= pci.o pci_dma.o pci_clp.o pci_sysfs.o \
+			   pci_event.o pci_debug.o pci_insn.o pci_mmio.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
new file mode 100644
index 000000000..598f023cf
--- /dev/null
+++ b/arch/s390/pci/pci.c
@@ -0,0 +1,965 @@
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ *   Jan Glauber <jang@linux.vnet.ibm.com>
+ *
+ * The System z PCI code is a rewrite from a prototype by
+ * the following people (Kudoz!):
+ *   Alexander Schmidt
+ *   Christoph Raisch
+ *   Hannes Hering
+ *   Hoang-Nam Nguyen
+ *   Jan-Bernd Themann
+ *   Stefan Roscher
+ *   Thomas Klein
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
+#include <linux/seq_file.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+
+#include <asm/isc.h>
+#include <asm/airq.h>
+#include <asm/facility.h>
+#include <asm/pci_insn.h>
+#include <asm/pci_clp.h>
+#include <asm/pci_dma.h>
+
+#define DEBUG				/* enable pr_debug */
+
+#define	SIC_IRQ_MODE_ALL		0
+#define	SIC_IRQ_MODE_SINGLE		1
+
+#define ZPCI_NR_DMA_SPACES		1
+#define ZPCI_NR_DEVICES			CONFIG_PCI_NR_FUNCTIONS
+
+/* list of all detected zpci devices */
+static LIST_HEAD(zpci_list);
+static DEFINE_SPINLOCK(zpci_list_lock);
+
+static struct irq_chip zpci_irq_chip = {
+	.name = "zPCI",
+	.irq_unmask = pci_msi_unmask_irq,
+	.irq_mask = pci_msi_mask_irq,
+};
+
+static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
+static DEFINE_SPINLOCK(zpci_domain_lock);
+
+static struct airq_iv *zpci_aisb_iv;
+static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES];
+
+/* Adapter interrupt definitions */
+static void zpci_irq_handler(struct airq_struct *airq);
+
+static struct airq_struct zpci_airq = {
+	.handler = zpci_irq_handler,
+	.isc = PCI_ISC,
+};
+
+/* I/O Map */
+static DEFINE_SPINLOCK(zpci_iomap_lock);
+static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
+struct zpci_iomap_entry *zpci_iomap_start;
+EXPORT_SYMBOL_GPL(zpci_iomap_start);
+
+static struct kmem_cache *zdev_fmb_cache;
+
+struct zpci_dev *get_zdev(struct pci_dev *pdev)
+{
+	return (struct zpci_dev *) pdev->sysdata;
+}
+
+struct zpci_dev *get_zdev_by_fid(u32 fid)
+{
+	struct zpci_dev *cur, *found = NULL;
+
+	spin_lock(&zpci_list_lock);
+	list_for_each_entry(cur, &zpci_list, entry) {
+		if (cur->fid != fid)
+			continue;
+		found = cur;	/* first match wins */
+		break;
+	}
+	spin_unlock(&zpci_list_lock);
+	return found;
+}
+
+static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
+{
+	return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL;
+}
+
+int pci_domain_nr(struct pci_bus *bus)
+{
+	return ((struct zpci_dev *) bus->sysdata)->domain;
+}
+EXPORT_SYMBOL_GPL(pci_domain_nr);
+
+int pci_proc_domain(struct pci_bus *bus)
+{
+	return pci_domain_nr(bus);
+}
+EXPORT_SYMBOL_GPL(pci_proc_domain);
+
+/* Modify PCI: Register adapter interruptions */
+static int zpci_set_airq(struct zpci_dev *zdev)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
+	struct zpci_fib fib = {0};
+
+	fib.isc = PCI_ISC;
+	fib.sum = 1;		/* enable summary notifications */
+	fib.noi = airq_iv_end(zdev->aibv);
+	fib.aibv = (unsigned long) zdev->aibv->vector;
+	fib.aibvo = 0;		/* each zdev has its own interrupt vector */
+	fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8;
+	fib.aisbo = zdev->aisb & 63;
+
+	return zpci_mod_fc(req, &fib);
+}
+
+struct mod_pci_args {
+	u64 base;
+	u64 limit;
+	u64 iota;
+	u64 fmb_addr;
+};
+
+static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args *args)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, fn);
+	struct zpci_fib fib = {0};
+
+	fib.pba = args->base;
+	fib.pal = args->limit;
+	fib.iota = args->iota;
+	fib.fmb_addr = args->fmb_addr;
+
+	return zpci_mod_fc(req, &fib);
+}
+
+/* Modify PCI: Register I/O address translation parameters */
+int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
+		       u64 base, u64 limit, u64 iota)
+{
+	struct mod_pci_args args = { base, limit, iota, 0 };
+
+	WARN_ON_ONCE(iota & 0x3fff);
+	args.iota |= ZPCI_IOTA_RTTO_FLAG;
+	return mod_pci(zdev, ZPCI_MOD_FC_REG_IOAT, dmaas, &args);
+}
+
+/* Modify PCI: Unregister I/O address translation parameters */
+int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
+{
+	struct mod_pci_args args = { 0, 0, 0, 0 };
+
+	return mod_pci(zdev, ZPCI_MOD_FC_DEREG_IOAT, dmaas, &args);
+}
+
+/* Modify PCI: Unregister adapter interruptions */
+static int zpci_clear_airq(struct zpci_dev *zdev)
+{
+	struct mod_pci_args args = { 0, 0, 0, 0 };
+
+	return mod_pci(zdev, ZPCI_MOD_FC_DEREG_INT, 0, &args);
+}
+
+/* Modify PCI: Set PCI function measurement parameters */
+int zpci_fmb_enable_device(struct zpci_dev *zdev)
+{
+	struct mod_pci_args args = { 0, 0, 0, 0 };
+
+	if (zdev->fmb)
+		return -EINVAL;
+
+	zdev->fmb = kmem_cache_zalloc(zdev_fmb_cache, GFP_KERNEL);
+	if (!zdev->fmb)
+		return -ENOMEM;
+	WARN_ON((u64) zdev->fmb & 0xf);
+
+	/* reset software counters */
+	atomic64_set(&zdev->allocated_pages, 0);
+	atomic64_set(&zdev->mapped_pages, 0);
+	atomic64_set(&zdev->unmapped_pages, 0);
+
+	args.fmb_addr = virt_to_phys(zdev->fmb);
+	return mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args);
+}
+
+/* Modify PCI: Disable PCI function measurement */
+int zpci_fmb_disable_device(struct zpci_dev *zdev)
+{
+	struct mod_pci_args args = { 0, 0, 0, 0 };
+	int rc;
+
+	if (!zdev->fmb)
+		return -EINVAL;
+
+	/* Function measurement is disabled if fmb address is zero */
+	rc = mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args);
+
+	kmem_cache_free(zdev_fmb_cache, zdev->fmb);
+	zdev->fmb = NULL;
+	return rc;
+}
+
+#define ZPCI_PCIAS_CFGSPC	15
+
+static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
+	u64 data;
+	int rc;
+
+	rc = zpci_load(&data, req, offset);
+	if (!rc) {
+		data = data << ((8 - len) * 8);
+		data = le64_to_cpu(data);
+		*val = (u32) data;
+	} else
+		*val = 0xffffffff;
+	return rc;
+}
+
+static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
+{
+	u64 req = ZPCI_CREATE_REQ(zdev->fh, ZPCI_PCIAS_CFGSPC, len);
+	u64 data = val;
+	int rc;
+
+	data = cpu_to_le64(data);
+	data = data >> ((8 - len) * 8);
+	rc = zpci_store(data, req, offset);
+	return rc;
+}
+
+void pcibios_fixup_bus(struct pci_bus *bus)
+{
+}
+
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+				       resource_size_t size,
+				       resource_size_t align)
+{
+	return 0;
+}
+
+/* combine single writes by using store-block insn */
+void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
+{
+       zpci_memcpy_toio(to, from, count);
+}
+
+/* Create a virtual mapping cookie for a PCI BAR */
+void __iomem *pci_iomap_range(struct pci_dev *pdev,
+			      int bar,
+			      unsigned long offset,
+			      unsigned long max)
+{
+	struct zpci_dev *zdev =	get_zdev(pdev);
+	u64 addr;
+	int idx;
+
+	if ((bar & 7) != bar)
+		return NULL;
+
+	idx = zdev->bars[bar].map_idx;
+	spin_lock(&zpci_iomap_lock);
+	if (zpci_iomap_start[idx].count++) {
+		BUG_ON(zpci_iomap_start[idx].fh != zdev->fh ||
+		       zpci_iomap_start[idx].bar != bar);
+	} else {
+		zpci_iomap_start[idx].fh = zdev->fh;
+		zpci_iomap_start[idx].bar = bar;
+	}
+	/* Detect overrun */
+	BUG_ON(!zpci_iomap_start[idx].count);
+	spin_unlock(&zpci_iomap_lock);
+
+	addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48);
+	return (void __iomem *) addr + offset;
+}
+EXPORT_SYMBOL(pci_iomap_range);
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+	return pci_iomap_range(dev, bar, 0, maxlen);
+}
+EXPORT_SYMBOL(pci_iomap);
+
+void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
+{
+	unsigned int idx;
+
+	idx = (((__force u64) addr) & ~ZPCI_IOMAP_ADDR_BASE) >> 48;
+	spin_lock(&zpci_iomap_lock);
+	/* Detect underrun */
+	BUG_ON(!zpci_iomap_start[idx].count);
+	if (!--zpci_iomap_start[idx].count) {
+		zpci_iomap_start[idx].fh = 0;
+		zpci_iomap_start[idx].bar = 0;
+	}
+	spin_unlock(&zpci_iomap_lock);
+}
+EXPORT_SYMBOL(pci_iounmap);
+
+static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
+		    int size, u32 *val)
+{
+	struct zpci_dev *zdev = get_zdev_by_bus(bus);
+
+	/*
+	 * Only ZPCI_DEVFN on a bus that carries a zdev can be accessed.
+	 */
+	if (!zdev || devfn != ZPCI_DEVFN)
+		return -ENODEV;
+
+	return zpci_cfg_load(zdev, where, val, size);
+}
+
+static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
+		     int size, u32 val)
+{
+	struct zpci_dev *zdev = get_zdev_by_bus(bus);
+
+	/*
+	 * Only ZPCI_DEVFN on a bus that carries a zdev can be accessed.
+	 */
+	if (!zdev || devfn != ZPCI_DEVFN)
+		return -ENODEV;
+
+	return zpci_cfg_store(zdev, where, val, size);
+}
+
+static struct pci_ops pci_root_ops = {
+	.read = pci_read,
+	.write = pci_write,
+};
+
+static void zpci_irq_handler(struct airq_struct *airq)
+{
+	unsigned long si, ai;
+	struct airq_iv *aibv;
+	int irqs_on = 0;
+
+	inc_irq_stat(IRQIO_PCI);
+	for (si = 0;;) {
+		/* Scan adapter summary indicator bit vector */
+		si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv));
+		if (si == -1UL) {
+			if (irqs_on++)
+				/* End of second scan with interrupts on. */
+				break;
+			/* First scan complete, reenable interrupts. */
+			zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+			si = 0;
+			continue;
+		}
+
+		/* Scan the adapter interrupt vector for this device. */
+		aibv = zpci_aibv[si];
+		for (ai = 0;;) {
+			ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv));
+			if (ai == -1UL)
+				break;
+			inc_irq_stat(IRQIO_MSI);
+			airq_iv_lock(aibv, ai);
+			generic_handle_irq(airq_iv_get_data(aibv, ai));
+			airq_iv_unlock(aibv, ai);
+		}
+	}
+}
+
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+	struct zpci_dev *zdev = get_zdev(pdev);
+	unsigned int hwirq, msi_vecs;
+	unsigned long aisb;
+	struct msi_desc *msi;
+	struct msi_msg msg;
+	int rc, irq;
+
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
+	msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
+
+	/* Allocate adapter summary indicator bit */
+	rc = -EIO;
+	aisb = airq_iv_alloc_bit(zpci_aisb_iv);
+	if (aisb == -1UL)
+		goto out;
+	zdev->aisb = aisb;
+
+	/* Create adapter interrupt vector */
+	rc = -ENOMEM;
+	zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
+	if (!zdev->aibv)
+		goto out_si;
+
+	zpci_aibv[aisb] = zdev->aibv;	/* Wire up shortcut pointer */
+
+	/* Request MSI interrupts; the vector only has room for msi_vecs */
+	hwirq = 0;
+	list_for_each_entry(msi, &pdev->msi_list, list) {
+		rc = -EIO;
+		if (hwirq >= msi_vecs)
+			break;
+		irq = irq_alloc_desc(0);	/* Alloc irq on node 0 */
+		if (irq < 0)
+			goto out_msi;
+		rc = irq_set_msi_desc(irq, msi);
+		if (rc)
+			goto out_msi;
+		irq_set_chip_and_handler(irq, &zpci_irq_chip,
+					 handle_simple_irq);
+		msg.data = hwirq;
+		msg.address_lo = zdev->msi_addr & 0xffffffff;
+		msg.address_hi = zdev->msi_addr >> 32;
+		pci_write_msi_msg(irq, &msg);
+		airq_iv_set_data(zdev->aibv, hwirq, irq);
+		hwirq++;
+	}
+
+	rc = zpci_set_airq(zdev);	/* Enable adapter interrupts */
+	if (rc)
+		goto out_msi;
+
+	return (msi_vecs == nvec) ? 0 : msi_vecs;
+
+out_msi:
+	list_for_each_entry(msi, &pdev->msi_list, list) {
+		if (hwirq-- == 0)
+			break;
+		irq_set_msi_desc(msi->irq, NULL);
+		irq_free_desc(msi->irq);
+		msi->msg.address_lo = 0;
+		msi->msg.address_hi = 0;
+		msi->msg.data = 0;
+		msi->irq = 0;
+	}
+	zpci_aibv[aisb] = NULL;
+	airq_iv_release(zdev->aibv);
+out_si:
+	airq_iv_free_bit(zpci_aisb_iv, aisb);
+out:
+	return rc;
+}
+
+void arch_teardown_msi_irqs(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = get_zdev(pdev);
+	struct msi_desc *msi;
+
+	/* Disable adapter interrupts */
+	if (zpci_clear_airq(zdev))
+		return;
+
+	/* Release MSI interrupts; skip entries that never got an irq */
+	list_for_each_entry(msi, &pdev->msi_list, list) {
+		if (!msi->irq)
+			continue;
+		if (msi->msi_attrib.is_msix)
+			__pci_msix_desc_mask_irq(msi, 1);
+		else
+			__pci_msi_desc_mask_irq(msi, 1, 1);
+		irq_set_msi_desc(msi->irq, NULL);
+		irq_free_desc(msi->irq);
+		msi->msg.address_lo = 0;
+		msi->msg.address_hi = 0;
+		msi->msg.data = 0;
+		msi->irq = 0;
+	}
+
+	zpci_aibv[zdev->aisb] = NULL;
+	airq_iv_release(zdev->aibv);
+	airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
+}
+
+static void zpci_map_resources(struct pci_dev *pdev)
+{
+	resource_size_t len;
+	int i;
+
+	for (i = 0; i < PCI_BAR_COUNT; i++) {
+		len = pci_resource_len(pdev, i);
+		if (!len)
+			continue;
+		pdev->resource[i].start =
+			(resource_size_t __force) pci_iomap(pdev, i, 0);
+		pdev->resource[i].end = pdev->resource[i].start + len - 1;
+	}
+}
+
+static void zpci_unmap_resources(struct pci_dev *pdev)
+{
+	resource_size_t len;
+	int i;
+
+	for (i = 0; i < PCI_BAR_COUNT; i++) {
+		len = pci_resource_len(pdev, i);
+		if (!len)
+			continue;
+		pci_iounmap(pdev, (void __iomem __force *)
+			    pdev->resource[i].start);
+	}
+}
+
+static int __init zpci_irq_init(void)
+{
+	int rc;
+
+	rc = register_adapter_interrupt(&zpci_airq);
+	if (rc)
+		goto out;
+	/* Set summary to 1 to be called every time for the ISC. */
+	*zpci_airq.lsi_ptr = 1;
+
+	rc = -ENOMEM;
+	zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
+	if (!zpci_aisb_iv)
+		goto out_airq;
+
+	zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
+	return 0;
+
+out_airq:
+	unregister_adapter_interrupt(&zpci_airq);
+out:
+	return rc;
+}
+
+static void zpci_irq_exit(void)
+{
+	airq_iv_release(zpci_aisb_iv);
+	unregister_adapter_interrupt(&zpci_airq);
+}
+
+static int zpci_alloc_iomap(struct zpci_dev *zdev)
+{
+	int idx;
+
+	/* Find and claim the slot in one critical section. */
+	spin_lock(&zpci_iomap_lock);
+	idx = find_first_zero_bit(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES);
+	if (idx != ZPCI_IOMAP_MAX_ENTRIES)
+		set_bit(idx, zpci_iomap);
+	else
+		idx = -ENOSPC;	/* bitmap exhausted */
+	spin_unlock(&zpci_iomap_lock);
+	return idx;
+}
+
+static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
+{
+	spin_lock(&zpci_iomap_lock);
+	memset(&zpci_iomap_start[entry], 0, sizeof(struct zpci_iomap_entry));
+	clear_bit(entry, zpci_iomap);
+	spin_unlock(&zpci_iomap_lock);
+}
+
+static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
+				    unsigned long size, unsigned long flags)
+{
+	struct resource *r;
+
+	r = kzalloc(sizeof(*r), GFP_KERNEL);
+	if (!r)
+		return NULL;
+
+	r->start = start;
+	r->end = r->start + size - 1;
+	r->flags = flags;
+	r->name = zdev->res_name;
+
+	if (request_resource(&iomem_resource, r)) {
+		kfree(r);
+		return NULL;
+	}
+	return r;
+}
+
+static int zpci_setup_bus_resources(struct zpci_dev *zdev,
+				    struct list_head *resources)
+{
+	unsigned long addr, size, flags;
+	struct resource *res;
+	int i, entry;
+
+	snprintf(zdev->res_name, sizeof(zdev->res_name),
+		 "PCI Bus %04x:%02x", zdev->domain, ZPCI_BUS_NR);
+
+	for (i = 0; i < PCI_BAR_COUNT; i++) {
+		if (!zdev->bars[i].size)
+			continue;
+		entry = zpci_alloc_iomap(zdev);
+		if (entry < 0)
+			return entry;
+		zdev->bars[i].map_idx = entry;
+
+		/* only MMIO is supported */
+		flags = IORESOURCE_MEM;
+		if (zdev->bars[i].val & 8)
+			flags |= IORESOURCE_PREFETCH;
+		if (zdev->bars[i].val & 4)
+			flags |= IORESOURCE_MEM_64;
+
+		addr = ZPCI_IOMAP_ADDR_BASE + ((u64) entry << 48);
+
+		size = 1UL << zdev->bars[i].size;
+
+		res = __alloc_res(zdev, addr, size, flags);
+		if (!res) {
+			zpci_free_iomap(zdev, entry);
+			return -ENOMEM;
+		}
+		zdev->bars[i].res = res;
+		pci_add_resource(resources, res);
+	}
+
+	return 0;
+}
+
+static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
+{
+	int i;
+
+	for (i = 0; i < PCI_BAR_COUNT; i++) {
+		if (!zdev->bars[i].size)
+			continue;
+
+		zpci_free_iomap(zdev, zdev->bars[i].map_idx);
+		release_resource(zdev->bars[i].res);
+		kfree(zdev->bars[i].res);
+	}
+}
+
+int pcibios_add_device(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = get_zdev(pdev);
+	struct resource *res;
+	int i;
+
+	zdev->pdev = pdev;
+	pdev->dev.groups = zpci_attr_groups;
+	zpci_map_resources(pdev);
+
+	for (i = 0; i < PCI_BAR_COUNT; i++) {
+		res = &pdev->resource[i];
+		if (res->parent || !res->flags)
+			continue;
+		pci_claim_resource(pdev, i);
+	}
+
+	return 0;
+}
+
+void pcibios_release_device(struct pci_dev *pdev)
+{
+	zpci_unmap_resources(pdev);
+}
+
+int pcibios_enable_device(struct pci_dev *pdev, int mask)
+{
+	struct zpci_dev *zdev = get_zdev(pdev);
+
+	zdev->pdev = pdev;
+	zpci_debug_init_device(zdev);
+	zpci_fmb_enable_device(zdev);
+
+	return pci_enable_resources(pdev, mask);
+}
+
+void pcibios_disable_device(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = get_zdev(pdev);
+
+	zpci_fmb_disable_device(zdev);
+	zpci_debug_exit_device(zdev);
+	zdev->pdev = NULL;
+}
+
+#ifdef CONFIG_HIBERNATE_CALLBACKS
+static int zpci_restore(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct zpci_dev *zdev = get_zdev(pdev);
+	int ret = 0;
+
+	if (zdev->state != ZPCI_FN_STATE_ONLINE)
+		goto out;
+
+	ret = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
+	if (ret)
+		goto out;
+
+	zpci_map_resources(pdev);
+	zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
+			   zdev->start_dma + zdev->iommu_size - 1,
+			   (u64) zdev->dma_table);
+
+out:
+	return ret;
+}
+
+static int zpci_freeze(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct zpci_dev *zdev = get_zdev(pdev);
+
+	if (zdev->state != ZPCI_FN_STATE_ONLINE)
+		return 0;
+
+	zpci_unregister_ioat(zdev, 0);
+	zpci_unmap_resources(pdev);
+	return clp_disable_fh(zdev);
+}
+
+struct dev_pm_ops pcibios_pm_ops = {
+	.thaw_noirq = zpci_restore,
+	.freeze_noirq = zpci_freeze,
+	.restore_noirq = zpci_restore,
+	.poweroff_noirq = zpci_freeze,
+};
+#endif /* CONFIG_HIBERNATE_CALLBACKS */
+
+static int zpci_alloc_domain(struct zpci_dev *zdev)
+{
+	spin_lock(&zpci_domain_lock);
+	zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
+	if (zdev->domain == ZPCI_NR_DEVICES) {
+		spin_unlock(&zpci_domain_lock);
+		return -ENOSPC;
+	}
+	set_bit(zdev->domain, zpci_domain);
+	spin_unlock(&zpci_domain_lock);
+	return 0;
+}
+
+static void zpci_free_domain(struct zpci_dev *zdev)
+{
+	spin_lock(&zpci_domain_lock);
+	clear_bit(zdev->domain, zpci_domain);
+	spin_unlock(&zpci_domain_lock);
+}
+
+void pcibios_remove_bus(struct pci_bus *bus)
+{
+	struct zpci_dev *zdev = get_zdev_by_bus(bus);
+
+	zpci_exit_slot(zdev);
+	zpci_cleanup_bus_resources(zdev);
+	zpci_free_domain(zdev);
+
+	spin_lock(&zpci_list_lock);
+	list_del(&zdev->entry);
+	spin_unlock(&zpci_list_lock);
+
+	kfree(zdev);
+}
+
+static int zpci_scan_bus(struct zpci_dev *zdev)
+{
+	LIST_HEAD(resources);
+	int ret;
+
+	ret = zpci_setup_bus_resources(zdev, &resources);
+	if (ret)
+		return ret;
+
+	zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
+				      zdev, &resources);
+	if (!zdev->bus) {
+		zpci_cleanup_bus_resources(zdev);
+		return -EIO;
+	}
+	zdev->bus->max_bus_speed = zdev->max_bus_speed;
+	pci_bus_add_devices(zdev->bus);
+	return 0;
+}
+
+int zpci_enable_device(struct zpci_dev *zdev)
+{
+	int rc;
+
+	/* Step 1: enable the function handle. */
+	rc = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
+	if (rc)
+		return rc;
+
+	/* Step 2: set up DMA; roll step 1 back if that fails. */
+	rc = zpci_dma_init_device(zdev);
+	if (rc) {
+		clp_disable_fh(zdev);
+		return rc;
+	}
+
+	/* Only a fully set up function is marked online. */
+	zdev->state = ZPCI_FN_STATE_ONLINE;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(zpci_enable_device);
+
+int zpci_disable_device(struct zpci_dev *zdev)
+{
+	zpci_dma_exit_device(zdev);
+	return clp_disable_fh(zdev);
+}
+EXPORT_SYMBOL_GPL(zpci_disable_device);
+
+int zpci_create_device(struct zpci_dev *zdev)
+{
+	int rc;
+
+	rc = zpci_alloc_domain(zdev);
+	if (rc)
+		goto out;
+
+	mutex_init(&zdev->lock);
+	if (zdev->state == ZPCI_FN_STATE_CONFIGURED) {
+		rc = zpci_enable_device(zdev);
+		if (rc)
+			goto out_free;
+	}
+	rc = zpci_scan_bus(zdev);
+	if (rc)
+		goto out_disable;
+
+	spin_lock(&zpci_list_lock);
+	list_add_tail(&zdev->entry, &zpci_list);
+	spin_unlock(&zpci_list_lock);
+
+	zpci_init_slot(zdev);
+
+	return 0;
+
+out_disable:
+	if (zdev->state == ZPCI_FN_STATE_ONLINE)
+		zpci_disable_device(zdev);
+out_free:
+	zpci_free_domain(zdev);
+out:
+	return rc;
+}
+
+void zpci_stop_device(struct zpci_dev *zdev)
+{
+	zpci_dma_exit_device(zdev);
+	/*
+	 * Note: SCLP disables fh via set-pci-fn so don't
+	 * do that here.
+	 */
+}
+EXPORT_SYMBOL_GPL(zpci_stop_device);
+
+static inline int barsize(u8 size)
+{
+	return (size) ? (1 << size) >> 10 : 0;
+}
+
+static int zpci_mem_init(void)
+{
+	zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb),
+				16, 0, NULL);
+	if (!zdev_fmb_cache)
+		goto error_zdev;
+
+	/* Zeroed I/O map table, sized with overflow check. TODO: use realloc */
+	zpci_iomap_start = kcalloc(ZPCI_IOMAP_MAX_ENTRIES,
+				   sizeof(*zpci_iomap_start), GFP_KERNEL);
+	if (!zpci_iomap_start)
+		goto error_iomap;
+	return 0;
+
+error_iomap:
+	kmem_cache_destroy(zdev_fmb_cache);
+error_zdev:
+	return -ENOMEM;
+}
+
+static void zpci_mem_exit(void)
+{
+	kfree(zpci_iomap_start);
+	kmem_cache_destroy(zdev_fmb_cache);
+}
+
+static unsigned int s390_pci_probe = 1;
+static unsigned int s390_pci_initialized;
+
+char * __init pcibios_setup(char *str)
+{
+	if (!strcmp(str, "off")) {
+		s390_pci_probe = 0;
+		return NULL;
+	}
+	return str;
+}
+
+bool zpci_is_enabled(void)
+{
+	return s390_pci_initialized;
+}
+
+static int __init pci_base_init(void)
+{
+	int rc;
+
+	if (!s390_pci_probe)
+		return 0;
+
+	if (!test_facility(69) || !test_facility(71) || !test_facility(72))
+		return 0;
+
+	rc = zpci_debug_init();
+	if (rc)
+		goto out;
+
+	rc = zpci_mem_init();
+	if (rc)
+		goto out_mem;
+
+	rc = zpci_irq_init();
+	if (rc)
+		goto out_irq;
+
+	rc = zpci_dma_init();
+	if (rc)
+		goto out_dma;
+
+	rc = clp_scan_pci_devices();
+	if (rc)
+		goto out_find;
+
+	s390_pci_initialized = 1;
+	return 0;
+
+out_find:
+	zpci_dma_exit();
+out_dma:
+	zpci_irq_exit();
+out_irq:
+	zpci_mem_exit();
+out_mem:
+	zpci_debug_exit();
+out:
+	return rc;
+}
+subsys_initcall_sync(pci_base_init);
+
+void zpci_rescan(void)
+{
+	if (zpci_is_enabled())
+		clp_rescan_pci_devices_simple();
+}
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
new file mode 100644
index 000000000..d6e411ed8
--- /dev/null
+++ b/arch/s390/pci/pci_clp.c
@@ -0,0 +1,393 @@
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ *   Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <asm/pci_debug.h>
+#include <asm/pci_clp.h>
+
+static inline void zpci_err_clp(unsigned int rsp, int rc)
+{
+	struct {
+		unsigned int rsp;	/* response code passed by the caller */
+		int rc;			/* return code passed by the caller */
+	} __packed record = { .rsp = rsp, .rc = rc };
+
+	zpci_err_hex(&record, sizeof(record));	/* both values as one hex record */
+}
+
+/*
+ * Call Logical Processor on the CLP_BLK_SIZE block at @data; the value
+ * extracted into cc is returned. Retry logic is handled by the caller.
+ */
+static inline u8 clp_instr(void *data)
+{
+	struct { u8 _[CLP_BLK_SIZE]; } *req = data;	/* sized type for "+m" */
+	u64 ignored;	/* output register whose value is not used */
+	u8 cc;
+
+	asm volatile (
+		"	.insn	rrf,0xb9a00000,%[ign],%[req],0x0,0x2\n"
+		"	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		: [cc] "=d" (cc), [ign] "=d" (ignored), "+m" (*req)
+		: [req] "a" (req)
+		: "cc");
+	return cc;	/* callers treat 0 as success */
+}
+
+static void *clp_alloc_block(gfp_t gfp_mask)
+{	/* pages covering CLP_BLK_SIZE; callers check the result for NULL */
+	return (void *) __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE));
+}
+
+static void clp_free_block(void *ptr)
+{	/* counterpart of clp_alloc_block(): same order is used for the free */
+	free_pages((unsigned long) ptr, get_order(CLP_BLK_SIZE));
+}
+
+static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
+				      struct clp_rsp_query_pci_grp *response)
+{
+	zdev->tlb_refresh = response->refresh;
+	zdev->dma_mask = response->dasm;
+	zdev->msi_addr = response->msia;
+	zdev->max_msi = response->noi;
+	zdev->fmb_update = response->mui;
+
+	/*
+	 * Only group version 1 maps to a known bus speed; every other
+	 * version is stored as unknown.
+	 */
+	if (response->version == 1)
+		zdev->max_bus_speed = PCIE_SPEED_5_0GT;
+	else
+		zdev->max_bus_speed = PCI_SPEED_UNKNOWN;
+}
+
+static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
+{
+	struct clp_req_rsp_query_pci_grp *blk;
+	int rc;
+
+	blk = clp_alloc_block(GFP_KERNEL);
+	if (!blk)
+		return -ENOMEM;
+
+	/* build a zeroed request for the given function group id */
+	memset(blk, 0, sizeof(*blk));
+	blk->request.hdr.len = sizeof(blk->request);
+	blk->request.hdr.cmd = CLP_QUERY_PCI_FNGRP;
+	blk->response.hdr.len = sizeof(blk->response);
+	blk->request.pfgid = pfgid;
+	rc = clp_instr(blk);
+	if (rc || blk->response.hdr.rsp != CLP_RC_OK) {
+		zpci_err("Q PCI FGRP:\n");
+		zpci_err_clp(blk->response.hdr.rsp, rc);
+		rc = -EIO;
+	} else {
+		clp_store_query_pci_fngrp(zdev, &blk->response);
+	}
+	clp_free_block(blk);
+	return rc;
+}
+
+static int clp_store_query_pci_fn(struct zpci_dev *zdev,
+				  struct clp_rsp_query_pci *response)
+{
+	int bar;
+
+	/* BAR values are converted from little endian, sizes copied as is */
+	for (bar = 0; bar < PCI_BAR_COUNT; bar++) {
+		zdev->bars[bar].val = le32_to_cpu(response->bar[bar]);
+		zdev->bars[bar].size = response->bar_size[bar];
+	}
+	zdev->start_dma = response->sdma;
+	zdev->end_dma = response->edma;
+	zdev->pchid = response->pchid;
+	zdev->pfgid = response->pfgid;
+	zdev->pft = response->pft;
+	zdev->vfn = response->vfn;
+	zdev->uid = response->uid;
+
+	memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip));
+	if (response->util_str_avail)
+		memcpy(zdev->util_str, response->util_str,
+		       sizeof(zdev->util_str));
+
+	return 0;	/* always 0; the caller still checks the result */
+}
+
+static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
+{
+	struct clp_req_rsp_query_pci *blk;
+	int rc;
+
+	blk = clp_alloc_block(GFP_KERNEL);
+	if (!blk)
+		return -ENOMEM;
+
+	memset(blk, 0, sizeof(*blk));
+	blk->request.hdr.len = sizeof(blk->request);
+	blk->request.hdr.cmd = CLP_QUERY_PCI_FN;
+	blk->response.hdr.len = sizeof(blk->response);
+	blk->request.fh = fh;
+
+	rc = clp_instr(blk);
+	if (rc || blk->response.hdr.rsp != CLP_RC_OK) {
+		zpci_err("Q PCI FN:\n");
+		zpci_err_clp(blk->response.hdr.rsp, rc);
+		rc = -EIO;
+		goto out;
+	}
+
+	/* store the function data, then query its group if it names one */
+	rc = clp_store_query_pci_fn(zdev, &blk->response);
+	if (!rc && blk->response.pfgid)
+		rc = clp_query_pci_fngrp(zdev, blk->response.pfgid);
+out:
+	clp_free_block(blk);
+	return rc;
+}
+
+int clp_add_pci_device(u32 fid, u32 fh, int configured)
+{
+	struct zpci_dev *zdev;
+	int rc;
+
+	zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured);
+	zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
+	if (!zdev)
+		return -ENOMEM;
+
+	zdev->fh = fh;
+	zdev->fid = fid;
+
+	/* Query function properties and update zdev */
+	rc = clp_query_pci_fn(zdev, fh);
+	if (rc)
+		goto error;
+
+	zdev->state = configured ? ZPCI_FN_STATE_CONFIGURED :
+				   ZPCI_FN_STATE_STANDBY;
+
+	/*
+	 * zdev is kept on success and released below on any failure.
+	 */
+	rc = zpci_create_device(zdev);
+	if (!rc)
+		return 0;
+
+error:
+	kfree(zdev);
+	return rc;
+}
+
+/*
+ * Enable/Disable the PCI function *fh; on success *fh gets the new handle.
+ */
+static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
+{
+	struct clp_req_rsp_set_pci *rrb;
+	int rc, retries = 100;	/* busy responses are retried 100 times */
+
+	rrb = clp_alloc_block(GFP_KERNEL);
+	if (!rrb)
+		return -ENOMEM;
+
+	do {
+		memset(rrb, 0, sizeof(*rrb));	/* request is rebuilt for every try */
+		rrb->request.hdr.len = sizeof(rrb->request);
+		rrb->request.hdr.cmd = CLP_SET_PCI_FN;
+		rrb->response.hdr.len = sizeof(rrb->response);
+		rrb->request.fh = *fh;
+		rrb->request.oc = command;
+		rrb->request.ndas = nr_dma_as;
+
+		rc = clp_instr(rrb);
+		if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) {
+			retries--;
+			if (retries < 0)
+				break;	/* still busy: reported as -EIO below */
+			msleep(20);
+		}
+	} while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
+
+	if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
+		*fh = rrb->response.fh;
+	else {
+		zpci_err("Set PCI FN:\n");
+		zpci_err_clp(rrb->response.hdr.rsp, rc);
+		rc = -EIO;	/* *fh is left untouched on failure */
+	}
+	clp_free_block(rrb);
+	return rc;
+}
+
+int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as)
+{
+	u32 handle = zdev->fh;
+	int rc;
+
+	rc = clp_set_pci_fn(&handle, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
+	if (rc == 0)
+		zdev->fh = handle;	/* only a successful call updates zdev */
+
+	/* logs the handle now stored in zdev, i.e. the old one on failure */
+	zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
+	return rc;
+}
+
+int clp_disable_fh(struct zpci_dev *zdev)
+{
+	u32 handle = zdev->fh;
+	int rc;
+
+	/* nothing to do for a function that is not enabled */
+	if (!zdev_enabled(zdev))
+		return 0;
+
+	rc = clp_set_pci_fn(&handle, 0, CLP_SET_DISABLE_PCI_FN);
+	if (rc == 0)
+		zdev->fh = handle;	/* only a successful call updates zdev */
+
+	zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
+	return rc;
+}
+
+static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
+			void (*cb)(struct clp_fh_list_entry *entry))
+{
+	u64 resume_token = 0;	/* 0 starts the list; non-zero continues it */
+	int entries, i, rc;
+
+	do {
+		memset(rrb, 0, sizeof(*rrb));
+		rrb->request.hdr.len = sizeof(rrb->request);
+		rrb->request.hdr.cmd = CLP_LIST_PCI;
+		/* store as many entries as possible */
+		rrb->response.hdr.len = CLP_BLK_SIZE - LIST_PCI_HDR_LEN;
+		rrb->request.resume_token = resume_token;
+
+		/* Get PCI function handle list */
+		rc = clp_instr(rrb);
+		if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
+			zpci_err("List PCI FN:\n");
+			zpci_err_clp(rrb->response.hdr.rsp, rc);
+			rc = -EIO;
+			goto out;	/* entries of earlier rounds stay processed */
+		}
+
+		WARN_ON_ONCE(rrb->response.entry_size !=
+			sizeof(struct clp_fh_list_entry));	/* warns only, continues */
+
+		entries = (rrb->response.hdr.len - LIST_PCI_HDR_LEN) /
+			rrb->response.entry_size;	/* NOTE(review): not checked for 0 */
+
+		resume_token = rrb->response.resume_token;
+		for (i = 0; i < entries; i++)
+			cb(&rrb->response.fh_list[i]);	/* one callback per entry */
+	} while (resume_token);
+out:
+	return rc;	/* 0 when the whole list was walked, else -EIO */
+}
+
+static void __clp_add(struct clp_fh_list_entry *entry)
+{
+	/* entries without a vendor id are skipped; add errors are ignored */
+	if (entry->vendor_id)
+		clp_add_pci_device(entry->fid, entry->fh,
+				   entry->config_state);
+}
+
+static void __clp_rescan(struct clp_fh_list_entry *entry)
+{
+	struct zpci_dev *known;
+
+	/* Reconcile one list entry with the functions already known. */
+	if (!entry->vendor_id)
+		return;
+
+	known = get_zdev_by_fid(entry->fid);
+	if (!known) {
+		/* no zdev for this fid yet: add it */
+		clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
+	} else if (!entry->config_state) {
+		/*
+		 * The handle is already disabled, that means no iota/irq freeing via
+		 * the firmware interfaces anymore. Need to free resources manually
+		 * (DMA memory, debug, sysfs)...
+		 */
+		zpci_stop_device(known);
+	}
+	/* a known function that is still configured needs no action */
+}
+
+static void __clp_update(struct clp_fh_list_entry *entry)
+{
+	struct zpci_dev *known;
+
+	if (!entry->vendor_id)
+		return;
+
+	/* Only the stored handle of an already known function is
+	 * refreshed; unknown fids are ignored. */
+	known = get_zdev_by_fid(entry->fid);
+	if (known)
+		known->fh = entry->fh;
+}
+
+int clp_scan_pci_devices(void)
+{
+	struct clp_req_rsp_list_pci *block;
+	int rc = -ENOMEM;
+
+	block = clp_alloc_block(GFP_KERNEL);
+	if (block) {
+		/* every listed function is handed to __clp_add() */
+		rc = clp_list_pci(block, __clp_add);
+		clp_free_block(block);
+	}
+
+	return rc;
+}
+
+int clp_rescan_pci_devices(void)
+{
+	struct clp_req_rsp_list_pci *block;
+	int rc = -ENOMEM;
+
+	block = clp_alloc_block(GFP_KERNEL);
+	if (block) {
+		/* every listed function is handed to __clp_rescan() */
+		rc = clp_list_pci(block, __clp_rescan);
+		clp_free_block(block);
+	}
+
+	return rc;
+}
+
+int clp_rescan_pci_devices_simple(void)
+{
+	struct clp_req_rsp_list_pci *block;
+	int rc = -ENOMEM;
+
+	block = clp_alloc_block(GFP_NOWAIT);
+	if (block) {
+		/* every listed function is handed to __clp_update() */
+		rc = clp_list_pci(block, __clp_update);
+		clp_free_block(block);
+	}
+
+	return rc;
+}
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
new file mode 100644
index 000000000..4129b0a5f
--- /dev/null
+++ b/arch/s390/pci/pci_debug.c
@@ -0,0 +1,177 @@
+/*
+ *  Copyright IBM Corp. 2012
+ *
+ *  Author(s):
+ *    Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <asm/debug.h>
+
+#include <asm/pci_dma.h>
+
+static struct dentry *debugfs_root;	/* "pci" directory from zpci_debug_init() */
+debug_info_t *pci_debug_msg_id;		/* registered as "pci_msg" */
+EXPORT_SYMBOL_GPL(pci_debug_msg_id);
+debug_info_t *pci_debug_err_id;		/* registered as "pci_error" */
+EXPORT_SYMBOL_GPL(pci_debug_err_id);
+
+static char *pci_perf_names[] = {
+	/* hardware counters, indexed like the u64 values read from fmb->ld_ops on */
+	"Load operations",
+	"Store operations",
+	"Store block operations",
+	"Refresh operations",
+	"DMA read bytes",	/* entries 4-5 are printed only if fmb->dma_valid */
+	"DMA write bytes",
+};
+
+static char *pci_sw_names[] = {	/* labels used by pci_sw_counter_show() */
+	"Allocated pages",	/* first label pairs with zdev->allocated_pages */
+	"Mapped pages",
+	"Unmapped pages",
+};
+
+static void pci_sw_counter_show(struct seq_file *m)
+{
+	struct zpci_dev *zdev = m->private;
+	atomic64_t *counter = &zdev->allocated_pages;	/* first counter printed */
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)	/* NOTE(review): assumes adjacent counters */
+		seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
+			   atomic64_read(counter));
+}
+
+static int pci_perf_show(struct seq_file *m, void *v)
+{
+	struct zpci_dev *zdev = m->private;
+	u64 *stat;
+	int i;
+
+	if (!zdev)
+		return 0;	/* no device attached: empty output */
+
+	mutex_lock(&zdev->lock);	/* held while zdev->fmb is read */
+	if (!zdev->fmb) {
+		mutex_unlock(&zdev->lock);
+		seq_puts(m, "FMB statistics disabled\n");
+		return 0;
+	}
+
+	/* header */
+	seq_printf(m, "FMB @ %p\n", zdev->fmb);
+	seq_printf(m, "Update interval: %u ms\n", zdev->fmb_update);
+	seq_printf(m, "Samples: %u\n", zdev->fmb->samples);
+	seq_printf(m, "Last update TOD: %Lx\n", zdev->fmb->last_update);
+
+	/* hardware counters */
+	stat = (u64 *) &zdev->fmb->ld_ops;	/* read as a u64 array from ld_ops on */
+	for (i = 0; i < 4; i++)
+		seq_printf(m, "%26s:\t%llu\n",
+			   pci_perf_names[i], *(stat + i));
+	if (zdev->fmb->dma_valid)	/* the two DMA byte counters are optional */
+		for (i = 4; i < 6; i++)
+			seq_printf(m, "%26s:\t%llu\n",
+				   pci_perf_names[i], *(stat + i));
+
+	pci_sw_counter_show(m);	/* software counters follow the hardware ones */
+	mutex_unlock(&zdev->lock);
+	return 0;
+}
+
+static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf,
+				  size_t count, loff_t *off)
+{
+	struct zpci_dev *zdev = ((struct seq_file *) file->private_data)->private;
+	unsigned long val;
+	int rc;
+
+	if (!zdev)
+		return 0;	/* NOTE(review): reports zero bytes written */
+
+	rc = kstrtoul_from_user(ubuf, count, 10, &val);	/* decimal input */
+	if (rc)
+		return rc;
+
+	mutex_lock(&zdev->lock);
+	switch (val) {	/* values other than 0/1 leave rc at 0 and do nothing */
+	case 0:
+		rc = zpci_fmb_disable_device(zdev);
+		break;
+	case 1:
+		rc = zpci_fmb_enable_device(zdev);
+		break;
+	}
+	mutex_unlock(&zdev->lock);
+	return rc ? rc : count;	/* whole input consumed unless an error occurred */
+}
+
+static int pci_perf_seq_open(struct inode *inode, struct file *filp)
+{	/* i_private (passed as the file's data at creation) becomes m->private */
+	return single_open(filp, pci_perf_show,
+			   file_inode(filp)->i_private);
+}
+
+static const struct file_operations debugfs_pci_perf_fops = {
+	.open	 = pci_perf_seq_open,
+	.read	 = seq_read,		/* output comes from pci_perf_show() */
+	.write	 = pci_perf_seq_write,	/* "0"/"1" disable/enable the FMB */
+	.llseek  = seq_lseek,
+	.release = single_release,	/* pairs with single_open() in .open */
+};
+
+void zpci_debug_init_device(struct zpci_dev *zdev)
+{
+	zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev),
+					       debugfs_root);
+	if (IS_ERR(zdev->debugfs_dev))
+		zdev->debugfs_dev = NULL;	/* file below then gets a NULL parent */
+
+	zdev->debugfs_perf = debugfs_create_file("statistics",
+				S_IFREG | S_IRUGO | S_IWUSR,
+				zdev->debugfs_dev, zdev,
+				&debugfs_pci_perf_fops);
+	if (IS_ERR(zdev->debugfs_perf))
+		zdev->debugfs_perf = NULL;	/* failures are not reported to the caller */
+}
+
+void zpci_debug_exit_device(struct zpci_dev *zdev)
+{
+	debugfs_remove(zdev->debugfs_perf);	/* "statistics" file first */
+	debugfs_remove(zdev->debugfs_dev);	/* then its directory */
+}
+
+int __init zpci_debug_init(void)
+{
+	/* event trace buffer */
+	pci_debug_msg_id = debug_register("pci_msg", 8, 1, 8 * sizeof(long));
+	if (!pci_debug_msg_id)
+		return -EINVAL;
+	debug_register_view(pci_debug_msg_id, &debug_sprintf_view);
+	debug_set_level(pci_debug_msg_id, 3);
+	/* error log; on failure drop the event log registered above */
+	pci_debug_err_id = debug_register("pci_error", 2, 1, 16);
+	if (!pci_debug_err_id) {
+		debug_unregister(pci_debug_msg_id);
+		return -EINVAL;
+	}
+	debug_register_view(pci_debug_err_id, &debug_hex_ascii_view);
+	debug_set_level(pci_debug_err_id, 6);
+	debugfs_root = debugfs_create_dir("pci", NULL);	/* result is not checked */
+	return 0;
+}
+
+void zpci_debug_exit(void)
+{
+	debug_unregister(pci_debug_msg_id);	/* "pci_msg" */
+	debug_unregister(pci_debug_err_id);	/* "pci_error" */
+	debugfs_remove(debugfs_root);		/* "pci" debugfs directory */
+}
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
new file mode 100644
index 000000000..6fd8d5836
--- /dev/null
+++ b/arch/s390/pci/pci_dma.c
@@ -0,0 +1,528 @@
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ *   Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/iommu-helper.h>
+#include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
+#include <linux/pci.h>
+#include <asm/pci_dma.h>
+
+static struct kmem_cache *dma_region_table_cache;	/* "PCI_DMA_region_tables" */
+static struct kmem_cache *dma_page_table_cache;	/* "PCI_DMA_page_tables" */
+static int s390_iommu_strict;	/* set by the "s390_iommu=strict" option */
+
+static int zpci_refresh_global(struct zpci_dev *zdev)
+{	/* refresh covering all iommu_pages pages starting at start_dma */
+	return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
+				  zdev->iommu_pages * PAGE_SIZE);
+}
+
+static unsigned long *dma_alloc_cpu_table(void)
+{
+	unsigned long *table;
+	int idx;
+
+	table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
+	if (!table)
+		return NULL;
+	for (idx = 0; idx < ZPCI_TABLE_ENTRIES; idx++)	/* all invalid */
+		table[idx] = ZPCI_TABLE_INVALID | ZPCI_TABLE_PROTECTED;
+	return table;
+}
+
+static void dma_free_cpu_table(void *table)
+{	/* returns a table from dma_alloc_cpu_table() to its slab cache */
+	kmem_cache_free(dma_region_table_cache, table);
+}
+
+static unsigned long *dma_alloc_page_table(void)
+{
+	unsigned long *table;
+	int idx;
+
+	table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
+	if (!table)
+		return NULL;
+	for (idx = 0; idx < ZPCI_PT_ENTRIES; idx++)	/* all invalid */
+		table[idx] = ZPCI_PTE_INVALID | ZPCI_TABLE_PROTECTED;
+	return table;
+}
+
+static void dma_free_page_table(void *table)
+{	/* returns a table from dma_alloc_page_table() to its slab cache */
+	kmem_cache_free(dma_page_table_cache, table);
+}
+
+static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
+{
+	unsigned long *sto;
+
+	/* a valid entry already refers to a table; otherwise create one */
+	if (reg_entry_isvalid(*entry))
+		return get_rt_sto(*entry);
+
+	sto = dma_alloc_cpu_table();
+	if (!sto)
+		return NULL;
+
+	set_rt_sto(entry, sto);
+	validate_rt_entry(entry);
+	entry_clr_protected(entry);
+	return sto;
+}
+
+static unsigned long *dma_get_page_table_origin(unsigned long *entry)
+{
+	unsigned long *pto;
+
+	/* a valid entry already refers to a table; otherwise create one */
+	if (reg_entry_isvalid(*entry))
+		return get_st_pto(*entry);
+
+	pto = dma_alloc_page_table();
+	if (!pto)
+		return NULL;
+	set_st_pto(entry, pto);
+	validate_st_entry(entry);
+	entry_clr_protected(entry);
+	return pto;
+}
+
+static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
+{
+	unsigned int rtx = calc_rtx(dma_addr);
+	unsigned int sx = calc_sx(dma_addr);
+	unsigned int px = calc_px(dma_addr);
+	unsigned long *sto, *pto;
+
+	/* region -> segment -> page table; NULL if a table cannot be allocated */
+	sto = dma_get_seg_table_origin(&rto[rtx]);
+	if (!sto)
+		return NULL;
+
+	pto = dma_get_page_table_origin(&sto[sx]);
+	if (!pto)
+		return NULL;
+
+	return &pto[px];
+}
+
+static void dma_update_cpu_trans(struct zpci_dev *zdev, void *page_addr,
+				 dma_addr_t dma_addr, int flags)
+{
+	unsigned long *pte;
+
+	pte = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
+	if (!pte) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	/* unmapping only invalidates; address and protection stay as is */
+	if (flags & ZPCI_PTE_INVALID) {
+		invalidate_pt_entry(pte);
+		return;
+	}
+
+	set_pt_pfaa(pte, page_addr);
+	validate_pt_entry(pte);
+	if (flags & ZPCI_TABLE_PROTECTED)
+		entry_set_protected(pte);
+	else
+		entry_clr_protected(pte);
+}
+
+static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+			    dma_addr_t dma_addr, size_t size, int flags)
+{
+	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	u8 *page_addr = (u8 *) (pa & PAGE_MASK);	/* page-aligned start */
+	dma_addr_t start_dma_addr = dma_addr;	/* kept for the refresh below */
+	unsigned long irq_flags;
+	int i, rc = 0;
+
+	if (!nr_pages)
+		return -EINVAL;	/* size was 0 */
+
+	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
+	if (!zdev->dma_table)
+		goto no_refresh;	/* no table: returns 0 without any update */
+
+	for (i = 0; i < nr_pages; i++) {	/* one table entry per page */
+		dma_update_cpu_trans(zdev, page_addr, dma_addr, flags);
+		page_addr += PAGE_SIZE;
+		dma_addr += PAGE_SIZE;
+	}
+
+	/*
+	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
+	 * translations when previously invalid translation-table entries are
+	 * validated. With lazy unmap, it also is skipped for previously valid
+	 * entries, but a global rpcit is then required before any address can
+	 * be re-used, i.e. after each iommu bitmap wrap-around.
+	 */
+	if (!zdev->tlb_refresh &&
+			(!s390_iommu_strict ||
+			((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
+		goto no_refresh;
+
+	rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
+				nr_pages * PAGE_SIZE);	/* issued with the lock held */
+
+no_refresh:
+	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
+	return rc;
+}
+
+static void dma_free_seg_table(unsigned long entry)
+{
+	unsigned long *sto = get_rt_sto(entry);
+	unsigned long *ste;
+
+	/* release every page table still referenced, then the table itself */
+	for (ste = sto; ste < sto + ZPCI_TABLE_ENTRIES; ste++)
+		if (reg_entry_isvalid(*ste))
+			dma_free_page_table(get_st_pto(*ste));
+	dma_free_cpu_table(sto);
+}
+
+static void dma_cleanup_tables(struct zpci_dev *zdev)
+{
+	unsigned long *rto, *rte;
+
+	if (!zdev || !zdev->dma_table)
+		return;
+
+	/* free the segment table of each valid entry, then the root table */
+	rto = zdev->dma_table;
+	for (rte = rto; rte < rto + ZPCI_TABLE_ENTRIES; rte++)
+		if (reg_entry_isvalid(*rte))
+			dma_free_seg_table(*rte);
+
+	dma_free_cpu_table(rto);
+	zdev->dma_table = NULL;	/* a repeated call returns early */
+}
+
+static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
+				       unsigned long start, int size)
+{
+	unsigned long boundary_size;	/* segment boundary, in pages */
+
+	boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1,
+			      PAGE_SIZE) >> PAGE_SHIFT;
+	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
+				start, size, 0, boundary_size, 0);	/* callers test for -1 */
+}
+
+static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
+{
+	unsigned long offset, flags;
+	int wrap = 0;	/* set when the search restarted at bit 0 */
+
+	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
+	offset = __dma_alloc_iommu(zdev, zdev->next_bit, size);	/* search from next_bit */
+	if (offset == -1) {
+		/* wrap-around */
+		offset = __dma_alloc_iommu(zdev, 0, size);
+		wrap = 1;
+	}
+
+	if (offset != -1) {
+		zdev->next_bit = offset + size;	/* next search starts behind this range */
+		if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
+			/* global flush after wrap-around with lazy unmap */
+			zpci_refresh_global(zdev);
+	}
+	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
+	return offset;	/* -1 if no range was found */
+}
+
+static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
+	if (!zdev->iommu_bitmap)	/* bitmap already released: nothing to clear */
+		goto out;
+	bitmap_clear(zdev->iommu_bitmap, offset, size);
+	/*
+	 * Lazy flush for unmap: need to move next_bit to avoid address re-use
+	 * until wrap-around.
+	 */
+	if (!s390_iommu_strict && offset >= zdev->next_bit)
+		zdev->next_bit = offset + size;
+out:
+	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
+}
+
+int dma_set_mask(struct device *dev, u64 mask)
+{
+	if (!dev->dma_mask || !dma_supported(dev, mask))
+		return -EIO;	/* no mask storage, or mask not supported */
+
+	*dev->dma_mask = mask;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dma_set_mask);
+
+static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction direction,
+				     struct dma_attrs *attrs)
+{
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	unsigned long nr_pages, iommu_page_index;
+	unsigned long pa = page_to_phys(page) + offset;
+	int flags = ZPCI_PTE_VALID;
+	dma_addr_t dma_addr;
+
+	/* This rounds up number of pages based on size and offset */
+	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
+	iommu_page_index = dma_alloc_iommu(zdev, nr_pages);
+	if (iommu_page_index == -1)	/* no free range in the iommu bitmap */
+		goto out_err;
+
+	/* Use rounded up size */
+	size = nr_pages * PAGE_SIZE;
+
+	dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
+	if (dma_addr + size > zdev->end_dma)	/* range would pass end_dma */
+		goto out_free;
+
+	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
+		flags |= ZPCI_TABLE_PROTECTED;
+
+	if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
+		atomic64_add(nr_pages, &zdev->mapped_pages);
+		return dma_addr + (offset & ~PAGE_MASK);	/* re-add offset within the page */
+	}
+
+out_free:
+	dma_free_iommu(zdev, iommu_page_index, nr_pages);
+out_err:
+	zpci_err("map error:\n");
+	zpci_err_hex(&pa, sizeof(pa));
+	return DMA_ERROR_CODE;
+}
+
+static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
+				 size_t size, enum dma_data_direction direction,
+				 struct dma_attrs *attrs)
+{
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	unsigned long iommu_page_index;
+	int npages;
+
+	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
+	dma_addr = dma_addr & PAGE_MASK;	/* drop the offset within the page */
+	if (dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
+			     ZPCI_TABLE_PROTECTED | ZPCI_PTE_INVALID)) {
+		zpci_err("unmap error:\n");	/* logged only; freeing continues */
+		zpci_err_hex(&dma_addr, sizeof(dma_addr));
+	}
+
+	atomic64_add(npages, &zdev->unmapped_pages);
+	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
+	dma_free_iommu(zdev, iommu_page_index, npages);
+}
+
+static void *s390_dma_alloc(struct device *dev, size_t size,
+			    dma_addr_t *dma_handle, gfp_t flag,
+			    struct dma_attrs *attrs)
+{
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+	struct page *page;
+	unsigned long pa;
+	dma_addr_t map;
+
+	size = PAGE_ALIGN(size);
+	page = alloc_pages(flag, get_order(size));
+	if (!page)
+		return NULL;
+
+	pa = page_to_phys(page);
+	memset((void *) pa, 0, size);	/* physical address is used as the pointer */
+
+	map = s390_dma_map_pages(dev, page, pa % PAGE_SIZE,
+				 size, DMA_BIDIRECTIONAL, NULL);
+	if (dma_mapping_error(dev, map)) {
+		free_pages(pa, get_order(size));	/* mapping failed: give the pages back */
+		return NULL;
+	}
+
+	atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
+	if (dma_handle)
+		*dma_handle = map;
+	return (void *) pa;
+}
+
+static void s390_dma_free(struct device *dev, size_t size,
+			  void *pa, dma_addr_t dma_handle,
+			  struct dma_attrs *attrs)
+{
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+
+	size = PAGE_ALIGN(size);	/* same rounding as in s390_dma_alloc() */
+	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
+	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
+	free_pages((unsigned long) pa, get_order(size));
+}
+
+static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
+			   int nr_elements, enum dma_data_direction dir,
+			   struct dma_attrs *attrs)
+{
+	int mapped_elements = 0;
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nr_elements, i) {	/* each element is mapped separately */
+		struct page *page = sg_page(s);
+		s->dma_address = s390_dma_map_pages(dev, page, s->offset,
+						    s->length, dir, NULL);
+		if (!dma_mapping_error(dev, s->dma_address)) {
+			s->dma_length = s->length;
+			mapped_elements++;
+		} else
+			goto unmap;	/* undo the elements mapped so far */
+	}
+out:
+	return mapped_elements;
+
+unmap:
+	for_each_sg(sg, s, mapped_elements, i) {
+		if (s->dma_address)
+			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
+					     dir, NULL);
+		s->dma_address = 0;
+		s->dma_length = 0;
+	}
+	mapped_elements = 0;	/* the failure path returns 0 */
+	goto out;
+}
+
+static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+			      int nr_elements, enum dma_data_direction dir,
+			      struct dma_attrs *attrs)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nr_elements, i) {	/* unmap and clear every element */
+		s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir, NULL);
+		s->dma_address = 0;
+		s->dma_length = 0;
+	}
+}
+
+int zpci_dma_init_device(struct zpci_dev *zdev)
+{
+	int rc = -ENOMEM;	/* result of both allocation failures */
+
+	spin_lock_init(&zdev->iommu_bitmap_lock);
+	spin_lock_init(&zdev->dma_table_lock);
+
+	zdev->dma_table = dma_alloc_cpu_table();
+	if (!zdev->dma_table)
+		goto out_clean;
+
+	zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET;
+	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
+	zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
+	if (!zdev->iommu_bitmap)
+		goto out_reg;
+
+	rc = zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
+				zdev->start_dma + zdev->iommu_size - 1,
+				(u64) zdev->dma_table);
+	if (rc)
+		goto out_reg;
+	return 0;
+
+out_reg:
+	/*
+	 * vfree(NULL) is a no-op, so this also serves the bitmap failure;
+	 * both pointers are cleared so a later exit cannot free them again.
+	 */
+	vfree(zdev->iommu_bitmap);
+	zdev->iommu_bitmap = NULL;
+	dma_free_cpu_table(zdev->dma_table);
+	zdev->dma_table = NULL;
+out_clean:
+	return rc;
+}
+
+void zpci_dma_exit_device(struct zpci_dev *zdev)
+{
+	zpci_unregister_ioat(zdev, 0);	/* result is not checked */
+	dma_cleanup_tables(zdev);	/* frees the tables, clears zdev->dma_table */
+	vfree(zdev->iommu_bitmap);
+	zdev->iommu_bitmap = NULL;	/* dma_free_iommu() tests this pointer */
+	zdev->next_bit = 0;
+}
+
+static int __init dma_alloc_cpu_table_caches(void)
+{
+	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
+					ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
+					0, NULL);
+	if (!dma_region_table_cache)
+		return -ENOMEM;
+
+	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
+					ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
+					0, NULL);
+	if (!dma_page_table_cache) {
+		kmem_cache_destroy(dma_region_table_cache);	/* undo the first cache */
+		return -ENOMEM;
+	}
+	return 0;	/* both caches exist */
+}
+
+int __init zpci_dma_init(void)
+{	/* only creates the two table slab caches; 0 or -ENOMEM */
+	return dma_alloc_cpu_table_caches();
+}
+
+void zpci_dma_exit(void)
+{	/* destroys the caches in reverse order of their creation */
+	kmem_cache_destroy(dma_page_table_cache);
+	kmem_cache_destroy(dma_region_table_cache);
+}
+
+#define PREALLOC_DMA_DEBUG_ENTRIES	(1 << 16)	/* argument for dma_debug_init() */
+
+static int __init dma_debug_do_init(void)
+{
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+	return 0;	/* always reports success */
+}
+fs_initcall(dma_debug_do_init);
+
+struct dma_map_ops s390_dma_ops = {	/* callbacks implemented in this file */
+	.alloc		= s390_dma_alloc,
+	.free		= s390_dma_free,
+	.map_sg		= s390_dma_map_sg,
+	.unmap_sg	= s390_dma_unmap_sg,
+	.map_page	= s390_dma_map_pages,
+	.unmap_page	= s390_dma_unmap_pages,
+	/* if we support direct DMA this must be conditional */
+	.is_phys	= 0,
+	/* dma_supported is unconditionally true without a callback */
+};
+EXPORT_SYMBOL_GPL(s390_dma_ops);
+
+static int __init s390_iommu_setup(char *str)
+{
+	if (!strncmp(str, "strict", 6))
+		s390_iommu_strict = 1;
+	return 1;	/* __setup() handlers return 1 for an option they handled */
+}
+
+__setup("s390_iommu=", s390_iommu_setup);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
new file mode 100644
index 000000000..460fdb21c
--- /dev/null
+++ b/arch/s390/pci/pci_event.c
@@ -0,0 +1,136 @@
+/*
+ *  Copyright IBM Corp. 2012
+ *
+ *  Author(s):
+ *    Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <asm/pci_debug.h>
+#include <asm/sclp.h>
+
+/* Content Code Description (CCDF) for PCI Function Error */
+struct zpci_ccdf_err {
+	u32 reserved1;
+	u32 fh;				/* function handle */
+	u32 fid;			/* function id */
+	u32 ett		:  4;		/* expected table type */
+	u32 mvn		: 12;		/* MSI vector number */
+	u32 dmaas	:  8;		/* DMA address space */
+	u32		:  6;		/* unnamed filler bits */
+	u32 q		:  1;		/* event qualifier */
+	u32 rw		:  1;		/* read/write */
+	u64 faddr;			/* failing address */
+	u32 reserved3;
+	u16 reserved4;
+	u16 pec;			/* PCI event code */
+} __packed;
+
+/* Content Code Description (CCDF) for PCI Function Availability */
+struct zpci_ccdf_avail {
+	u32 reserved1;
+	u32 fh;				/* function handle */
+	u32 fid;			/* function id */
+	u32 reserved2;
+	u32 reserved3;
+	u32 reserved4;
+	u32 reserved5;
+	u16 reserved6;
+	u16 pec;			/* PCI event code, see __zpci_event_availability() */
+} __packed;
+
+static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
+{
+	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+
+	/* the raw CCDF is always logged, even for an unknown function */
+	zpci_err("error CCDF:\n");
+	zpci_err_hex(ccdf, sizeof(*ccdf));
+
+	/* the console message names the device, so it needs a known fid */
+	if (zdev)
+		pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
+		       pci_name(zdev->pdev), ccdf->pec, ccdf->fid);
+}
+
+void zpci_event_error(void *data)
+{
+	if (!zpci_is_enabled())
+		return;		/* dropped while PCI is not initialized */
+	__zpci_event_error(data);
+}
+
+static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
+{
+	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);	/* NULL if fid unknown */
+	struct pci_dev *pdev = zdev ? zdev->pdev : NULL;
+	int ret;
+
+	pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n",
+		pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
+	zpci_err("avail CCDF:\n");
+	zpci_err_hex(ccdf, sizeof(*ccdf));
+
+	switch (ccdf->pec) {
+	case 0x0301: /* Standby -> Configured */
+		if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY)
+			break;
+		zdev->state = ZPCI_FN_STATE_CONFIGURED;
+		zdev->fh = ccdf->fh;
+		ret = zpci_enable_device(zdev);
+		if (ret)
+			break;
+		pci_rescan_bus(zdev->bus);
+		break;
+	case 0x0302: /* Reserved -> Standby */
+		if (!zdev)
+			clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
+		break;
+	case 0x0303: /* Deconfiguration requested */
+		if (!zdev)	/* unknown fid: nothing to deconfigure */
+			break;
+		if (pdev)
+			pci_stop_and_remove_bus_device(pdev);
+		ret = zpci_disable_device(zdev);
+		if (ret)
+			break;
+		ret = sclp_pci_deconfigure(zdev->fid);
+		zpci_dbg(3, "deconf fid:%x, rc:%d\n", zdev->fid, ret);
+		if (!ret)
+			zdev->state = ZPCI_FN_STATE_STANDBY;
+		break;
+	case 0x0304: /* Configured -> Standby */
+		if (!zdev)	/* unknown fid: no state to update */
+			break;
+		if (pdev) {
+			/* Give the driver a hint that the function is
+			 * already unusable. */
+			pdev->error_state = pci_channel_io_perm_failure;
+			pci_stop_and_remove_bus_device(pdev);
+		}
+		zdev->fh = ccdf->fh;
+		zpci_disable_device(zdev);
+		zdev->state = ZPCI_FN_STATE_STANDBY;
+		break;
+	case 0x0306: /* 0x308 or 0x302 for multiple devices */
+		clp_rescan_pci_devices();
+		break;
+	case 0x0308: /* Standby -> Reserved */
+		if (!zdev)
+			break;
+		pci_stop_root_bus(zdev->bus);
+		pci_remove_root_bus(zdev->bus);
+		break;
+	default:
+		break;
+	}
+}
+
+void zpci_event_availability(void *data)
+{
+	if (!zpci_is_enabled())
+		return;		/* dropped while PCI is not initialized */
+	__zpci_event_availability(data);
+}
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
new file mode 100644
index 000000000..85267c058
--- /dev/null
+++ b/arch/s390/pci/pci_insn.c
@@ -0,0 +1,202 @@
+/*
+ * s390 specific pci instructions
+ *
+ * Copyright IBM Corp. 2013
+ */
+
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <asm/pci_insn.h>
+#include <asm/processor.h>
+
+#define ZPCI_INSN_BUSY_DELAY	1	/* busy-retry delay: microseconds where passed to udelay(); zpci_mod_fc() passes it to msleep() */
+
+/* Modify PCI Function Controls: issue the instruction once and return its condition code */
+static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status)
+{
+	u8 cc;
+
+	asm volatile (
+		"	.insn	rxy,0xe300000000d0,%[req],%[fib]\n"
+		"	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		: [cc] "=d" (cc), [req] "+d" (req), [fib] "+Q" (*fib)	/* req and *fib are read-write operands */
+		: : "cc");
+	*status = req >> 24 & 0xff;	/* status = bits 24..31 of req as left by the instruction */
+	return cc;
+}
+
+int zpci_mod_fc(u64 req, struct zpci_fib *fib)
+{
+	u8 status, cond;
+
+	/* Reissue the instruction while it reports cc 2, sleeping in between. */
+	while ((cond = __mpcifc(req, fib, &status)) == 2)
+		msleep(ZPCI_INSN_BUSY_DELAY);
+
+	if (!cond)
+		return 0;
+	/* Any other non-zero cc is logged (printk_once) and mapped to -EIO. */
+	printk_once(KERN_ERR "%s: error cc: %d  status: %d\n",
+		     __func__, cond, status);
+	return -EIO;
+}
+
+/* Refresh PCI Translations: issue the instruction once and return its condition code */
+static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status)
+{
+	register u64 __addr asm("2") = addr;	/* pinned to register 2 */
+	register u64 __range asm("3") = range;	/* pinned to register 3; passed as an unnamed input */
+	u8 cc;
+
+	asm volatile (
+		"	.insn	rre,0xb9d30000,%[fn],%[addr]\n"
+		"	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		: [cc] "=d" (cc), [fn] "+d" (fn)
+		: [addr] "d" (__addr), "d" (__range)
+		: "cc");
+	*status = fn >> 24 & 0xff;	/* status = bits 24..31 of fn as left by the instruction */
+	return cc;
+}
+
+int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
+{
+	u8 status, cond;
+
+	/* Busy-wait and reissue the refresh while it reports cc 2. */
+	while ((cond = __rpcit(fn, addr, range, &status)) == 2)
+		udelay(ZPCI_INSN_BUSY_DELAY);
+
+	if (!cond)
+		return 0;
+	/* Any other non-zero cc is logged (printk_once) and mapped to -EIO. */
+	printk_once(KERN_ERR "%s: error cc: %d  status: %d  dma_addr: %Lx  size: %Lx\n",
+		    __func__, cond, status, addr, range);
+	return -EIO;
+}
+
+/* Set Interruption Controls: issues a single instruction; no result is examined */
+void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
+{
+	asm volatile (
+		"	.insn	rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n"
+		: : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused));	/* isc is passed shifted left by 27; *unused supplies the memory operand */
+}
+
+/* PCI Load: issue the instruction once; returns its condition code, or the -ENXIO preset */
+static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
+{
+	register u64 __req asm("2") = req;	/* pinned to register 2 */
+	register u64 __offset asm("3") = offset;	/* pinned to register 3; passed as an unnamed input */
+	int cc = -ENXIO;	/* kept if the ipm/srl pair below does not run */
+	u64 __data;
+
+	asm volatile (
+		"	.insn	rre,0xb9d20000,%[data],%[req]\n"
+		"0:	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)	/* presumably: a fault reported at label 0 resumes at label 1 - confirm */
+		: [cc] "+d" (cc), [data] "=d" (__data), [req] "+d" (__req)
+		:  "d" (__offset)
+		: "cc");
+	*status = __req >> 24 & 0xff;	/* status = bits 24..31 of __req after the instruction */
+	if (!cc)
+		*data = __data;	/* *data is written only when cc is 0 */
+
+	return cc;
+}
+
+int zpci_load(u64 *data, u64 req, u64 offset)
+{
+	u8 status;
+	int rc;
+
+	/* Busy-wait and reissue the load for as long as it reports cc 2. */
+	while ((rc = __pcilg(data, req, offset, &status)) == 2)
+		udelay(ZPCI_INSN_BUSY_DELAY);
+
+	if (!rc)
+		return 0;
+	printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
+		    __func__, rc, status, req, offset);
+	/* Positive condition codes become -EIO; a negative value is passed through. */
+	return rc > 0 ? -EIO : rc;
+}
+EXPORT_SYMBOL_GPL(zpci_load);
+
+/* PCI Store: issue the instruction once; returns its condition code, or the -ENXIO preset */
+static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
+{
+	register u64 __req asm("2") = req;	/* pinned to register 2 */
+	register u64 __offset asm("3") = offset;	/* pinned to register 3; passed as an unnamed input */
+	int cc = -ENXIO;	/* kept if the ipm/srl pair below does not run */
+
+	asm volatile (
+		"	.insn	rre,0xb9d00000,%[data],%[req]\n"
+		"0:	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)	/* presumably: a fault reported at label 0 resumes at label 1 - confirm */
+		: [cc] "+d" (cc), [req] "+d" (__req)
+		: "d" (__offset), [data] "d" (data)
+		: "cc");
+	*status = __req >> 24 & 0xff;	/* status = bits 24..31 of __req after the instruction */
+	return cc;
+}
+
+int zpci_store(u64 data, u64 req, u64 offset)
+{
+	u8 status;
+	int rc;
+
+	/* Busy-wait and reissue the store for as long as it reports cc 2. */
+	while ((rc = __pcistg(data, req, offset, &status)) == 2)
+		udelay(ZPCI_INSN_BUSY_DELAY);
+
+	if (!rc)
+		return 0;
+	printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
+		    __func__, rc, status, req, offset);
+	/* Positive condition codes become -EIO; a negative value is passed through. */
+	return rc > 0 ? -EIO : rc;
+}
+EXPORT_SYMBOL_GPL(zpci_store);
+
+/* PCI Store Block: issue the instruction once; returns its condition code, or the -ENXIO preset */
+static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
+{
+	int cc = -ENXIO;	/* kept if the ipm/srl pair below does not run */
+
+	asm volatile (
+		"	.insn	rsy,0xeb00000000d0,%[req],%[offset],%[data]\n"
+		"0:	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
+		"1:\n"
+		EX_TABLE(0b, 1b)	/* presumably: a fault reported at label 0 resumes at label 1 - confirm */
+		: [cc] "+d" (cc), [req] "+d" (req)
+		: [offset] "d" (offset), [data] "Q" (*data)	/* *data is an input-only memory operand */
+		: "cc");
+	*status = req >> 24 & 0xff;	/* status = bits 24..31 of req after the instruction */
+	return cc;
+}
+
+int zpci_store_block(const u64 *data, u64 req, u64 offset)
+{
+	u8 status;
+	int rc;
+
+	/* Busy-wait and reissue the block store for as long as it reports cc 2. */
+	while ((rc = __pcistb(data, req, offset, &status)) == 2)
+		udelay(ZPCI_INSN_BUSY_DELAY);
+
+	if (!rc)
+		return 0;
+	printk_once(KERN_ERR "%s: error cc: %d  status: %d  req: %Lx  offset: %Lx\n",
+		    __func__, rc, status, req, offset);
+	/* Positive condition codes become -EIO; a negative value is passed through. */
+	return rc > 0 ? -EIO : rc;
+}
+EXPORT_SYMBOL_GPL(zpci_store_block);
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
new file mode 100644
index 000000000..b1bb2b723
--- /dev/null
+++ b/arch/s390/pci/pci_mmio.c
@@ -0,0 +1,114 @@
+/*
+ * Access to PCI I/O memory from user space programs.
+ *
+ * Copyright IBM Corp. 2014
+ * Author(s): Alexey Ishchuk <aishchuk@linux.vnet.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+
+static long get_pfn(unsigned long user_addr, unsigned long access,
+		    unsigned long *pfn)
+{
+	struct vm_area_struct *vma;
+	long ret;
+
+	/* Translate user_addr to a pfn; lookup and follow_pfn() run under mmap_sem. */
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma(current->mm, user_addr);
+	/* find_vma() can return a vma lying entirely above user_addr: no mapping. */
+	if (!vma || user_addr < vma->vm_start)
+		ret = -EINVAL;
+	else if (!(vma->vm_flags & access))
+		ret = -EACCES;	/* none of the requested access bits is set */
+	else
+		ret = follow_pfn(vma, user_addr, pfn);
+	up_read(&current->mm->mmap_sem);
+	return ret;
+}
+
+SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
+		const void __user *, user_buffer, size_t, length)
+{
+	unsigned long page_off = mmio_addr & ~PAGE_MASK;
+	unsigned long io_phys, pfn;
+	u8 stack_buf[64];
+	void *bounce = stack_buf;
+	long rc;
+
+	if (!zpci_is_enabled())
+		return -ENODEV;
+	/* Reject empty requests and requests that run past the end of the page. */
+	if (length <= 0 || PAGE_SIZE - page_off < length)
+		return -EINVAL;
+	/* Requests above 64 bytes are staged in a heap buffer instead of stack_buf. */
+	if (length > 64) {
+		bounce = kmalloc(length, GFP_KERNEL);
+		if (!bounce)
+			return -ENOMEM;
+	}
+
+	rc = get_pfn(mmio_addr, VM_WRITE, &pfn);
+	if (rc)
+		goto out;
+	io_phys = (pfn << PAGE_SHIFT) | page_off;
+
+	/* Addresses below ZPCI_IOMAP_ADDR_BASE and failed user copies give -EFAULT. */
+	if (io_phys < ZPCI_IOMAP_ADDR_BASE ||
+	    copy_from_user(bounce, user_buffer, length)) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rc = zpci_memcpy_toio((void __iomem *) io_phys, bounce, length);
+out:
+	if (bounce != stack_buf)
+		kfree(bounce);
+	return rc;
+}
+
+SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
+		void __user *, user_buffer, size_t, length)
+{
+	unsigned long page_off = mmio_addr & ~PAGE_MASK;
+	unsigned long io_phys, pfn;
+	u8 stack_buf[64];
+	void *bounce = stack_buf;
+	long rc;
+
+	if (!zpci_is_enabled())
+		return -ENODEV;
+	/* Reject empty requests and requests that run past the end of the page. */
+	if (length <= 0 || PAGE_SIZE - page_off < length)
+		return -EINVAL;
+	/* Requests above 64 bytes are staged in a heap buffer instead of stack_buf. */
+	if (length > 64) {
+		bounce = kmalloc(length, GFP_KERNEL);
+		if (!bounce)
+			return -ENOMEM;
+	}
+
+	rc = get_pfn(mmio_addr, VM_READ, &pfn);
+	if (rc)
+		goto out;
+	io_phys = (pfn << PAGE_SHIFT) | page_off;
+
+	/* Only addresses at or above ZPCI_IOMAP_ADDR_BASE are read from. */
+	rc = -EFAULT;
+	if (io_phys < ZPCI_IOMAP_ADDR_BASE)
+		goto out;
+	rc = zpci_memcpy_fromio(bounce, (void __iomem *) io_phys, length);
+	if (rc)
+		goto out;
+	/* Data goes to user space only after the device read succeeded. */
+	if (copy_to_user(user_buffer, bounce, length))
+		rc = -EFAULT;
+out:
+	if (bounce != stack_buf)
+		kfree(bounce);
+	return rc;
+}
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
new file mode 100644
index 000000000..fa3ce891e
--- /dev/null
+++ b/arch/s390/pci/pci_sysfs.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright IBM Corp. 2012
+ *
+ * Author(s):
+ *   Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/stat.h>
+#include <linux/pci.h>
+
+#define zpci_attr(name, fmt, member)					\
+static ssize_t name##_show(struct device *dev,				\
+			   struct device_attribute *attr, char *buf)	\
+{									\
+	/* Print the zpci_dev field `member` of this PCI device, */	\
+	/* formatted with fmt, into the sysfs output buffer. */		\
+	return sprintf(buf, fmt, get_zdev(to_pci_dev(dev))->member);	\
+}									\
+static DEVICE_ATTR_RO(name)
+
+zpci_attr(function_id, "0x%08x\n", fid);
+zpci_attr(function_handle, "0x%08x\n", fh);
+zpci_attr(pchid, "0x%04x\n", pchid);
+zpci_attr(pfgid, "0x%02x\n", pfgid);
+zpci_attr(vfn, "0x%04x\n", vfn);
+zpci_attr(pft, "0x%02x\n", pft);
+zpci_attr(uid, "0x%x\n", uid);
+zpci_attr(segment0, "0x%02x\n", pfip[0]);
+zpci_attr(segment1, "0x%02x\n", pfip[1]);
+zpci_attr(segment2, "0x%02x\n", pfip[2]);
+zpci_attr(segment3, "0x%02x\n", pfip[3]);
+
+static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct zpci_dev *zdev = get_zdev(pdev);
+	int rc;
+
+	/* Nothing more is done if the attribute file could not be removed. */
+	if (!device_remove_file_self(dev, attr))
+		return count;
+
+	/* Remove the PCI device, then disable and re-enable the function. */
+	pci_stop_and_remove_bus_device(pdev);
+	rc = zpci_disable_device(zdev);
+	if (!rc)
+		rc = zpci_enable_device(zdev);
+	if (rc)
+		return rc;
+
+	pci_rescan_bus(zdev->bus);
+	return count;
+}
+static DEVICE_ATTR_WO(recover);
+
+static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr, char *buf,
+				loff_t off, size_t count)
+{
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(kobj_to_dev(kobj)));
+
+	/* Serve the read from zdev->util_str at offset off; the helper */
+	/* is given sizeof(zdev->util_str) as the size of the source. */
+	return memory_read_from_buffer(buf, count, &off, zdev->util_str,
+				       sizeof(zdev->util_str));
+}
+static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
+static struct bin_attribute *zpci_bin_attrs[] = {
+	&bin_attr_util_string,
+	NULL,
+};
+
+static struct attribute *zpci_dev_attrs[] = {	/* members of zpci_attr_group (group has no .name) */
+	&dev_attr_function_id.attr,
+	&dev_attr_function_handle.attr,
+	&dev_attr_pchid.attr,
+	&dev_attr_pfgid.attr,
+	&dev_attr_pft.attr,
+	&dev_attr_vfn.attr,
+	&dev_attr_uid.attr,
+	&dev_attr_recover.attr,	/* the only write-only attribute in this list */
+	NULL,
+};
+static struct attribute_group zpci_attr_group = {
+	.attrs = zpci_dev_attrs,
+	.bin_attrs = zpci_bin_attrs,	/* binary attributes: util_string */
+};
+
+static struct attribute *pfip_attrs[] = {	/* segment0..3 print zdev->pfip[0..3] */
+	&dev_attr_segment0.attr,
+	&dev_attr_segment1.attr,
+	&dev_attr_segment2.attr,
+	&dev_attr_segment3.attr,
+	NULL,
+};
+static struct attribute_group pfip_attr_group = {
+	.name = "pfip",	/* named group; presumably a "pfip" sysfs subdirectory - confirm */
+	.attrs = pfip_attrs,
+};
+
+const struct attribute_group *zpci_attr_groups[] = {	/* NULL-terminated; non-static, so usable from other files */
+	&zpci_attr_group,
+	&pfip_attr_group,
+	NULL,
+};