summaryrefslogtreecommitdiff
path: root/arch/s390/kernel
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
commit57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch)
tree5e910f0e82173f4ef4f51111366a3f1299037a7b /arch/s390/kernel
Initial import
Diffstat (limited to 'arch/s390/kernel')
-rw-r--r--arch/s390/kernel/.gitignore1
-rw-r--r--arch/s390/kernel/Makefile60
-rw-r--r--arch/s390/kernel/asm-offsets.c184
-rw-r--r--arch/s390/kernel/audit.c78
-rw-r--r--arch/s390/kernel/audit.h15
-rw-r--r--arch/s390/kernel/base.S131
-rw-r--r--arch/s390/kernel/cache.c181
-rw-r--r--arch/s390/kernel/compat_audit.c44
-rw-r--r--arch/s390/kernel/compat_linux.c520
-rw-r--r--arch/s390/kernel/compat_linux.h129
-rw-r--r--arch/s390/kernel/compat_ptrace.h63
-rw-r--r--arch/s390/kernel/compat_signal.c549
-rw-r--r--arch/s390/kernel/compat_wrapper.c222
-rw-r--r--arch/s390/kernel/cpcmd.c113
-rw-r--r--arch/s390/kernel/crash_dump.c687
-rw-r--r--arch/s390/kernel/debug.c1540
-rw-r--r--arch/s390/kernel/diag.c66
-rw-r--r--arch/s390/kernel/dis.c2049
-rw-r--r--arch/s390/kernel/dumpstack.c204
-rw-r--r--arch/s390/kernel/early.c444
-rw-r--r--arch/s390/kernel/ebcdic.c400
-rw-r--r--arch/s390/kernel/entry.S1059
-rw-r--r--arch/s390/kernel/entry.h81
-rw-r--r--arch/s390/kernel/ftrace.c244
-rw-r--r--arch/s390/kernel/head.S454
-rw-r--r--arch/s390/kernel/head64.S105
-rw-r--r--arch/s390/kernel/head_kdump.S100
-rw-r--r--arch/s390/kernel/idle.c125
-rw-r--r--arch/s390/kernel/ipl.c2064
-rw-r--r--arch/s390/kernel/irq.c310
-rw-r--r--arch/s390/kernel/jump_label.c109
-rw-r--r--arch/s390/kernel/kprobes.c733
-rw-r--r--arch/s390/kernel/lgr.c186
-rw-r--r--arch/s390/kernel/machine_kexec.c275
-rw-r--r--arch/s390/kernel/mcount.S82
-rw-r--r--arch/s390/kernel/module.c431
-rw-r--r--arch/s390/kernel/nmi.c352
-rw-r--r--arch/s390/kernel/os_info.c168
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c696
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c322
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c1639
-rw-r--r--arch/s390/kernel/perf_event.c324
-rw-r--r--arch/s390/kernel/pgm_check.S146
-rw-r--r--arch/s390/kernel/process.c226
-rw-r--r--arch/s390/kernel/processor.c103
-rw-r--r--arch/s390/kernel/ptrace.c1533
-rw-r--r--arch/s390/kernel/reipl.S155
-rw-r--r--arch/s390/kernel/relocate_kernel.S121
-rw-r--r--arch/s390/kernel/runtime_instr.c149
-rw-r--r--arch/s390/kernel/s390_ksyms.c13
-rw-r--r--arch/s390/kernel/sclp.S351
-rw-r--r--arch/s390/kernel/setup.c892
-rw-r--r--arch/s390/kernel/signal.c559
-rw-r--r--arch/s390/kernel/smp.c1161
-rw-r--r--arch/s390/kernel/stacktrace.c96
-rw-r--r--arch/s390/kernel/suspend.c225
-rw-r--r--arch/s390/kernel/swsusp.S317
-rw-r--r--arch/s390/kernel/sys_s390.c91
-rw-r--r--arch/s390/kernel/syscalls.S365
-rw-r--r--arch/s390/kernel/sysinfo.c465
-rw-r--r--arch/s390/kernel/time.c1802
-rw-r--r--arch/s390/kernel/topology.c501
-rw-r--r--arch/s390/kernel/traps.c342
-rw-r--r--arch/s390/kernel/uprobes.c385
-rw-r--r--arch/s390/kernel/vdso.c306
-rw-r--r--arch/s390/kernel/vdso32/.gitignore1
-rw-r--r--arch/s390/kernel/vdso32/Makefile58
-rw-r--r--arch/s390/kernel/vdso32/clock_getres.S46
-rw-r--r--arch/s390/kernel/vdso32/clock_gettime.S149
-rw-r--r--arch/s390/kernel/vdso32/gettimeofday.S81
-rw-r--r--arch/s390/kernel/vdso32/note.S12
-rw-r--r--arch/s390/kernel/vdso32/vdso32.lds.S138
-rw-r--r--arch/s390/kernel/vdso32/vdso32_wrapper.S14
-rw-r--r--arch/s390/kernel/vdso64/.gitignore1
-rw-r--r--arch/s390/kernel/vdso64/Makefile58
-rw-r--r--arch/s390/kernel/vdso64/clock_getres.S52
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S150
-rw-r--r--arch/s390/kernel/vdso64/gettimeofday.S59
-rw-r--r--arch/s390/kernel/vdso64/note.S12
-rw-r--r--arch/s390/kernel/vdso64/vdso64.lds.S138
-rw-r--r--arch/s390/kernel/vdso64/vdso64_wrapper.S14
-rw-r--r--arch/s390/kernel/vmlinux.lds.S93
-rw-r--r--arch/s390/kernel/vtime.c379
83 files changed, 29298 insertions, 0 deletions
diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore
new file mode 100644
index 000000000..c5f676c3c
--- /dev/null
+++ b/arch/s390/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
new file mode 100644
index 000000000..ffb87617a
--- /dev/null
+++ b/arch/s390/kernel/Makefile
@@ -0,0 +1,60 @@
+#
+# Makefile for the linux kernel.
+#
+
+ifdef CONFIG_FUNCTION_TRACER
+# Don't trace early setup code and tracing code
+CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+endif
+
+#
+# Passing null pointers is ok for smp code, since we access the lowcore here.
+#
+CFLAGS_smp.o := -Wno-nonnull
+
+#
+# Disable tailcall optimizations for stack / callchain walking functions
+# since this might generate broken code when accessing register 15 and
+# passing its content to other functions.
+#
+CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
+CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
+
+#
+# Pass UTS_MACHINE for user_regset definition
+#
+CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
+CFLAGS_sysinfo.o += -w
+
+obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
+obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
+obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
+obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
+obj-y += runtime_instr.o cache.o dumpstack.o
+obj-y += entry.o reipl.o relocate_kernel.o
+
+extra-y += head.o head64.o vmlinux.lds
+
+obj-$(CONFIG_MODULES) += s390_ksyms.o module.o
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_SCHED_BOOK) += topology.o
+obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o
+obj-$(CONFIG_AUDIT) += audit.o
+compat-obj-$(CONFIG_AUDIT) += compat_audit.o
+obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
+obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y)
+
+obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_UPROBES) += uprobes.o
+
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o
+obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o
+
+# vdso
+obj-y += vdso64/
+obj-$(CONFIG_COMPAT) += vdso32/
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
new file mode 100644
index 000000000..c7d1b9d09
--- /dev/null
+++ b/arch/s390/kernel/asm-offsets.c
@@ -0,0 +1,184 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
+
+#define ASM_OFFSETS_C
+
+#include <linux/kbuild.h>
+#include <linux/kvm_host.h>
+#include <linux/sched.h>
+#include <asm/idle.h>
+#include <asm/vdso.h>
+#include <asm/pgtable.h>
+
+/*
+ * Make sure that the compiler is new enough. We want a compiler that
+ * is known to work with the "Q" assembler constraint.
+ */
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3)
+#error Your compiler is too old; please use version 4.3 or newer
+#endif
+
+int main(void)
+{
+ DEFINE(__THREAD_info, offsetof(struct task_struct, stack));
+ DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp));
+ DEFINE(__THREAD_mm_segment, offsetof(struct task_struct, thread.mm_segment));
+ BLANK();
+ DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
+ BLANK();
+ DEFINE(__THREAD_per_cause, offsetof(struct task_struct, thread.per_event.cause));
+ DEFINE(__THREAD_per_address, offsetof(struct task_struct, thread.per_event.address));
+ DEFINE(__THREAD_per_paid, offsetof(struct task_struct, thread.per_event.paid));
+ BLANK();
+ DEFINE(__TI_task, offsetof(struct thread_info, task));
+ DEFINE(__TI_flags, offsetof(struct thread_info, flags));
+ DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table));
+ DEFINE(__TI_cpu, offsetof(struct thread_info, cpu));
+ DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count));
+ DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer));
+ DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer));
+ DEFINE(__TI_last_break, offsetof(struct thread_info, last_break));
+ BLANK();
+ DEFINE(__PT_ARGS, offsetof(struct pt_regs, args));
+ DEFINE(__PT_PSW, offsetof(struct pt_regs, psw));
+ DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs));
+ DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2));
+ DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code));
+ DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm));
+ DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long));
+ DEFINE(__PT_FLAGS, offsetof(struct pt_regs, flags));
+ DEFINE(__PT_SIZE, sizeof(struct pt_regs));
+ BLANK();
+ DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain));
+ DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs));
+ DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1));
+ BLANK();
+ /* timeval/timezone offsets for use by vdso */
+ DEFINE(__VDSO_UPD_COUNT, offsetof(struct vdso_data, tb_update_count));
+ DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp));
+ DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec));
+ DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
+ DEFINE(__VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
+ DEFINE(__VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
+ DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
+ DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
+ DEFINE(__VDSO_WTOM_CRS_SEC, offsetof(struct vdso_data, wtom_coarse_sec));
+ DEFINE(__VDSO_WTOM_CRS_NSEC, offsetof(struct vdso_data, wtom_coarse_nsec));
+ DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
+ DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
+ DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult));
+ DEFINE(__VDSO_TK_SHIFT, offsetof(struct vdso_data, tk_shift));
+ DEFINE(__VDSO_ECTG_BASE, offsetof(struct vdso_per_cpu_data, ectg_timer_base));
+ DEFINE(__VDSO_ECTG_USER, offsetof(struct vdso_per_cpu_data, ectg_user_time));
+ /* constants used by the vdso */
+ DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
+ DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
+ DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
+ DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID);
+ DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
+ DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC);
+ BLANK();
+ /* idle data offsets */
+ DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter));
+ DEFINE(__CLOCK_IDLE_EXIT, offsetof(struct s390_idle_data, clock_idle_exit));
+ DEFINE(__TIMER_IDLE_ENTER, offsetof(struct s390_idle_data, timer_idle_enter));
+ DEFINE(__TIMER_IDLE_EXIT, offsetof(struct s390_idle_data, timer_idle_exit));
+ /* lowcore offsets */
+ DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params));
+ DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr));
+ DEFINE(__LC_EXT_INT_CODE, offsetof(struct _lowcore, ext_int_code));
+ DEFINE(__LC_SVC_ILC, offsetof(struct _lowcore, svc_ilc));
+ DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code));
+ DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
+ DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
+ DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
+ DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num));
+ DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code));
+ DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid));
+ DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
+ DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id));
+ DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
+ DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id));
+ DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
+ DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code));
+ DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
+ DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
+ DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));
+ DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
+ DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
+ DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
+ DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code));
+ DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
+ DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
+ DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
+ DEFINE(__LC_PGM_OLD_PSW, offsetof(struct _lowcore, program_old_psw));
+ DEFINE(__LC_MCK_OLD_PSW, offsetof(struct _lowcore, mcck_old_psw));
+ DEFINE(__LC_IO_OLD_PSW, offsetof(struct _lowcore, io_old_psw));
+ DEFINE(__LC_RST_NEW_PSW, offsetof(struct _lowcore, restart_psw));
+ DEFINE(__LC_EXT_NEW_PSW, offsetof(struct _lowcore, external_new_psw));
+ DEFINE(__LC_SVC_NEW_PSW, offsetof(struct _lowcore, svc_new_psw));
+ DEFINE(__LC_PGM_NEW_PSW, offsetof(struct _lowcore, program_new_psw));
+ DEFINE(__LC_MCK_NEW_PSW, offsetof(struct _lowcore, mcck_new_psw));
+ DEFINE(__LC_IO_NEW_PSW, offsetof(struct _lowcore, io_new_psw));
+ BLANK();
+ DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync));
+ DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async));
+ DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart));
+ DEFINE(__LC_CPU_FLAGS, offsetof(struct _lowcore, cpu_flags));
+ DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw));
+ DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw));
+ DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer));
+ DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer));
+ DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer));
+ DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer));
+ DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer));
+ DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer));
+ DEFINE(__LC_STEAL_TIMER, offsetof(struct _lowcore, steal_timer));
+ DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer));
+ DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock));
+ DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task));
+ DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid));
+ DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info));
+ DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack));
+ DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack));
+ DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack));
+ DEFINE(__LC_RESTART_STACK, offsetof(struct _lowcore, restart_stack));
+ DEFINE(__LC_RESTART_FN, offsetof(struct _lowcore, restart_fn));
+ DEFINE(__LC_RESTART_DATA, offsetof(struct _lowcore, restart_data));
+ DEFINE(__LC_RESTART_SOURCE, offsetof(struct _lowcore, restart_source));
+ DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce));
+ DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce));
+ DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
+ DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
+ DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
+ DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib));
+ BLANK();
+ DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));
+ DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area));
+ DEFINE(__LC_PSW_SAVE_AREA, offsetof(struct _lowcore, psw_save_area));
+ DEFINE(__LC_PREFIX_SAVE_AREA, offsetof(struct _lowcore, prefixreg_save_area));
+ DEFINE(__LC_AREGS_SAVE_AREA, offsetof(struct _lowcore, access_regs_save_area));
+ DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area));
+ DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area));
+ DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area));
+ DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
+ DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
+ DEFINE(__LC_VX_SAVE_AREA_ADDR, offsetof(struct _lowcore, vector_save_area_addr));
+ DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
+ DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
+ DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
+ DEFINE(__LC_FP_CREG_SAVE_AREA, offsetof(struct _lowcore, fpt_creg_save_area));
+ DEFINE(__LC_LAST_BREAK, offsetof(struct _lowcore, breaking_event_addr));
+ DEFINE(__LC_VDSO_PER_CPU, offsetof(struct _lowcore, vdso_per_cpu_data));
+ DEFINE(__LC_GMAP, offsetof(struct _lowcore, gmap));
+ DEFINE(__LC_PGM_TDB, offsetof(struct _lowcore, pgm_tdb));
+ DEFINE(__THREAD_trap_tdb, offsetof(struct task_struct, thread.trap_tdb));
+ DEFINE(__GMAP_ASCE, offsetof(struct gmap, asce));
+ DEFINE(__SIE_PROG0C, offsetof(struct kvm_s390_sie_block, prog0c));
+ DEFINE(__SIE_PROG20, offsetof(struct kvm_s390_sie_block, prog20));
+ return 0;
+}
diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c
new file mode 100644
index 000000000..f4932c22e
--- /dev/null
+++ b/arch/s390/kernel/audit.c
@@ -0,0 +1,78 @@
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/audit.h>
+#include <asm/unistd.h>
+#include "audit.h"
+
+static unsigned dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+static unsigned read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+static unsigned write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+static unsigned chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+static unsigned signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_arch(int arch)
+{
+#ifdef CONFIG_COMPAT
+ if (arch == AUDIT_ARCH_S390)
+ return 1;
+#endif
+ return 0;
+}
+
+int audit_classify_syscall(int abi, unsigned syscall)
+{
+#ifdef CONFIG_COMPAT
+ if (abi == AUDIT_ARCH_S390)
+ return s390_classify_syscall(syscall);
+#endif
+ switch(syscall) {
+ case __NR_open:
+ return 2;
+ case __NR_openat:
+ return 3;
+ case __NR_socketcall:
+ return 4;
+ case __NR_execve:
+ return 5;
+ default:
+ return 0;
+ }
+}
+
+static int __init audit_classes_init(void)
+{
+#ifdef CONFIG_COMPAT
+ audit_register_class(AUDIT_CLASS_WRITE_32, s390_write_class);
+ audit_register_class(AUDIT_CLASS_READ_32, s390_read_class);
+ audit_register_class(AUDIT_CLASS_DIR_WRITE_32, s390_dir_class);
+ audit_register_class(AUDIT_CLASS_CHATTR_32, s390_chattr_class);
+ audit_register_class(AUDIT_CLASS_SIGNAL_32, s390_signal_class);
+#endif
+ audit_register_class(AUDIT_CLASS_WRITE, write_class);
+ audit_register_class(AUDIT_CLASS_READ, read_class);
+ audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
+ audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+ audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
+ return 0;
+}
+
+__initcall(audit_classes_init);
diff --git a/arch/s390/kernel/audit.h b/arch/s390/kernel/audit.h
new file mode 100644
index 000000000..12b56f4b5
--- /dev/null
+++ b/arch/s390/kernel/audit.h
@@ -0,0 +1,15 @@
+#ifndef __ARCH_S390_KERNEL_AUDIT_H
+#define __ARCH_S390_KERNEL_AUDIT_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_COMPAT
+extern int s390_classify_syscall(unsigned);
+extern __u32 s390_dir_class[];
+extern __u32 s390_write_class[];
+extern __u32 s390_read_class[];
+extern __u32 s390_chattr_class[];
+extern __u32 s390_signal_class[];
+#endif /* CONFIG_COMPAT */
+
+#endif /* __ARCH_S390_KERNEL_AUDIT_H */
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
new file mode 100644
index 000000000..daed3fde4
--- /dev/null
+++ b/arch/s390/kernel/base.S
@@ -0,0 +1,131 @@
+/*
+ * arch/s390/kernel/base.S
+ *
+ * Copyright IBM Corp. 2006, 2007
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ * Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/sigp.h>
+
+ENTRY(s390_base_mcck_handler)
+ basr %r13,0
+0: lg %r15,__LC_PANIC_STACK # load panic stack
+ aghi %r15,-STACK_FRAME_OVERHEAD
+ larl %r1,s390_base_mcck_handler_fn
+ lg %r1,0(%r1)
+ ltgr %r1,%r1
+ jz 1f
+ basr %r14,%r1
+1: la %r1,4095
+ lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)
+ lpswe __LC_MCK_OLD_PSW
+
+ .section .bss
+ .align 8
+ .globl s390_base_mcck_handler_fn
+s390_base_mcck_handler_fn:
+ .quad 0
+ .previous
+
+ENTRY(s390_base_ext_handler)
+ stmg %r0,%r15,__LC_SAVE_AREA_ASYNC
+ basr %r13,0
+0: aghi %r15,-STACK_FRAME_OVERHEAD
+ larl %r1,s390_base_ext_handler_fn
+ lg %r1,0(%r1)
+ ltgr %r1,%r1
+ jz 1f
+ basr %r14,%r1
+1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC
+ ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit
+ lpswe __LC_EXT_OLD_PSW
+
+ .section .bss
+ .align 8
+ .globl s390_base_ext_handler_fn
+s390_base_ext_handler_fn:
+ .quad 0
+ .previous
+
+ENTRY(s390_base_pgm_handler)
+ stmg %r0,%r15,__LC_SAVE_AREA_SYNC
+ basr %r13,0
+0: aghi %r15,-STACK_FRAME_OVERHEAD
+ larl %r1,s390_base_pgm_handler_fn
+ lg %r1,0(%r1)
+ ltgr %r1,%r1
+ jz 1f
+ basr %r14,%r1
+ lmg %r0,%r15,__LC_SAVE_AREA_SYNC
+ lpswe __LC_PGM_OLD_PSW
+1: lpswe disabled_wait_psw-0b(%r13)
+
+ .align 8
+disabled_wait_psw:
+ .quad 0x0002000180000000,0x0000000000000000 + s390_base_pgm_handler
+
+ .section .bss
+ .align 8
+ .globl s390_base_pgm_handler_fn
+s390_base_pgm_handler_fn:
+ .quad 0
+ .previous
+
+#
+# Calls diag 308 subcode 1 and continues execution
+#
+# The following conditions must be ensured before calling this function:
+# * Prefix register = 0
+# * Lowcore protection is disabled
+#
+ENTRY(diag308_reset)
+ larl %r4,.Lctlregs # Save control registers
+ stctg %c0,%c15,0(%r4)
+ larl %r4,.Lfpctl # Floating point control register
+ stfpc 0(%r4)
+ larl %r4,.Lcontinue_psw # Save PSW flags
+ epsw %r2,%r3
+ stm %r2,%r3,0(%r4)
+ larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0
+ lghi %r3,0
+ lg %r4,0(%r4) # Save PSW
+ sturg %r4,%r3 # Use sturg, because of large pages
+ lghi %r1,1
+ lghi %r0,0
+ diag %r0,%r1,0x308
+.Lrestart_part2:
+ lhi %r0,0 # Load r0 with zero
+ lhi %r1,2 # Use mode 2 = ESAME (dump)
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode
+ sam64 # Switch to 64 bit addressing mode
+ larl %r4,.Lctlregs # Restore control registers
+ lctlg %c0,%c15,0(%r4)
+ larl %r4,.Lfpctl # Restore floating point ctl register
+ lfpc 0(%r4)
+ larl %r4,.Lcontinue_psw # Restore PSW flags
+ lpswe 0(%r4)
+.Lcontinue:
+ br %r14
+.align 16
+.Lrestart_psw:
+ .long 0x00080000,0x80000000 + .Lrestart_part2
+
+ .section .data..nosave,"aw",@progbits
+.align 8
+.Lcontinue_psw:
+ .quad 0,.Lcontinue
+ .previous
+
+ .section .bss
+.align 8
+.Lctlregs:
+ .rept 16
+ .quad 0
+ .endr
+.Lfpctl:
+ .long 0
+ .previous
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
new file mode 100644
index 000000000..bff5e3b6d
--- /dev/null
+++ b/arch/s390/kernel/cache.c
@@ -0,0 +1,181 @@
+/*
+ * Extract CPU cache information and expose them via sysfs.
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/seq_file.h>
+#include <linux/cpu.h>
+#include <linux/cacheinfo.h>
+#include <asm/facility.h>
+
+enum {
+ CACHE_SCOPE_NOTEXISTS,
+ CACHE_SCOPE_PRIVATE,
+ CACHE_SCOPE_SHARED,
+ CACHE_SCOPE_RESERVED,
+};
+
+enum {
+ CTYPE_SEPARATE,
+ CTYPE_DATA,
+ CTYPE_INSTRUCTION,
+ CTYPE_UNIFIED,
+};
+
+enum {
+ EXTRACT_TOPOLOGY,
+ EXTRACT_LINE_SIZE,
+ EXTRACT_SIZE,
+ EXTRACT_ASSOCIATIVITY,
+};
+
+enum {
+ CACHE_TI_UNIFIED = 0,
+ CACHE_TI_DATA = 0,
+ CACHE_TI_INSTRUCTION,
+};
+
+struct cache_info {
+ unsigned char : 4;
+ unsigned char scope : 2;
+ unsigned char type : 2;
+};
+
+#define CACHE_MAX_LEVEL 8
+union cache_topology {
+ struct cache_info ci[CACHE_MAX_LEVEL];
+ unsigned long long raw;
+};
+
+static const char * const cache_type_string[] = {
+ "",
+ "Instruction",
+ "Data",
+ "",
+ "Unified",
+};
+
+static const enum cache_type cache_type_map[] = {
+ [CTYPE_SEPARATE] = CACHE_TYPE_SEPARATE,
+ [CTYPE_DATA] = CACHE_TYPE_DATA,
+ [CTYPE_INSTRUCTION] = CACHE_TYPE_INST,
+ [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
+};
+
+void show_cacheinfo(struct seq_file *m)
+{
+ struct cpu_cacheinfo *this_cpu_ci;
+ struct cacheinfo *cache;
+ int idx;
+
+ if (!test_facility(34))
+ return;
+ get_online_cpus();
+ this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask));
+ for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
+ cache = this_cpu_ci->info_list + idx;
+ seq_printf(m, "cache%-11d: ", idx);
+ seq_printf(m, "level=%d ", cache->level);
+ seq_printf(m, "type=%s ", cache_type_string[cache->type]);
+ seq_printf(m, "scope=%s ",
+ cache->disable_sysfs ? "Shared" : "Private");
+ seq_printf(m, "size=%dK ", cache->size >> 10);
+ seq_printf(m, "line_size=%u ", cache->coherency_line_size);
+ seq_printf(m, "associativity=%d", cache->ways_of_associativity);
+ seq_puts(m, "\n");
+ }
+ put_online_cpus();
+}
+
+static inline enum cache_type get_cache_type(struct cache_info *ci, int level)
+{
+ if (level >= CACHE_MAX_LEVEL)
+ return CACHE_TYPE_NOCACHE;
+ ci += level;
+ if (ci->scope != CACHE_SCOPE_SHARED && ci->scope != CACHE_SCOPE_PRIVATE)
+ return CACHE_TYPE_NOCACHE;
+ return cache_type_map[ci->type];
+}
+
+static inline unsigned long ecag(int ai, int li, int ti)
+{
+ unsigned long cmd, val;
+
+ cmd = ai << 4 | li << 1 | ti;
+ asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */
+ : "=d" (val) : "a" (cmd));
+ return val;
+}
+
+static void ci_leaf_init(struct cacheinfo *this_leaf, int private,
+ enum cache_type type, unsigned int level, int cpu)
+{
+ int ti, num_sets;
+
+ if (type == CACHE_TYPE_INST)
+ ti = CACHE_TI_INSTRUCTION;
+ else
+ ti = CACHE_TI_UNIFIED;
+ this_leaf->level = level + 1;
+ this_leaf->type = type;
+ this_leaf->coherency_line_size = ecag(EXTRACT_LINE_SIZE, level, ti);
+ this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti);
+ this_leaf->size = ecag(EXTRACT_SIZE, level, ti);
+ num_sets = this_leaf->size / this_leaf->coherency_line_size;
+ num_sets /= this_leaf->ways_of_associativity;
+ this_leaf->number_of_sets = num_sets;
+ cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
+ if (!private)
+ this_leaf->disable_sysfs = true;
+}
+
+int init_cache_level(unsigned int cpu)
+{
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ unsigned int level = 0, leaves = 0;
+ union cache_topology ct;
+ enum cache_type ctype;
+
+ if (!this_cpu_ci)
+ return -EINVAL;
+ ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
+ do {
+ ctype = get_cache_type(&ct.ci[0], level);
+ if (ctype == CACHE_TYPE_NOCACHE)
+ break;
+ /* Separate instruction and data caches */
+ leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
+ } while (++level < CACHE_MAX_LEVEL);
+ this_cpu_ci->num_levels = level;
+ this_cpu_ci->num_leaves = leaves;
+ return 0;
+}
+
+int populate_cache_leaves(unsigned int cpu)
+{
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+ unsigned int level, idx, pvt;
+ union cache_topology ct;
+ enum cache_type ctype;
+
+ if (!test_facility(34))
+ return -EOPNOTSUPP;
+ ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
+ for (idx = 0, level = 0; level < this_cpu_ci->num_levels &&
+ idx < this_cpu_ci->num_leaves; idx++, level++) {
+ if (!this_leaf)
+ return -EINVAL;
+ pvt = (ct.ci[level].scope == CACHE_SCOPE_PRIVATE) ? 1 : 0;
+ ctype = get_cache_type(&ct.ci[0], level);
+ if (ctype == CACHE_TYPE_SEPARATE) {
+ ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level, cpu);
+ ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level, cpu);
+ } else {
+ ci_leaf_init(this_leaf++, pvt, ctype, level, cpu);
+ }
+ }
+ return 0;
+}
diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c
new file mode 100644
index 000000000..d6487bf87
--- /dev/null
+++ b/arch/s390/kernel/compat_audit.c
@@ -0,0 +1,44 @@
+#undef __s390x__
+#include <asm/unistd.h>
+#include "audit.h"
+
+unsigned s390_dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+unsigned s390_chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+unsigned s390_write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+unsigned s390_read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+unsigned s390_signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int s390_classify_syscall(unsigned syscall)
+{
+ switch(syscall) {
+ case __NR_open:
+ return 2;
+ case __NR_openat:
+ return 3;
+ case __NR_socketcall:
+ return 4;
+ case __NR_execve:
+ return 5;
+ default:
+ return 1;
+ }
+}
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
new file mode 100644
index 000000000..437e61159
--- /dev/null
+++ b/arch/s390/kernel/compat_linux.c
@@ -0,0 +1,520 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 2000
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Gerhard Tonn (ton@de.ibm.com)
+ * Thomas Spatzier (tspat@de.ibm.com)
+ *
+ * Conversion between 31bit and 64bit native syscalls.
+ *
+ * Heavily inspired by the 32-bit Sparc compat code which is
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ *
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/file.h>
+#include <linux/signal.h>
+#include <linux/resource.h>
+#include <linux/times.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/uio.h>
+#include <linux/quota.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/personality.h>
+#include <linux/stat.h>
+#include <linux/filter.h>
+#include <linux/highmem.h>
+#include <linux/highuid.h>
+#include <linux/mman.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/icmpv6.h>
+#include <linux/syscalls.h>
+#include <linux/sysctl.h>
+#include <linux/binfmts.h>
+#include <linux/capability.h>
+#include <linux/compat.h>
+#include <linux/vfs.h>
+#include <linux/ptrace.h>
+#include <linux/fadvise.h>
+#include <linux/ipc.h>
+#include <linux/slab.h>
+
+#include <asm/types.h>
+#include <asm/uaccess.h>
+
+#include <net/scm.h>
+#include <net/sock.h>
+
+#include "compat_linux.h"
+
+/* For this source file, we want overflow handling. */
+
+#undef high2lowuid
+#undef high2lowgid
+#undef low2highuid
+#undef low2highgid
+#undef SET_UID16
+#undef SET_GID16
+#undef NEW_TO_OLD_UID
+#undef NEW_TO_OLD_GID
+#undef SET_OLDSTAT_UID
+#undef SET_OLDSTAT_GID
+#undef SET_STAT_UID
+#undef SET_STAT_GID
+
+#define high2lowuid(uid) ((uid) > 65535) ? (u16)overflowuid : (u16)(uid)
+#define high2lowgid(gid) ((gid) > 65535) ? (u16)overflowgid : (u16)(gid)
+#define low2highuid(uid) ((uid) == (u16)-1) ? (uid_t)-1 : (uid_t)(uid)
+#define low2highgid(gid) ((gid) == (u16)-1) ? (gid_t)-1 : (gid_t)(gid)
+#define SET_UID16(var, uid) var = high2lowuid(uid)
+#define SET_GID16(var, gid) var = high2lowgid(gid)
+#define NEW_TO_OLD_UID(uid) high2lowuid(uid)
+#define NEW_TO_OLD_GID(gid) high2lowgid(gid)
+#define SET_OLDSTAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid)
+#define SET_OLDSTAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid)
+#define SET_STAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid)
+#define SET_STAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid)
+
+COMPAT_SYSCALL_DEFINE3(s390_chown16, const char __user *, filename,
+ u16, user, u16, group)
+{
+ return sys_chown(filename, low2highuid(user), low2highgid(group));
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_lchown16, const char __user *,
+ filename, u16, user, u16, group)
+{
+ return sys_lchown(filename, low2highuid(user), low2highgid(group));
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_fchown16, unsigned int, fd, u16, user, u16, group)
+{
+ return sys_fchown(fd, low2highuid(user), low2highgid(group));
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid)
+{
+ return sys_setregid(low2highgid(rgid), low2highgid(egid));
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid)
+{
+ return sys_setgid((gid_t)gid);
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid)
+{
+ return sys_setreuid(low2highuid(ruid), low2highuid(euid));
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid)
+{
+ return sys_setuid((uid_t)uid);
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid)
+{
+ return sys_setresuid(low2highuid(ruid), low2highuid(euid),
+ low2highuid(suid));
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_getresuid16, u16 __user *, ruidp,
+ u16 __user *, euidp, u16 __user *, suidp)
+{
+ const struct cred *cred = current_cred();
+ int retval;
+ u16 ruid, euid, suid;
+
+ ruid = high2lowuid(from_kuid_munged(cred->user_ns, cred->uid));
+ euid = high2lowuid(from_kuid_munged(cred->user_ns, cred->euid));
+ suid = high2lowuid(from_kuid_munged(cred->user_ns, cred->suid));
+
+ if (!(retval = put_user(ruid, ruidp)) &&
+ !(retval = put_user(euid, euidp)))
+ retval = put_user(suid, suidp);
+
+ return retval;
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_setresgid16, u16, rgid, u16, egid, u16, sgid)
+{
+ return sys_setresgid(low2highgid(rgid), low2highgid(egid),
+ low2highgid(sgid));
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_getresgid16, u16 __user *, rgidp,
+ u16 __user *, egidp, u16 __user *, sgidp)
+{
+ const struct cred *cred = current_cred();
+ int retval;
+ u16 rgid, egid, sgid;
+
+ rgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->gid));
+ egid = high2lowgid(from_kgid_munged(cred->user_ns, cred->egid));
+ sgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->sgid));
+
+ if (!(retval = put_user(rgid, rgidp)) &&
+ !(retval = put_user(egid, egidp)))
+ retval = put_user(sgid, sgidp);
+
+ return retval;
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid)
+{
+ return sys_setfsuid((uid_t)uid);
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid)
+{
+ return sys_setfsgid((gid_t)gid);
+}
+
+static int groups16_to_user(u16 __user *grouplist, struct group_info *group_info)
+{
+ struct user_namespace *user_ns = current_user_ns();
+ int i;
+ u16 group;
+ kgid_t kgid;
+
+ for (i = 0; i < group_info->ngroups; i++) {
+ kgid = GROUP_AT(group_info, i);
+ group = (u16)from_kgid_munged(user_ns, kgid);
+ if (put_user(group, grouplist+i))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int groups16_from_user(struct group_info *group_info, u16 __user *grouplist)
+{
+ struct user_namespace *user_ns = current_user_ns();
+ int i;
+ u16 group;
+ kgid_t kgid;
+
+ for (i = 0; i < group_info->ngroups; i++) {
+ if (get_user(group, grouplist+i))
+ return -EFAULT;
+
+ kgid = make_kgid(user_ns, (gid_t)group);
+ if (!gid_valid(kgid))
+ return -EINVAL;
+
+ GROUP_AT(group_info, i) = kgid;
+ }
+
+ return 0;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_getgroups16, int, gidsetsize, u16 __user *, grouplist)
+{
+ const struct cred *cred = current_cred();
+ int i;
+
+ if (gidsetsize < 0)
+ return -EINVAL;
+
+ get_group_info(cred->group_info);
+ i = cred->group_info->ngroups;
+ if (gidsetsize) {
+ if (i > gidsetsize) {
+ i = -EINVAL;
+ goto out;
+ }
+ if (groups16_to_user(grouplist, cred->group_info)) {
+ i = -EFAULT;
+ goto out;
+ }
+ }
+out:
+ put_group_info(cred->group_info);
+ return i;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplist)
+{
+ struct group_info *group_info;
+ int retval;
+
+ if (!may_setgroups())
+ return -EPERM;
+ if ((unsigned)gidsetsize > NGROUPS_MAX)
+ return -EINVAL;
+
+ group_info = groups_alloc(gidsetsize);
+ if (!group_info)
+ return -ENOMEM;
+ retval = groups16_from_user(group_info, grouplist);
+ if (retval) {
+ put_group_info(group_info);
+ return retval;
+ }
+
+ retval = set_current_groups(group_info);
+ put_group_info(group_info);
+
+ return retval;
+}
+
+COMPAT_SYSCALL_DEFINE0(s390_getuid16)
+{
+ return high2lowuid(from_kuid_munged(current_user_ns(), current_uid()));
+}
+
+COMPAT_SYSCALL_DEFINE0(s390_geteuid16)
+{
+ return high2lowuid(from_kuid_munged(current_user_ns(), current_euid()));
+}
+
+COMPAT_SYSCALL_DEFINE0(s390_getgid16)
+{
+ return high2lowgid(from_kgid_munged(current_user_ns(), current_gid()));
+}
+
+COMPAT_SYSCALL_DEFINE0(s390_getegid16)
+{
+ return high2lowgid(from_kgid_munged(current_user_ns(), current_egid()));
+}
+
+#ifdef CONFIG_SYSVIPC
+COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second,
+ compat_ulong_t, third, compat_uptr_t, ptr)
+{
+ if (call >> 16) /* hack for backward compatibility */
+ return -EINVAL;
+ return compat_sys_ipc(call, first, second, third, ptr, third);
+}
+#endif
+
+COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low)
+{
+ return sys_truncate(path, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low)
+{
+ return sys_ftruncate(fd, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf,
+ compat_size_t, count, u32, high, u32, low)
+{
+ if ((compat_ssize_t) count < 0)
+ return -EINVAL;
+ return sys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf,
+ compat_size_t, count, u32, high, u32, low)
+{
+ if ((compat_ssize_t) count < 0)
+ return -EINVAL;
+ return sys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low);
+}
+
+COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count)
+{
+ return sys_readahead(fd, (unsigned long)high << 32 | low, count);
+}
+
+struct stat64_emu31 {
+ unsigned long long st_dev;
+ unsigned int __pad1;
+#define STAT64_HAS_BROKEN_ST_INO 1
+ u32 __st_ino;
+ unsigned int st_mode;
+ unsigned int st_nlink;
+ u32 st_uid;
+ u32 st_gid;
+ unsigned long long st_rdev;
+ unsigned int __pad3;
+ long st_size;
+ u32 st_blksize;
+ unsigned char __pad4[4];
+ u32 __pad5; /* future possible st_blocks high bits */
+ u32 st_blocks; /* Number 512-byte blocks allocated. */
+ u32 st_atime;
+ u32 __pad6;
+ u32 st_mtime;
+ u32 __pad7;
+ u32 st_ctime;
+ u32 __pad8; /* will be high 32 bits of ctime someday */
+ unsigned long st_ino;
+};
+
+static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat)
+{
+ struct stat64_emu31 tmp;
+
+ memset(&tmp, 0, sizeof(tmp));
+
+ tmp.st_dev = huge_encode_dev(stat->dev);
+ tmp.st_ino = stat->ino;
+ tmp.__st_ino = (u32)stat->ino;
+ tmp.st_mode = stat->mode;
+ tmp.st_nlink = (unsigned int)stat->nlink;
+ tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
+ tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid);
+ tmp.st_rdev = huge_encode_dev(stat->rdev);
+ tmp.st_size = stat->size;
+ tmp.st_blksize = (u32)stat->blksize;
+ tmp.st_blocks = (u32)stat->blocks;
+ tmp.st_atime = (u32)stat->atime.tv_sec;
+ tmp.st_mtime = (u32)stat->mtime.tv_sec;
+ tmp.st_ctime = (u32)stat->ctime.tv_sec;
+
+ return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf)
+{
+ struct kstat stat;
+ int ret = vfs_stat(filename, &stat);
+ if (!ret)
+ ret = cp_stat64(statbuf, &stat);
+ return ret;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf)
+{
+ struct kstat stat;
+ int ret = vfs_lstat(filename, &stat);
+ if (!ret)
+ ret = cp_stat64(statbuf, &stat);
+ return ret;
+}
+
+COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd, struct stat64_emu31 __user *, statbuf)
+{
+ struct kstat stat;
+ int ret = vfs_fstat(fd, &stat);
+ if (!ret)
+ ret = cp_stat64(statbuf, &stat);
+ return ret;
+}
+
+COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename,
+ struct stat64_emu31 __user *, statbuf, int, flag)
+{
+ struct kstat stat;
+ int error;
+
+ error = vfs_fstatat(dfd, filename, &stat, flag);
+ if (error)
+ return error;
+ return cp_stat64(statbuf, &stat);
+}
+
+/*
+ * Linux/i386 didn't use to be able to handle more than
+ * 4 system call parameters, so these system calls used a memory
+ * block for parameter passing..
+ */
+
+struct mmap_arg_struct_emu31 {
+ compat_ulong_t addr;
+ compat_ulong_t len;
+ compat_ulong_t prot;
+ compat_ulong_t flags;
+ compat_ulong_t fd;
+ compat_ulong_t offset;
+};
+
+COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg)
+{
+ struct mmap_arg_struct_emu31 a;
+
+ if (copy_from_user(&a, arg, sizeof(a)))
+ return -EFAULT;
+ if (a.offset & ~PAGE_MASK)
+ return -EINVAL;
+ return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+ a.offset >> PAGE_SHIFT);
+}
+
+COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg)
+{
+ struct mmap_arg_struct_emu31 a;
+
+ if (copy_from_user(&a, arg, sizeof(a)))
+ return -EFAULT;
+ return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count)
+{
+ if ((compat_ssize_t) count < 0)
+ return -EINVAL;
+
+ return sys_read(fd, buf, count);
+}
+
+COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count)
+{
+ if ((compat_ssize_t) count < 0)
+ return -EINVAL;
+
+ return sys_write(fd, buf, count);
+}
+
+/*
+ * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64.
+ * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE}
+ * because the 31 bit values differ from the 64 bit values.
+ */
+
+COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise)
+{
+ if (advise == 4)
+ advise = POSIX_FADV_DONTNEED;
+ else if (advise == 5)
+ advise = POSIX_FADV_NOREUSE;
+ return sys_fadvise64(fd, (unsigned long)high << 32 | low, len, advise);
+}
+
+struct fadvise64_64_args {
+ int fd;
+ long long offset;
+ long long len;
+ int advice;
+};
+
+COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args)
+{
+ struct fadvise64_64_args a;
+
+ if ( copy_from_user(&a, args, sizeof(a)) )
+ return -EFAULT;
+ if (a.advice == 4)
+ a.advice = POSIX_FADV_DONTNEED;
+ else if (a.advice == 5)
+ a.advice = POSIX_FADV_NOREUSE;
+ return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
+}
+
+COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow,
+ u32, nhigh, u32, nlow, unsigned int, flags)
+{
+ return sys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow,
+ ((u64)nhigh << 32) + nlow, flags);
+}
+
+COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow,
+ u32, lenhigh, u32, lenlow)
+{
+ return sys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow,
+ ((u64)lenhigh << 32) + lenlow);
+}
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
new file mode 100644
index 000000000..a0a886c04
--- /dev/null
+++ b/arch/s390/kernel/compat_linux.h
@@ -0,0 +1,129 @@
+#ifndef _ASM_S390X_S390_H
+#define _ASM_S390X_S390_H
+
+#include <linux/compat.h>
+#include <linux/socket.h>
+#include <linux/syscalls.h>
+
+/* Macro that masks the high order bit of an 32 bit pointer and converts it*/
+/* to a 64 bit pointer */
+#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL))
+#define AA(__x) \
+ ((unsigned long)(__x))
+
+/* Now 32bit compatibility types */
+struct ipc_kludge_32 {
+ __u32 msgp; /* pointer */
+ __s32 msgtyp;
+};
+
+/* asm/sigcontext.h */
+typedef union
+{
+ __u64 d;
+ __u32 f;
+} freg_t32;
+
+typedef struct
+{
+ unsigned int fpc;
+ unsigned int pad;
+ freg_t32 fprs[__NUM_FPRS];
+} _s390_fp_regs32;
+
+typedef struct
+{
+ __u32 mask;
+ __u32 addr;
+} _psw_t32 __attribute__ ((aligned(8)));
+
+typedef struct
+{
+ _psw_t32 psw;
+ __u32 gprs[__NUM_GPRS];
+ __u32 acrs[__NUM_ACRS];
+} _s390_regs_common32;
+
+typedef struct
+{
+ _s390_regs_common32 regs;
+ _s390_fp_regs32 fpregs;
+} _sigregs32;
+
+typedef struct
+{
+ __u32 gprs_high[__NUM_GPRS];
+ __u64 vxrs_low[__NUM_VXRS_LOW];
+ __vector128 vxrs_high[__NUM_VXRS_HIGH];
+ __u8 __reserved[128];
+} _sigregs_ext32;
+
+#define _SIGCONTEXT_NSIG32 64
+#define _SIGCONTEXT_NSIG_BPW32 32
+#define __SIGNAL_FRAMESIZE32 96
+#define _SIGMASK_COPY_SIZE32 (sizeof(u32)*2)
+
+struct sigcontext32
+{
+ __u32 oldmask[_COMPAT_NSIG_WORDS];
+ __u32 sregs; /* pointer */
+};
+
+/* asm/signal.h */
+
+/* asm/ucontext.h */
+struct ucontext32 {
+ __u32 uc_flags;
+ __u32 uc_link; /* pointer */
+ compat_stack_t uc_stack;
+ _sigregs32 uc_mcontext;
+ compat_sigset_t uc_sigmask;
+ /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
+ unsigned char __unused[128 - sizeof(compat_sigset_t)];
+ _sigregs_ext32 uc_mcontext_ext;
+};
+
+struct stat64_emu31;
+struct mmap_arg_struct_emu31;
+struct fadvise64_64_args;
+
+long compat_sys_s390_chown16(const char __user *filename, u16 user, u16 group);
+long compat_sys_s390_lchown16(const char __user *filename, u16 user, u16 group);
+long compat_sys_s390_fchown16(unsigned int fd, u16 user, u16 group);
+long compat_sys_s390_setregid16(u16 rgid, u16 egid);
+long compat_sys_s390_setgid16(u16 gid);
+long compat_sys_s390_setreuid16(u16 ruid, u16 euid);
+long compat_sys_s390_setuid16(u16 uid);
+long compat_sys_s390_setresuid16(u16 ruid, u16 euid, u16 suid);
+long compat_sys_s390_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
+long compat_sys_s390_setresgid16(u16 rgid, u16 egid, u16 sgid);
+long compat_sys_s390_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
+long compat_sys_s390_setfsuid16(u16 uid);
+long compat_sys_s390_setfsgid16(u16 gid);
+long compat_sys_s390_getgroups16(int gidsetsize, u16 __user *grouplist);
+long compat_sys_s390_setgroups16(int gidsetsize, u16 __user *grouplist);
+long compat_sys_s390_getuid16(void);
+long compat_sys_s390_geteuid16(void);
+long compat_sys_s390_getgid16(void);
+long compat_sys_s390_getegid16(void);
+long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low);
+long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low);
+long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low);
+long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low);
+long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count);
+long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_lstat64(const char __user *filename, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbuf);
+long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag);
+long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg);
+long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg);
+long compat_sys_s390_read(unsigned int fd, char __user * buf, compat_size_t count);
+long compat_sys_s390_write(unsigned int fd, const char __user * buf, compat_size_t count);
+long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise);
+long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
+long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags);
+long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow);
+long compat_sys_sigreturn(void);
+long compat_sys_rt_sigreturn(void);
+
+#endif /* _ASM_S390X_S390_H */
diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h
new file mode 100644
index 000000000..12b823833
--- /dev/null
+++ b/arch/s390/kernel/compat_ptrace.h
@@ -0,0 +1,63 @@
+#ifndef _PTRACE32_H
+#define _PTRACE32_H
+
+#include <asm/ptrace.h> /* needed for NUM_CR_WORDS */
+#include "compat_linux.h" /* needed for psw_compat_t */
+
+struct compat_per_struct_kernel {
+ __u32 cr9; /* PER control bits */
+ __u32 cr10; /* PER starting address */
+ __u32 cr11; /* PER ending address */
+ __u32 bits; /* Obsolete software bits */
+ __u32 starting_addr; /* User specified start address */
+ __u32 ending_addr; /* User specified end address */
+ __u16 perc_atmid; /* PER trap ATMID */
+ __u32 address; /* PER trap instruction address */
+ __u8 access_id; /* PER trap access identification */
+};
+
+struct compat_user_regs_struct
+{
+ psw_compat_t psw;
+ u32 gprs[NUM_GPRS];
+ u32 acrs[NUM_ACRS];
+ u32 orig_gpr2;
+ /* nb: there's a 4-byte hole here */
+ s390_fp_regs fp_regs;
+ /*
+ * These per registers are in here so that gdb can modify them
+ * itself as there is no "official" ptrace interface for hardware
+ * watchpoints. This is the way intel does it.
+ */
+ struct compat_per_struct_kernel per_info;
+ u32 ieee_instruction_pointer; /* obsolete, always 0 */
+};
+
+struct compat_user {
+ /* We start with the registers, to mimic the way that "memory"
+ is returned from the ptrace(3,...) function. */
+ struct compat_user_regs_struct regs;
+ /* The rest of this junk is to help gdb figure out what goes where */
+ u32 u_tsize; /* Text segment size (pages). */
+ u32 u_dsize; /* Data segment size (pages). */
+ u32 u_ssize; /* Stack segment size (pages). */
+ u32 start_code; /* Starting virtual address of text. */
+ u32 start_stack; /* Starting virtual address of stack area.
+ This is actually the bottom of the stack,
+ the top of the stack is always found in the
+ esp register. */
+ s32 signal; /* Signal that caused the core dump. */
+ u32 u_ar0; /* Used by gdb to help find the values for */
+ /* the registers. */
+ u32 magic; /* To uniquely identify a core file */
+ char u_comm[32]; /* User command that was responsible */
+};
+
+typedef struct
+{
+ __u32 len;
+ __u32 kernel_addr;
+ __u32 process_addr;
+} compat_ptrace_area;
+
+#endif /* _PTRACE32_H */
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
new file mode 100644
index 000000000..fe8d6924e
--- /dev/null
+++ b/arch/s390/kernel/compat_signal.c
@@ -0,0 +1,549 @@
+/*
+ * Copyright IBM Corp. 2000, 2006
+ * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ * Gerhard Tonn (ton@de.ibm.com)
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
+ */
+
+#include <linux/compat.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/lowcore.h>
+#include <asm/switch_to.h>
+#include "compat_linux.h"
+#include "compat_ptrace.h"
+#include "entry.h"
+
+typedef struct
+{
+ __u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
+ struct sigcontext32 sc;
+ _sigregs32 sregs;
+ int signo;
+ _sigregs_ext32 sregs_ext;
+ __u16 svc_insn; /* Offset of svc_insn is NOT fixed! */
+} sigframe32;
+
+typedef struct
+{
+ __u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
+ __u16 svc_insn;
+ compat_siginfo_t info;
+ struct ucontext32 uc;
+} rt_sigframe32;
+
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
+{
+ int err;
+
+ /* If you change siginfo_t structure, please be sure
+ this code is fixed accordingly.
+ It should never copy any pad contained in the structure
+ to avoid security leaks, but must copy the generic
+ 3 ints plus the relevant union member.
+ This routine must convert siginfo from 64bit to 32bit as well
+ at the same time. */
+ err = __put_user(from->si_signo, &to->si_signo);
+ err |= __put_user(from->si_errno, &to->si_errno);
+ err |= __put_user((short)from->si_code, &to->si_code);
+ if (from->si_code < 0)
+ err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE);
+ else {
+ switch (from->si_code >> 16) {
+ case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
+ case __SI_MESGQ >> 16:
+ err |= __put_user(from->si_int, &to->si_int);
+ /* fallthrough */
+ case __SI_KILL >> 16:
+ err |= __put_user(from->si_pid, &to->si_pid);
+ err |= __put_user(from->si_uid, &to->si_uid);
+ break;
+ case __SI_CHLD >> 16:
+ err |= __put_user(from->si_pid, &to->si_pid);
+ err |= __put_user(from->si_uid, &to->si_uid);
+ err |= __put_user(from->si_utime, &to->si_utime);
+ err |= __put_user(from->si_stime, &to->si_stime);
+ err |= __put_user(from->si_status, &to->si_status);
+ break;
+ case __SI_FAULT >> 16:
+ err |= __put_user((unsigned long) from->si_addr,
+ &to->si_addr);
+ break;
+ case __SI_POLL >> 16:
+ err |= __put_user(from->si_band, &to->si_band);
+ err |= __put_user(from->si_fd, &to->si_fd);
+ break;
+ case __SI_TIMER >> 16:
+ err |= __put_user(from->si_tid, &to->si_tid);
+ err |= __put_user(from->si_overrun, &to->si_overrun);
+ err |= __put_user(from->si_int, &to->si_int);
+ break;
+ default:
+ break;
+ }
+ }
+ return err ? -EFAULT : 0;
+}
+
+int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
+{
+ int err;
+ u32 tmp;
+
+ err = __get_user(to->si_signo, &from->si_signo);
+ err |= __get_user(to->si_errno, &from->si_errno);
+ err |= __get_user(to->si_code, &from->si_code);
+
+ if (to->si_code < 0)
+ err |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE);
+ else {
+ switch (to->si_code >> 16) {
+ case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
+ case __SI_MESGQ >> 16:
+ err |= __get_user(to->si_int, &from->si_int);
+ /* fallthrough */
+ case __SI_KILL >> 16:
+ err |= __get_user(to->si_pid, &from->si_pid);
+ err |= __get_user(to->si_uid, &from->si_uid);
+ break;
+ case __SI_CHLD >> 16:
+ err |= __get_user(to->si_pid, &from->si_pid);
+ err |= __get_user(to->si_uid, &from->si_uid);
+ err |= __get_user(to->si_utime, &from->si_utime);
+ err |= __get_user(to->si_stime, &from->si_stime);
+ err |= __get_user(to->si_status, &from->si_status);
+ break;
+ case __SI_FAULT >> 16:
+ err |= __get_user(tmp, &from->si_addr);
+ to->si_addr = (void __force __user *)
+ (u64) (tmp & PSW32_ADDR_INSN);
+ break;
+ case __SI_POLL >> 16:
+ err |= __get_user(to->si_band, &from->si_band);
+ err |= __get_user(to->si_fd, &from->si_fd);
+ break;
+ case __SI_TIMER >> 16:
+ err |= __get_user(to->si_tid, &from->si_tid);
+ err |= __get_user(to->si_overrun, &from->si_overrun);
+ err |= __get_user(to->si_int, &from->si_int);
+ break;
+ default:
+ break;
+ }
+ }
+ return err ? -EFAULT : 0;
+}
+
+/* Store registers needed to create the signal frame */
+static void store_sigregs(void)
+{
+ int i;
+
+ save_access_regs(current->thread.acrs);
+ save_fp_ctl(&current->thread.fp_regs.fpc);
+ if (current->thread.vxrs) {
+ save_vx_regs(current->thread.vxrs);
+ for (i = 0; i < __NUM_FPRS; i++)
+ current->thread.fp_regs.fprs[i] =
+ *(freg_t *)(current->thread.vxrs + i);
+ } else
+ save_fp_regs(current->thread.fp_regs.fprs);
+}
+
+/* Load registers after signal return */
+static void load_sigregs(void)
+{
+ int i;
+
+ restore_access_regs(current->thread.acrs);
+ /* restore_fp_ctl is done in restore_sigregs */
+ if (current->thread.vxrs) {
+ for (i = 0; i < __NUM_FPRS; i++)
+ *(freg_t *)(current->thread.vxrs + i) =
+ current->thread.fp_regs.fprs[i];
+ restore_vx_regs(current->thread.vxrs);
+ } else
+ restore_fp_regs(current->thread.fp_regs.fprs);
+}
+
+static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
+{
+ _sigregs32 user_sregs;
+ int i;
+
+ user_sregs.regs.psw.mask = (__u32)(regs->psw.mask >> 32);
+ user_sregs.regs.psw.mask &= PSW32_MASK_USER | PSW32_MASK_RI;
+ user_sregs.regs.psw.mask |= PSW32_USER_BITS;
+ user_sregs.regs.psw.addr = (__u32) regs->psw.addr |
+ (__u32)(regs->psw.mask & PSW_MASK_BA);
+ for (i = 0; i < NUM_GPRS; i++)
+ user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
+ memcpy(&user_sregs.regs.acrs, current->thread.acrs,
+ sizeof(user_sregs.regs.acrs));
+ memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
+ sizeof(user_sregs.fpregs));
+ if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
+ return -EFAULT;
+ return 0;
+}
+
+static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
+{
+ _sigregs32 user_sregs;
+ int i;
+
+ /* Alwys make any pending restarted system call return -EINTR */
+ current->restart_block.fn = do_no_restart_syscall;
+
+ if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs)))
+ return -EFAULT;
+
+ if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI))
+ return -EINVAL;
+
+ /* Loading the floating-point-control word can fail. Do that first. */
+ if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+ return -EINVAL;
+
+ /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
+ regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
+ (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 |
+ (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_RI) << 32 |
+ (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_AMODE);
+ /* Check for invalid user address space control. */
+ if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME)
+ regs->psw.mask = PSW_ASC_PRIMARY |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ regs->psw.addr = (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_INSN);
+ for (i = 0; i < NUM_GPRS; i++)
+ regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
+ memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
+ sizeof(current->thread.acrs));
+
+ memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
+ sizeof(current->thread.fp_regs));
+
+ clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
+ return 0;
+}
+
+static int save_sigregs_ext32(struct pt_regs *regs,
+ _sigregs_ext32 __user *sregs_ext)
+{
+ __u32 gprs_high[NUM_GPRS];
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ /* Save high gprs to signal stack */
+ for (i = 0; i < NUM_GPRS; i++)
+ gprs_high[i] = regs->gprs[i] >> 32;
+ if (__copy_to_user(&sregs_ext->gprs_high, &gprs_high,
+ sizeof(sregs_ext->gprs_high)))
+ return -EFAULT;
+
+ /* Save vector registers to signal stack */
+ if (current->thread.vxrs) {
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+ if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
+ sizeof(sregs_ext->vxrs_low)) ||
+ __copy_to_user(&sregs_ext->vxrs_high,
+ current->thread.vxrs + __NUM_VXRS_LOW,
+ sizeof(sregs_ext->vxrs_high)))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+static int restore_sigregs_ext32(struct pt_regs *regs,
+ _sigregs_ext32 __user *sregs_ext)
+{
+ __u32 gprs_high[NUM_GPRS];
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ /* Restore high gprs from signal stack */
+ if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high,
+ sizeof(&sregs_ext->gprs_high)))
+ return -EFAULT;
+ for (i = 0; i < NUM_GPRS; i++)
+ *(__u32 *)&regs->gprs[i] = gprs_high[i];
+
+ /* Restore vector registers from signal stack */
+ if (current->thread.vxrs) {
+ if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
+ sizeof(sregs_ext->vxrs_low)) ||
+ __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+ &sregs_ext->vxrs_high,
+ sizeof(sregs_ext->vxrs_high)))
+ return -EFAULT;
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+ }
+ return 0;
+}
+
+COMPAT_SYSCALL_DEFINE0(sigreturn)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+ sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15];
+ sigset_t set;
+
+ if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
+ goto badframe;
+ set_current_blocked(&set);
+ if (restore_sigregs32(regs, &frame->sregs))
+ goto badframe;
+ if (restore_sigregs_ext32(regs, &frame->sregs_ext))
+ goto badframe;
+ load_sigregs();
+ return regs->gprs[2];
+badframe:
+ force_sig(SIGSEGV, current);
+ return 0;
+}
+
+COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+ rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15];
+ sigset_t set;
+
+ if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+ goto badframe;
+ set_current_blocked(&set);
+ if (compat_restore_altstack(&frame->uc.uc_stack))
+ goto badframe;
+ if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
+ goto badframe;
+ if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
+ goto badframe;
+ load_sigregs();
+ return regs->gprs[2];
+badframe:
+ force_sig(SIGSEGV, current);
+ return 0;
+}
+
+/*
+ * Set up a signal frame.
+ */
+
+
+/*
+ * Determine which stack to use..
+ */
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+ unsigned long sp;
+
+ /* Default to using normal stack */
+ sp = (unsigned long) A(regs->gprs[15]);
+
+ /* Overflow on alternate signal stack gives SIGSEGV. */
+ if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
+ return (void __user *) -1UL;
+
+ /* This is the X/Open sanctioned signal stack switching. */
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ if (! sas_ss_flags(sp))
+ sp = current->sas_ss_sp + current->sas_ss_size;
+ }
+
+ return (void __user *)((sp - frame_size) & -8ul);
+}
+
+static int setup_frame32(struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs)
+{
+ int sig = ksig->sig;
+ sigframe32 __user *frame;
+ struct sigcontext32 sc;
+ unsigned long restorer;
+ size_t frame_size;
+
+ /*
+ * gprs_high are always present for 31-bit compat tasks.
+ * The space for vector registers is only allocated if
+ * the machine supports it
+ */
+ frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved);
+ if (!MACHINE_HAS_VX)
+ frame_size -= sizeof(frame->sregs_ext.vxrs_low) +
+ sizeof(frame->sregs_ext.vxrs_high);
+ frame = get_sigframe(&ksig->ka, regs, frame_size);
+ if (frame == (void __user *) -1UL)
+ return -EFAULT;
+
+ /* Set up backchain. */
+ if (__put_user(regs->gprs[15], (unsigned int __user *) frame))
+ return -EFAULT;
+
+ /* Create struct sigcontext32 on the signal stack */
+ memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32);
+ sc.sregs = (__u32)(unsigned long __force) &frame->sregs;
+ if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc)))
+ return -EFAULT;
+
+ /* Store registers needed to create the signal frame */
+ store_sigregs();
+
+ /* Create _sigregs32 on the signal stack */
+ if (save_sigregs32(regs, &frame->sregs))
+ return -EFAULT;
+
+ /* Place signal number on stack to allow backtrace from handler. */
+ if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo))
+ return -EFAULT;
+
+ /* Create _sigregs_ext32 on the signal stack */
+ if (save_sigregs_ext32(regs, &frame->sregs_ext))
+ return -EFAULT;
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+ restorer = (unsigned long __force)
+ ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
+ } else {
+ /* Signal frames without vectors registers are short ! */
+ __u16 __user *svc = (void __user *) frame + frame_size - 2;
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
+ return -EFAULT;
+ restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
+ }
+
+ /* Set up registers for signal handler */
+ regs->gprs[14] = restorer;
+ regs->gprs[15] = (__force __u64) frame;
+ /* Force 31 bit amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_BA |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ regs->psw.addr = (__force __u64) ksig->ka.sa.sa_handler;
+
+ regs->gprs[2] = sig;
+ regs->gprs[3] = (__force __u64) &frame->sc;
+
+ /* We forgot to include these in the sigcontext.
+ To avoid breaking binary compatibility, they are passed as args. */
+ if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL ||
+ sig == SIGTRAP || sig == SIGFPE) {
+ /* set extra registers only for synchronous signals */
+ regs->gprs[4] = regs->int_code & 127;
+ regs->gprs[5] = regs->int_parm_long;
+ regs->gprs[6] = task_thread_info(current)->last_break;
+ }
+
+ return 0;
+}
+
+static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs)
+{
+ rt_sigframe32 __user *frame;
+ unsigned long restorer;
+ size_t frame_size;
+ u32 uc_flags;
+
+ frame_size = sizeof(*frame) -
+ sizeof(frame->uc.uc_mcontext_ext.__reserved);
+ /*
+ * gprs_high are always present for 31-bit compat tasks.
+ * The space for vector registers is only allocated if
+ * the machine supports it
+ */
+ uc_flags = UC_GPRS_HIGH;
+ if (MACHINE_HAS_VX) {
+ if (current->thread.vxrs)
+ uc_flags |= UC_VXRS;
+ } else
+ frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
+ sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
+ frame = get_sigframe(&ksig->ka, regs, frame_size);
+ if (frame == (void __user *) -1UL)
+ return -EFAULT;
+
+ /* Set up backchain. */
+ if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame))
+ return -EFAULT;
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+ restorer = (unsigned long __force)
+ ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
+ } else {
+ __u16 __user *svc = &frame->svc_insn;
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
+ return -EFAULT;
+ restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
+ }
+
+ /* Create siginfo on the signal stack */
+ if (copy_siginfo_to_user32(&frame->info, &ksig->info))
+ return -EFAULT;
+
+ /* Store registers needed to create the signal frame */
+ store_sigregs();
+
+ /* Create ucontext on the signal stack. */
+ if (__put_user(uc_flags, &frame->uc.uc_flags) ||
+ __put_user(0, &frame->uc.uc_link) ||
+ __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
+ save_sigregs32(regs, &frame->uc.uc_mcontext) ||
+ __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) ||
+ save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
+ return -EFAULT;
+
+ /* Set up registers for signal handler */
+ regs->gprs[14] = restorer;
+ regs->gprs[15] = (__force __u64) frame;
+ /* Force 31 bit amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_BA |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ regs->psw.addr = (__u64 __force) ksig->ka.sa.sa_handler;
+
+ regs->gprs[2] = ksig->sig;
+ regs->gprs[3] = (__force __u64) &frame->info;
+ regs->gprs[4] = (__force __u64) &frame->uc;
+ regs->gprs[5] = task_thread_info(current)->last_break;
+ return 0;
+}
+
+/*
+ * OK, we're invoking a handler
+ */
+
+void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+ struct pt_regs *regs)
+{
+ int ret;
+
+ /* Set up the stack frame */
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+ ret = setup_rt_frame32(ksig, oldset, regs);
+ else
+ ret = setup_frame32(ksig, oldset, regs);
+
+ signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP));
+}
+
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
new file mode 100644
index 000000000..d7fa2f0f1
--- /dev/null
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -0,0 +1,222 @@
+/*
+ * Compat system call wrappers.
+ *
+ * Copyright IBM Corp. 2014
+ */
+
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include "entry.h"
+
+#define COMPAT_SYSCALL_WRAP1(name, ...) \
+ COMPAT_SYSCALL_WRAPx(1, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP2(name, ...) \
+ COMPAT_SYSCALL_WRAPx(2, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP3(name, ...) \
+ COMPAT_SYSCALL_WRAPx(3, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP4(name, ...) \
+ COMPAT_SYSCALL_WRAPx(4, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP5(name, ...) \
+ COMPAT_SYSCALL_WRAPx(5, _##name, __VA_ARGS__)
+#define COMPAT_SYSCALL_WRAP6(name, ...) \
+ COMPAT_SYSCALL_WRAPx(6, _##name, __VA_ARGS__)
+
+#define __SC_COMPAT_TYPE(t, a) \
+ __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a
+
+#define __SC_COMPAT_CAST(t, a) \
+({ \
+ long __ReS = a; \
+ \
+ BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) && \
+ !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t)); \
+ if (__TYPE_IS_L(t)) \
+ __ReS = (s32)a; \
+ if (__TYPE_IS_UL(t)) \
+ __ReS = (u32)a; \
+ if (__TYPE_IS_PTR(t)) \
+ __ReS = a & 0x7fffffff; \
+ (t)__ReS; \
+})
+
+/*
+ * The COMPAT_SYSCALL_WRAP macro generates system call wrappers to be used by
+ * compat tasks. These wrappers will only be used for system calls where only
+ * the system call arguments need sign or zero extension or zeroing of the upper
+ * 33 bits of pointers.
+ * Note: since the wrapper function will afterwards call a system call which
+ * again performs zero and sign extension for all system call arguments with
+ * a size of less than eight bytes, these compat wrappers only touch those
+ * system call arguments with a size of eight bytes ((unsigned) long and
+ * pointers). Zero and sign extension for e.g. int parameters will be done by
+ * the regular system call wrappers.
+ */
+#define COMPAT_SYSCALL_WRAPx(x, name, ...) \
+ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
+ asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\
+ asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \
+ { \
+ return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \
+ }
+
+COMPAT_SYSCALL_WRAP1(exit, int, error_code);
+COMPAT_SYSCALL_WRAP1(close, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP2(link, const char __user *, oldname, const char __user *, newname);
+COMPAT_SYSCALL_WRAP1(unlink, const char __user *, pathname);
+COMPAT_SYSCALL_WRAP1(chdir, const char __user *, filename);
+COMPAT_SYSCALL_WRAP3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev);
+COMPAT_SYSCALL_WRAP2(chmod, const char __user *, filename, umode_t, mode);
+COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name);
+COMPAT_SYSCALL_WRAP1(alarm, unsigned int, seconds);
+COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode);
+COMPAT_SYSCALL_WRAP1(nice, int, increment);
+COMPAT_SYSCALL_WRAP2(kill, int, pid, int, sig);
+COMPAT_SYSCALL_WRAP2(rename, const char __user *, oldname, const char __user *, newname);
+COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname);
+COMPAT_SYSCALL_WRAP1(dup, unsigned int, fildes);
+COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes);
+COMPAT_SYSCALL_WRAP1(brk, unsigned long, brk);
+COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler);
+COMPAT_SYSCALL_WRAP1(acct, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags);
+COMPAT_SYSCALL_WRAP2(setpgid, pid_t, pid, pid_t, pgid);
+COMPAT_SYSCALL_WRAP1(umask, int, mask);
+COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename);
+COMPAT_SYSCALL_WRAP2(dup2, unsigned int, oldfd, unsigned int, newfd);
+COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask);
+COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len);
+COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new);
+COMPAT_SYSCALL_WRAP3(readlink, const char __user *, path, char __user *, buf, int, bufsiz);
+COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library);
+COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags);
+COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg);
+COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len);
+COMPAT_SYSCALL_WRAP2(fchmod, unsigned int, fd, umode_t, mode);
+COMPAT_SYSCALL_WRAP2(getpriority, int, which, int, who);
+COMPAT_SYSCALL_WRAP3(setpriority, int, which, int, who, int, niceval);
+COMPAT_SYSCALL_WRAP3(syslog, int, type, char __user *, buf, int, len);
+COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile);
+COMPAT_SYSCALL_WRAP1(fsync, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len);
+COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name);
+COMPAT_SYSCALL_WRAP3(mprotect, unsigned long, start, size_t, len, unsigned long, prot);
+COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs);
+COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr);
+COMPAT_SYSCALL_WRAP1(getpgid, pid_t, pid);
+COMPAT_SYSCALL_WRAP1(fchdir, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(bdflush, int, func, long, data);
+COMPAT_SYSCALL_WRAP3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2);
+COMPAT_SYSCALL_WRAP1(s390_personality, unsigned int, personality);
+COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, unsigned long, high, unsigned long, low, loff_t __user *, result, unsigned int, whence);
+COMPAT_SYSCALL_WRAP2(flock, unsigned int, fd, unsigned int, cmd);
+COMPAT_SYSCALL_WRAP3(msync, unsigned long, start, size_t, len, int, flags);
+COMPAT_SYSCALL_WRAP1(getsid, pid_t, pid);
+COMPAT_SYSCALL_WRAP1(fdatasync, unsigned int, fd);
+COMPAT_SYSCALL_WRAP2(mlock, unsigned long, start, size_t, len);
+COMPAT_SYSCALL_WRAP2(munlock, unsigned long, start, size_t, len);
+COMPAT_SYSCALL_WRAP1(mlockall, int, flags);
+COMPAT_SYSCALL_WRAP2(sched_setparam, pid_t, pid, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP2(sched_getparam, pid_t, pid, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param);
+COMPAT_SYSCALL_WRAP1(sched_getscheduler, pid_t, pid);
+COMPAT_SYSCALL_WRAP1(sched_get_priority_max, int, policy);
+COMPAT_SYSCALL_WRAP1(sched_get_priority_min, int, policy);
+COMPAT_SYSCALL_WRAP5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr);
+COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout);
+COMPAT_SYSCALL_WRAP5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5);
+COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, unsigned long, size);
+COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr);
+COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data);
+COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, uid_t, user, gid_t, group);
+COMPAT_SYSCALL_WRAP2(setreuid, uid_t, ruid, uid_t, euid);
+COMPAT_SYSCALL_WRAP2(setregid, gid_t, rgid, gid_t, egid);
+COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, gid_t __user *, grouplist);
+COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, gid_t __user *, grouplist);
+COMPAT_SYSCALL_WRAP3(fchown, unsigned int, fd, uid_t, user, gid_t, group);
+COMPAT_SYSCALL_WRAP3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid);
+COMPAT_SYSCALL_WRAP3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid);
+COMPAT_SYSCALL_WRAP3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid);
+COMPAT_SYSCALL_WRAP3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid);
+COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, uid_t, user, gid_t, group);
+COMPAT_SYSCALL_WRAP1(setuid, uid_t, uid);
+COMPAT_SYSCALL_WRAP1(setgid, gid_t, gid);
+COMPAT_SYSCALL_WRAP1(setfsuid, uid_t, uid);
+COMPAT_SYSCALL_WRAP1(setfsgid, gid_t, gid);
+COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old);
+COMPAT_SYSCALL_WRAP3(mincore, unsigned long, start, size_t, len, unsigned char __user *, vec);
+COMPAT_SYSCALL_WRAP3(madvise, unsigned long, start, size_t, len, int, behavior);
+COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, size_t, size, int, flags);
+COMPAT_SYSCALL_WRAP3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count);
+COMPAT_SYSCALL_WRAP4(getxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP4(lgetxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size);
+COMPAT_SYSCALL_WRAP3(listxattr, const char __user *, path, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP3(llistxattr, const char __user *, path, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, size_t, size);
+COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name);
+COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name);
+COMPAT_SYSCALL_WRAP1(exit_group, int, error_code);
+COMPAT_SYSCALL_WRAP1(set_tid_address, int __user *, tidptr);
+COMPAT_SYSCALL_WRAP1(epoll_create, int, size);
+COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event);
+COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout);
+COMPAT_SYSCALL_WRAP1(timer_getoverrun, timer_t, timer_id);
+COMPAT_SYSCALL_WRAP1(timer_delete, compat_timer_t, compat_timer_id);
+COMPAT_SYSCALL_WRAP1(io_destroy, aio_context_t, ctx);
+COMPAT_SYSCALL_WRAP3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result);
+COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name);
+COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, size_t, len, key_serial_t, id);
+COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id);
+COMPAT_SYSCALL_WRAP5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags);
+COMPAT_SYSCALL_WRAP3(ioprio_set, int, which, int, who, int, ioprio);
+COMPAT_SYSCALL_WRAP2(ioprio_get, int, which, int, who);
+COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask);
+COMPAT_SYSCALL_WRAP2(inotify_rm_watch, int, fd, __s32, wd);
+COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode);
+COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev);
+COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag);
+COMPAT_SYSCALL_WRAP3(unlinkat, int, dfd, const char __user *, pathname, int, flag);
+COMPAT_SYSCALL_WRAP4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname);
+COMPAT_SYSCALL_WRAP5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags);
+COMPAT_SYSCALL_WRAP3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname);
+COMPAT_SYSCALL_WRAP4(readlinkat, int, dfd, const char __user *, path, char __user *, buf, int, bufsiz);
+COMPAT_SYSCALL_WRAP3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode);
+COMPAT_SYSCALL_WRAP3(faccessat, int, dfd, const char __user *, filename, int, mode);
+COMPAT_SYSCALL_WRAP1(unshare, unsigned long, unshare_flags);
+COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache);
+COMPAT_SYSCALL_WRAP1(eventfd, unsigned int, count);
+COMPAT_SYSCALL_WRAP2(timerfd_create, int, clockid, int, flags);
+COMPAT_SYSCALL_WRAP2(eventfd2, unsigned int, count, int, flags);
+COMPAT_SYSCALL_WRAP1(inotify_init1, int, flags);
+COMPAT_SYSCALL_WRAP2(pipe2, int __user *, fildes, int, flags);
+COMPAT_SYSCALL_WRAP3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags);
+COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags);
+COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig);
+COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig);
+COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags);
+COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val);
+COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags);
+COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim);
+COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag);
+COMPAT_SYSCALL_WRAP1(syncfs, int, fd);
+COMPAT_SYSCALL_WRAP2(setns, int, fd, int, nstype);
+COMPAT_SYSCALL_WRAP2(s390_runtime_instr, int, command, int, signum);
+COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, idx1, unsigned long, idx2);
+COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags);
+COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags);
+COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags);
+COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags);
+COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, const char __user *, uargs)
+COMPAT_SYSCALL_WRAP3(getrandom, char __user *, buf, size_t, count, unsigned int, flags)
+COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, flags)
+COMPAT_SYSCALL_WRAP3(bpf, int, cmd, union bpf_attr *, attr, unsigned int, size);
+COMPAT_SYSCALL_WRAP3(s390_pci_mmio_write, const unsigned long, mmio_addr, const void __user *, user_buffer, const size_t, length);
+COMPAT_SYSCALL_WRAP3(s390_pci_mmio_read, const unsigned long, mmio_addr, void __user *, user_buffer, const size_t, length);
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
new file mode 100644
index 000000000..199ec92ef
--- /dev/null
+++ b/arch/s390/kernel/cpcmd.c
@@ -0,0 +1,113 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2007
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Christian Borntraeger (cborntra@de.ibm.com),
+ */
+
+#define KMSG_COMPONENT "cpcmd"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <asm/ebcdic.h>
+#include <asm/cpcmd.h>
+#include <asm/io.h>
+
+static DEFINE_SPINLOCK(cpcmd_lock);
+static char cpcmd_buf[241];
+
+static int diag8_noresponse(int cmdlen)
+{
+ register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf;
+ register unsigned long reg3 asm ("3") = cmdlen;
+
+ asm volatile(
+ " sam31\n"
+ " diag %1,%0,0x8\n"
+ " sam64\n"
+ : "+d" (reg3) : "d" (reg2) : "cc");
+ return reg3;
+}
+
+static int diag8_response(int cmdlen, char *response, int *rlen)
+{
+ register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf;
+ register unsigned long reg3 asm ("3") = (addr_t) response;
+ register unsigned long reg4 asm ("4") = cmdlen | 0x40000000L;
+ register unsigned long reg5 asm ("5") = *rlen;
+
+ asm volatile(
+ " sam31\n"
+ " diag %2,%0,0x8\n"
+ " sam64\n"
+ " brc 8,1f\n"
+ " agr %1,%4\n"
+ "1:\n"
+ : "+d" (reg4), "+d" (reg5)
+ : "d" (reg2), "d" (reg3), "d" (*rlen) : "cc");
+ *rlen = reg5;
+ return reg4;
+}
+
+/*
+ * __cpcmd has some restrictions over cpcmd
+ * - the response buffer must reside below 2GB (if any)
+ * - __cpcmd is unlocked and therefore not SMP-safe
+ */
+int __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
+{
+ int cmdlen;
+ int rc;
+ int response_len;
+
+ cmdlen = strlen(cmd);
+ BUG_ON(cmdlen > 240);
+ memcpy(cpcmd_buf, cmd, cmdlen);
+ ASCEBC(cpcmd_buf, cmdlen);
+
+ if (response) {
+ memset(response, 0, rlen);
+ response_len = rlen;
+ rc = diag8_response(cmdlen, response, &rlen);
+ EBCASC(response, response_len);
+ } else {
+ rc = diag8_noresponse(cmdlen);
+ }
+ if (response_code)
+ *response_code = rc;
+ return rlen;
+}
+EXPORT_SYMBOL(__cpcmd);
+
+int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
+{
+ char *lowbuf;
+ int len;
+ unsigned long flags;
+
+ if ((virt_to_phys(response) != (unsigned long) response) ||
+ (((unsigned long)response + rlen) >> 31)) {
+ lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA);
+ if (!lowbuf) {
+ pr_warning("The cpcmd kernel function failed to "
+ "allocate a response buffer\n");
+ return -ENOMEM;
+ }
+ spin_lock_irqsave(&cpcmd_lock, flags);
+ len = __cpcmd(cmd, lowbuf, rlen, response_code);
+ spin_unlock_irqrestore(&cpcmd_lock, flags);
+ memcpy(response, lowbuf, rlen);
+ kfree(lowbuf);
+ } else {
+ spin_lock_irqsave(&cpcmd_lock, flags);
+ len = __cpcmd(cmd, response, rlen, response_code);
+ spin_unlock_irqrestore(&cpcmd_lock, flags);
+ }
+ return len;
+}
+EXPORT_SYMBOL(cpcmd);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
new file mode 100644
index 000000000..49b74454d
--- /dev/null
+++ b/arch/s390/kernel/crash_dump.c
@@ -0,0 +1,687 @@
+/*
+ * S390 kdump implementation
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/crash_dump.h>
+#include <asm/lowcore.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/bootmem.h>
+#include <linux/elf.h>
+#include <linux/memblock.h>
+#include <asm/os_info.h>
+#include <asm/elf.h>
+#include <asm/ipl.h>
+#include <asm/sclp.h>
+
+#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
+#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
+#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y))))
+
+static struct memblock_region oldmem_region;
+
+static struct memblock_type oldmem_type = {
+ .cnt = 1,
+ .max = 1,
+ .total_size = 0,
+ .regions = &oldmem_region,
+};
+
+#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid) \
+ for (i = 0, __next_mem_range(&i, nid, &memblock.physmem, \
+ &oldmem_type, p_start, \
+ p_end, p_nid); \
+ i != (u64)ULLONG_MAX; \
+ __next_mem_range(&i, nid, &memblock.physmem, \
+ &oldmem_type, \
+ p_start, p_end, p_nid))
+
+struct dump_save_areas dump_save_areas;
+
+/*
+ * Allocate and add a save area for a CPU
+ */
+struct save_area_ext *dump_save_area_create(int cpu)
+{
+ struct save_area_ext **save_areas, *save_area;
+
+ save_area = kmalloc(sizeof(*save_area), GFP_KERNEL);
+ if (!save_area)
+ return NULL;
+ if (cpu + 1 > dump_save_areas.count) {
+ dump_save_areas.count = cpu + 1;
+ save_areas = krealloc(dump_save_areas.areas,
+ dump_save_areas.count * sizeof(void *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!save_areas) {
+ kfree(save_area);
+ return NULL;
+ }
+ dump_save_areas.areas = save_areas;
+ }
+ dump_save_areas.areas[cpu] = save_area;
+ return save_area;
+}
+
+/*
+ * Return physical address for virtual address
+ */
+static inline void *load_real_addr(void *addr)
+{
+ unsigned long real_addr;
+
+ asm volatile(
+ " lra %0,0(%1)\n"
+ " jz 0f\n"
+ " la %0,0\n"
+ "0:"
+ : "=a" (real_addr) : "a" (addr) : "cc");
+ return (void *)real_addr;
+}
+
+/*
+ * Copy real to virtual or real memory
+ */
+static int copy_from_realmem(void *dest, void *src, size_t count)
+{
+ unsigned long size;
+
+ if (!count)
+ return 0;
+ if (!is_vmalloc_or_module_addr(dest))
+ return memcpy_real(dest, src, count);
+ do {
+ size = min(count, PAGE_SIZE - (__pa(dest) & ~PAGE_MASK));
+ if (memcpy_real(load_real_addr(dest), src, size))
+ return -EFAULT;
+ count -= size;
+ dest += size;
+ src += size;
+ } while (count);
+ return 0;
+}
+
+/*
+ * Pointer to ELF header in new kernel
+ */
+static void *elfcorehdr_newmem;
+
+/*
+ * Copy one page from zfcpdump "oldmem"
+ *
+ * For pages below HSA size memory from the HSA is copied. Otherwise
+ * real memory copy is used.
+ */
+static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize,
+ unsigned long src, int userbuf)
+{
+ int rc;
+
+ if (src < sclp_get_hsa_size()) {
+ rc = memcpy_hsa(buf, src, csize, userbuf);
+ } else {
+ if (userbuf)
+ rc = copy_to_user_real((void __force __user *) buf,
+ (void *) src, csize);
+ else
+ rc = memcpy_real(buf, (void *) src, csize);
+ }
+ return rc ? rc : csize;
+}
+
+/*
+ * Copy one page from kdump "oldmem"
+ *
+ * For the kdump reserved memory this functions performs a swap operation:
+ * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE].
+ * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
+ */
+static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize,
+ unsigned long src, int userbuf)
+
+{
+ int rc;
+
+ if (src < OLDMEM_SIZE)
+ src += OLDMEM_BASE;
+ else if (src > OLDMEM_BASE &&
+ src < OLDMEM_BASE + OLDMEM_SIZE)
+ src -= OLDMEM_BASE;
+ if (userbuf)
+ rc = copy_to_user_real((void __force __user *) buf,
+ (void *) src, csize);
+ else
+ rc = copy_from_realmem(buf, (void *) src, csize);
+ return (rc == 0) ? rc : csize;
+}
+
+/*
+ * Copy one page from "oldmem"
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
+ unsigned long offset, int userbuf)
+{
+ unsigned long src;
+
+ if (!csize)
+ return 0;
+ src = (pfn << PAGE_SHIFT) + offset;
+ if (OLDMEM_BASE)
+ return copy_oldmem_page_kdump(buf, csize, src, userbuf);
+ else
+ return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf);
+}
+
+/*
+ * Remap "oldmem" for kdump
+ *
+ * For the kdump reserved memory this functions performs a swap operation:
+ * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
+ */
+static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
+ unsigned long from, unsigned long pfn,
+ unsigned long size, pgprot_t prot)
+{
+ unsigned long size_old;
+ int rc;
+
+ if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) {
+ size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT));
+ rc = remap_pfn_range(vma, from,
+ pfn + (OLDMEM_BASE >> PAGE_SHIFT),
+ size_old, prot);
+ if (rc || size == size_old)
+ return rc;
+ size -= size_old;
+ from += size_old;
+ pfn += size_old >> PAGE_SHIFT;
+ }
+ return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Remap "oldmem" for zfcpdump
+ *
+ * We only map available memory above HSA size. Memory below HSA size
+ * is read on demand using the copy_oldmem_page() function.
+ */
+static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
+ unsigned long from,
+ unsigned long pfn,
+ unsigned long size, pgprot_t prot)
+{
+ unsigned long hsa_end = sclp_get_hsa_size();
+ unsigned long size_hsa;
+
+ if (pfn < hsa_end >> PAGE_SHIFT) {
+ size_hsa = min(size, hsa_end - (pfn << PAGE_SHIFT));
+ if (size == size_hsa)
+ return 0;
+ size -= size_hsa;
+ from += size_hsa;
+ pfn += size_hsa >> PAGE_SHIFT;
+ }
+ return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Remap "oldmem" for kdump or zfcpdump
+ */
+int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
+ unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+ if (OLDMEM_BASE)
+ return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot);
+ else
+ return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size,
+ prot);
+}
+
+/*
+ * Copy memory from old kernel
+ */
+int copy_from_oldmem(void *dest, void *src, size_t count)
+{
+ unsigned long copied = 0;
+ int rc;
+
+ if (OLDMEM_BASE) {
+ if ((unsigned long) src < OLDMEM_SIZE) {
+ copied = min(count, OLDMEM_SIZE - (unsigned long) src);
+ rc = copy_from_realmem(dest, src + OLDMEM_BASE, copied);
+ if (rc)
+ return rc;
+ }
+ } else {
+ unsigned long hsa_end = sclp_get_hsa_size();
+ if ((unsigned long) src < hsa_end) {
+ copied = min(count, hsa_end - (unsigned long) src);
+ rc = memcpy_hsa(dest, (unsigned long) src, copied, 0);
+ if (rc)
+ return rc;
+ }
+ }
+ return copy_from_realmem(dest + copied, src + copied, count - copied);
+}
+
+/*
+ * Alloc memory and panic in case of ENOMEM
+ */
+static void *kzalloc_panic(int len)
+{
+ void *rc;
+
+ rc = kzalloc(len, GFP_KERNEL);
+ if (!rc)
+ panic("s390 kdump kzalloc (%d) failed", len);
+ return rc;
+}
+
+/*
+ * Initialize ELF note
+ */
+static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len,
+ const char *name)
+{
+ Elf64_Nhdr *note;
+ u64 len;
+
+ note = (Elf64_Nhdr *)buf;
+ note->n_namesz = strlen(name) + 1;
+ note->n_descsz = d_len;
+ note->n_type = type;
+ len = sizeof(Elf64_Nhdr);
+
+ memcpy(buf + len, name, note->n_namesz);
+ len = roundup(len + note->n_namesz, 4);
+
+ memcpy(buf + len, desc, note->n_descsz);
+ len = roundup(len + note->n_descsz, 4);
+
+ return PTR_ADD(buf, len);
+}
+
+/*
+ * Initialize prstatus note
+ */
+static void *nt_prstatus(void *ptr, struct save_area *sa)
+{
+ struct elf_prstatus nt_prstatus;
+ static int cpu_nr = 1;
+
+ memset(&nt_prstatus, 0, sizeof(nt_prstatus));
+ memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs));
+ memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw));
+ memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs));
+ nt_prstatus.pr_pid = cpu_nr;
+ cpu_nr++;
+
+ return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus),
+ "CORE");
+}
+
+/*
+ * Initialize fpregset (floating point) note
+ */
+static void *nt_fpregset(void *ptr, struct save_area *sa)
+{
+ elf_fpregset_t nt_fpregset;
+
+ memset(&nt_fpregset, 0, sizeof(nt_fpregset));
+ memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg));
+ memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs));
+
+ return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset),
+ "CORE");
+}
+
+/*
+ * Initialize timer note
+ */
+static void *nt_s390_timer(void *ptr, struct save_area *sa)
+{
+ return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer),
+ KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize TOD clock comparator note
+ */
+static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa)
+{
+ return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp,
+ sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize TOD programmable register note
+ */
+static void *nt_s390_tod_preg(void *ptr, struct save_area *sa)
+{
+ return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg,
+ sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize control register note
+ */
+static void *nt_s390_ctrs(void *ptr, struct save_area *sa)
+{
+ return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs,
+ sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize prefix register note
+ */
+static void *nt_s390_prefix(void *ptr, struct save_area *sa)
+{
+ return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg,
+ sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize vxrs high note (full 128 bit VX registers 16-31)
+ */
+static void *nt_s390_vx_high(void *ptr, __vector128 *vx_regs)
+{
+ return nt_init(ptr, NT_S390_VXRS_HIGH, &vx_regs[16],
+ 16 * sizeof(__vector128), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize vxrs low note (lower halves of VX registers 0-15)
+ */
+static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs)
+{
+ Elf64_Nhdr *note;
+ u64 len;
+ int i;
+
+ note = (Elf64_Nhdr *)ptr;
+ note->n_namesz = strlen(KEXEC_CORE_NOTE_NAME) + 1;
+ note->n_descsz = 16 * 8;
+ note->n_type = NT_S390_VXRS_LOW;
+ len = sizeof(Elf64_Nhdr);
+
+ memcpy(ptr + len, KEXEC_CORE_NOTE_NAME, note->n_namesz);
+ len = roundup(len + note->n_namesz, 4);
+
+ ptr += len;
+ /* Copy lower halves of SIMD registers 0-15 */
+ for (i = 0; i < 16; i++) {
+ memcpy(ptr, &vx_regs[i].u[2], 8);
+ ptr += 8;
+ }
+ return ptr;
+}
+
+/*
+ * Fill ELF notes for one CPU with save area registers
+ */
+void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vx_regs)
+{
+ ptr = nt_prstatus(ptr, sa);
+ ptr = nt_fpregset(ptr, sa);
+ ptr = nt_s390_timer(ptr, sa);
+ ptr = nt_s390_tod_cmp(ptr, sa);
+ ptr = nt_s390_tod_preg(ptr, sa);
+ ptr = nt_s390_ctrs(ptr, sa);
+ ptr = nt_s390_prefix(ptr, sa);
+ if (MACHINE_HAS_VX && vx_regs) {
+ ptr = nt_s390_vx_low(ptr, vx_regs);
+ ptr = nt_s390_vx_high(ptr, vx_regs);
+ }
+ return ptr;
+}
+
+/*
+ * Initialize prpsinfo note (new kernel)
+ */
+static void *nt_prpsinfo(void *ptr)
+{
+ struct elf_prpsinfo prpsinfo;
+
+ memset(&prpsinfo, 0, sizeof(prpsinfo));
+ prpsinfo.pr_sname = 'R';
+ strcpy(prpsinfo.pr_fname, "vmlinux");
+ return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo),
+ KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Get vmcoreinfo using lowcore->vmcore_info (new kernel)
+ */
+static void *get_vmcoreinfo_old(unsigned long *size)
+{
+ char nt_name[11], *vmcoreinfo;
+ Elf64_Nhdr note;
+ void *addr;
+
+ if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr)))
+ return NULL;
+ memset(nt_name, 0, sizeof(nt_name));
+ if (copy_from_oldmem(&note, addr, sizeof(note)))
+ return NULL;
+ if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1))
+ return NULL;
+ if (strcmp(nt_name, "VMCOREINFO") != 0)
+ return NULL;
+ vmcoreinfo = kzalloc_panic(note.n_descsz);
+ if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz))
+ return NULL;
+ *size = note.n_descsz;
+ return vmcoreinfo;
+}
+
+/*
+ * Initialize vmcoreinfo note (new kernel)
+ */
+static void *nt_vmcoreinfo(void *ptr)
+{
+ unsigned long size;
+ void *vmcoreinfo;
+
+ vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size);
+ if (!vmcoreinfo)
+ vmcoreinfo = get_vmcoreinfo_old(&size);
+ if (!vmcoreinfo)
+ return ptr;
+ return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO");
+}
+
+/*
+ * Initialize ELF header (new kernel)
+ */
+static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
+{
+ memset(ehdr, 0, sizeof(*ehdr));
+ memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+ ehdr->e_ident[EI_CLASS] = ELFCLASS64;
+ ehdr->e_ident[EI_DATA] = ELFDATA2MSB;
+ ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+ memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
+ ehdr->e_type = ET_CORE;
+ ehdr->e_machine = EM_S390;
+ ehdr->e_version = EV_CURRENT;
+ ehdr->e_phoff = sizeof(Elf64_Ehdr);
+ ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+ ehdr->e_phentsize = sizeof(Elf64_Phdr);
+ ehdr->e_phnum = mem_chunk_cnt + 1;
+ return ehdr + 1;
+}
+
+/*
+ * Return CPU count for ELF header (new kernel)
+ */
+static int get_cpu_cnt(void)
+{
+ int i, cpus = 0;
+
+ for (i = 0; i < dump_save_areas.count; i++) {
+ if (dump_save_areas.areas[i]->sa.pref_reg == 0)
+ continue;
+ cpus++;
+ }
+ return cpus;
+}
+
+/*
+ * Return memory chunk count for ELF header (new kernel)
+ */
+static int get_mem_chunk_cnt(void)
+{
+ int cnt = 0;
+ u64 idx;
+
+ for_each_dump_mem_range(idx, NUMA_NO_NODE, NULL, NULL, NULL)
+ cnt++;
+ return cnt;
+}
+
+/*
+ * Initialize ELF loads (new kernel)
+ */
+static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
+{
+ phys_addr_t start, end;
+ u64 idx;
+
+ for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) {
+ phdr->p_filesz = end - start;
+ phdr->p_type = PT_LOAD;
+ phdr->p_offset = start;
+ phdr->p_vaddr = start;
+ phdr->p_paddr = start;
+ phdr->p_memsz = end - start;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_align = PAGE_SIZE;
+ phdr++;
+ }
+}
+
+/*
+ * Initialize notes (new kernel)
+ */
+static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
+{
+ struct save_area_ext *sa_ext;
+ void *ptr_start = ptr;
+ int i;
+
+ ptr = nt_prpsinfo(ptr);
+
+ for (i = 0; i < dump_save_areas.count; i++) {
+ sa_ext = dump_save_areas.areas[i];
+ if (sa_ext->sa.pref_reg == 0)
+ continue;
+ ptr = fill_cpu_elf_notes(ptr, &sa_ext->sa, sa_ext->vx_regs);
+ }
+ ptr = nt_vmcoreinfo(ptr);
+ memset(phdr, 0, sizeof(*phdr));
+ phdr->p_type = PT_NOTE;
+ phdr->p_offset = notes_offset;
+ phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start);
+ phdr->p_memsz = phdr->p_filesz;
+ return ptr;
+}
+
+/*
+ * Create ELF core header (new kernel)
+ */
+int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
+{
+ Elf64_Phdr *phdr_notes, *phdr_loads;
+ int mem_chunk_cnt;
+ void *ptr, *hdr;
+ u32 alloc_size;
+ u64 hdr_off;
+
+ /* If we are not in kdump or zfcpdump mode return */
+ if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP)
+ return 0;
+ /* If elfcorehdr= has been passed via cmdline, we use that one */
+ if (elfcorehdr_addr != ELFCORE_ADDR_MAX)
+ return 0;
+ /* If we cannot get HSA size for zfcpdump return error */
+ if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp_get_hsa_size())
+ return -ENODEV;
+
+ /* For kdump, exclude previous crashkernel memory */
+ if (OLDMEM_BASE) {
+ oldmem_region.base = OLDMEM_BASE;
+ oldmem_region.size = OLDMEM_SIZE;
+ oldmem_type.total_size = OLDMEM_SIZE;
+ }
+
+ mem_chunk_cnt = get_mem_chunk_cnt();
+
+ alloc_size = 0x1000 + get_cpu_cnt() * 0x4a0 +
+ mem_chunk_cnt * sizeof(Elf64_Phdr);
+ hdr = kzalloc_panic(alloc_size);
+ /* Init elf header */
+ ptr = ehdr_init(hdr, mem_chunk_cnt);
+ /* Init program headers */
+ phdr_notes = ptr;
+ ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
+ phdr_loads = ptr;
+ ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
+ /* Init notes */
+ hdr_off = PTR_DIFF(ptr, hdr);
+ ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
+ /* Init loads */
+ hdr_off = PTR_DIFF(ptr, hdr);
+ loads_init(phdr_loads, hdr_off);
+ *addr = (unsigned long long) hdr;
+ elfcorehdr_newmem = hdr;
+ *size = (unsigned long long) hdr_off;
+ BUG_ON(elfcorehdr_size > alloc_size);
+ return 0;
+}
+
+/*
+ * Free ELF core header (new kernel)
+ */
+void elfcorehdr_free(unsigned long long addr)
+{
+ if (!elfcorehdr_newmem)
+ return;
+ kfree((void *)(unsigned long)addr);
+}
+
+/*
+ * Read from ELF header
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+ void *src = (void *)(unsigned long)*ppos;
+
+ src = elfcorehdr_newmem ? src : src - OLDMEM_BASE;
+ memcpy(buf, src, count);
+ *ppos += count;
+ return count;
+}
+
+/*
+ * Read from ELF notes data
+ */
+ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
+{
+ void *src = (void *)(unsigned long)*ppos;
+ int rc;
+
+ if (elfcorehdr_newmem) {
+ memcpy(buf, src, count);
+ } else {
+ rc = copy_from_oldmem(buf, src, count);
+ if (rc)
+ return rc;
+ }
+ *ppos += count;
+ return count;
+}
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
new file mode 100644
index 000000000..c1f21aca7
--- /dev/null
+++ b/arch/s390/kernel/debug.c
@@ -0,0 +1,1540 @@
+/*
+ * S/390 debug facility
+ *
+ * Copyright IBM Corp. 1999, 2012
+ *
+ * Author(s): Michael Holzheu (holzheu@de.ibm.com),
+ * Holger Smolinski (Holger.Smolinski@de.ibm.com)
+ *
+ * Bugreports to: <Linux390@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "s390dbf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/sysctl.h>
+#include <asm/uaccess.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+
+#include <asm/debug.h>
+
+#define DEBUG_PROLOG_ENTRY -1
+
+#define ALL_AREAS 0 /* copy all debug areas */
+#define NO_AREAS 1 /* copy no debug areas */
+
+/* typedefs */
+
+typedef struct file_private_info {
+ loff_t offset; /* offset of last read in file */
+ int act_area; /* number of last formated area */
+ int act_page; /* act page in given area */
+ int act_entry; /* last formated entry (offset */
+ /* relative to beginning of last */
+ /* formated page) */
+ size_t act_entry_offset; /* up to this offset we copied */
+ /* in last read the last formated */
+ /* entry to userland */
+ char temp_buf[2048]; /* buffer for output */
+ debug_info_t *debug_info_org; /* original debug information */
+ debug_info_t *debug_info_snap; /* snapshot of debug information */
+ struct debug_view *view; /* used view of debug info */
+} file_private_info_t;
+
+typedef struct
+{
+ char *string;
+ /*
+ * This assumes that all args are converted into longs
+ * on L/390 this is the case for all types of parameter
+ * except of floats, and long long (32 bit)
+ *
+ */
+ long args[0];
+} debug_sprintf_entry_t;
+
+
+/* internal function prototyes */
+
+static int debug_init(void);
+static ssize_t debug_output(struct file *file, char __user *user_buf,
+ size_t user_len, loff_t * offset);
+static ssize_t debug_input(struct file *file, const char __user *user_buf,
+ size_t user_len, loff_t * offset);
+static int debug_open(struct inode *inode, struct file *file);
+static int debug_close(struct inode *inode, struct file *file);
+static debug_info_t *debug_info_create(const char *name, int pages_per_area,
+ int nr_areas, int buf_size, umode_t mode);
+static void debug_info_get(debug_info_t *);
+static void debug_info_put(debug_info_t *);
+static int debug_prolog_level_fn(debug_info_t * id,
+ struct debug_view *view, char *out_buf);
+static int debug_input_level_fn(debug_info_t * id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_buf_size, loff_t * offset);
+static int debug_prolog_pages_fn(debug_info_t * id,
+ struct debug_view *view, char *out_buf);
+static int debug_input_pages_fn(debug_info_t * id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_buf_size, loff_t * offset);
+static int debug_input_flush_fn(debug_info_t * id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_buf_size, loff_t * offset);
+static int debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view,
+ char *out_buf, const char *in_buf);
+static int debug_raw_format_fn(debug_info_t * id,
+ struct debug_view *view, char *out_buf,
+ const char *in_buf);
+static int debug_raw_header_fn(debug_info_t * id, struct debug_view *view,
+ int area, debug_entry_t * entry, char *out_buf);
+
+static int debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view,
+ char *out_buf, debug_sprintf_entry_t *curr_event);
+
+/* globals */
+
+struct debug_view debug_raw_view = {
+ "raw",
+ NULL,
+ &debug_raw_header_fn,
+ &debug_raw_format_fn,
+ NULL,
+ NULL
+};
+EXPORT_SYMBOL(debug_raw_view);
+
+struct debug_view debug_hex_ascii_view = {
+ "hex_ascii",
+ NULL,
+ &debug_dflt_header_fn,
+ &debug_hex_ascii_format_fn,
+ NULL,
+ NULL
+};
+EXPORT_SYMBOL(debug_hex_ascii_view);
+
+static struct debug_view debug_level_view = {
+ "level",
+ &debug_prolog_level_fn,
+ NULL,
+ NULL,
+ &debug_input_level_fn,
+ NULL
+};
+
+static struct debug_view debug_pages_view = {
+ "pages",
+ &debug_prolog_pages_fn,
+ NULL,
+ NULL,
+ &debug_input_pages_fn,
+ NULL
+};
+
+static struct debug_view debug_flush_view = {
+ "flush",
+ NULL,
+ NULL,
+ NULL,
+ &debug_input_flush_fn,
+ NULL
+};
+
+struct debug_view debug_sprintf_view = {
+ "sprintf",
+ NULL,
+ &debug_dflt_header_fn,
+ (debug_format_proc_t*)&debug_sprintf_format_fn,
+ NULL,
+ NULL
+};
+EXPORT_SYMBOL(debug_sprintf_view);
+
+/* used by dump analysis tools to determine version of debug feature */
+static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION;
+
+/* static globals */
+
+static debug_info_t *debug_area_first = NULL;
+static debug_info_t *debug_area_last = NULL;
+static DEFINE_MUTEX(debug_mutex);
+
+static int initialized;
+static int debug_critical;
+
+static const struct file_operations debug_file_ops = {
+ .owner = THIS_MODULE,
+ .read = debug_output,
+ .write = debug_input,
+ .open = debug_open,
+ .release = debug_close,
+ .llseek = no_llseek,
+};
+
+static struct dentry *debug_debugfs_root_entry;
+
+/* functions */
+
+/*
+ * debug_areas_alloc
+ * - Debug areas are implemented as a threedimensonal array:
+ * areas[areanumber][pagenumber][pageoffset]
+ */
+
+static debug_entry_t***
+debug_areas_alloc(int pages_per_area, int nr_areas)
+{
+ debug_entry_t*** areas;
+ int i,j;
+
+ areas = kmalloc(nr_areas *
+ sizeof(debug_entry_t**),
+ GFP_KERNEL);
+ if (!areas)
+ goto fail_malloc_areas;
+ for (i = 0; i < nr_areas; i++) {
+ areas[i] = kmalloc(pages_per_area *
+ sizeof(debug_entry_t*),GFP_KERNEL);
+ if (!areas[i]) {
+ goto fail_malloc_areas2;
+ }
+ for(j = 0; j < pages_per_area; j++) {
+ areas[i][j] = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if(!areas[i][j]) {
+ for(j--; j >=0 ; j--) {
+ kfree(areas[i][j]);
+ }
+ kfree(areas[i]);
+ goto fail_malloc_areas2;
+ }
+ }
+ }
+ return areas;
+
+fail_malloc_areas2:
+ for(i--; i >= 0; i--){
+ for(j=0; j < pages_per_area;j++){
+ kfree(areas[i][j]);
+ }
+ kfree(areas[i]);
+ }
+ kfree(areas);
+fail_malloc_areas:
+ return NULL;
+
+}
+
+
+/*
+ * debug_info_alloc
+ * - alloc new debug-info
+ */
+
+static debug_info_t*
+debug_info_alloc(const char *name, int pages_per_area, int nr_areas,
+ int buf_size, int level, int mode)
+{
+ debug_info_t* rc;
+
+ /* alloc everything */
+
+ rc = kmalloc(sizeof(debug_info_t), GFP_KERNEL);
+ if(!rc)
+ goto fail_malloc_rc;
+ rc->active_entries = kcalloc(nr_areas, sizeof(int), GFP_KERNEL);
+ if(!rc->active_entries)
+ goto fail_malloc_active_entries;
+ rc->active_pages = kcalloc(nr_areas, sizeof(int), GFP_KERNEL);
+ if(!rc->active_pages)
+ goto fail_malloc_active_pages;
+ if((mode == ALL_AREAS) && (pages_per_area != 0)){
+ rc->areas = debug_areas_alloc(pages_per_area, nr_areas);
+ if(!rc->areas)
+ goto fail_malloc_areas;
+ } else {
+ rc->areas = NULL;
+ }
+
+ /* initialize members */
+
+ spin_lock_init(&rc->lock);
+ rc->pages_per_area = pages_per_area;
+ rc->nr_areas = nr_areas;
+ rc->active_area = 0;
+ rc->level = level;
+ rc->buf_size = buf_size;
+ rc->entry_size = sizeof(debug_entry_t) + buf_size;
+ strlcpy(rc->name, name, sizeof(rc->name));
+ memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
+ memset(rc->debugfs_entries, 0 ,DEBUG_MAX_VIEWS *
+ sizeof(struct dentry*));
+ atomic_set(&(rc->ref_count), 0);
+
+ return rc;
+
+fail_malloc_areas:
+ kfree(rc->active_pages);
+fail_malloc_active_pages:
+ kfree(rc->active_entries);
+fail_malloc_active_entries:
+ kfree(rc);
+fail_malloc_rc:
+ return NULL;
+}
+
+/*
+ * debug_areas_free
+ * - free all debug areas
+ */
+
+static void
+debug_areas_free(debug_info_t* db_info)
+{
+ int i,j;
+
+ if(!db_info->areas)
+ return;
+ for (i = 0; i < db_info->nr_areas; i++) {
+ for(j = 0; j < db_info->pages_per_area; j++) {
+ kfree(db_info->areas[i][j]);
+ }
+ kfree(db_info->areas[i]);
+ }
+ kfree(db_info->areas);
+ db_info->areas = NULL;
+}
+
+/*
+ * debug_info_free
+ * - free memory debug-info
+ */
+
+static void
+debug_info_free(debug_info_t* db_info){
+ debug_areas_free(db_info);
+ kfree(db_info->active_entries);
+ kfree(db_info->active_pages);
+ kfree(db_info);
+}
+
+/*
+ * debug_info_create
+ * - create new debug-info
+ */
+
+static debug_info_t*
+debug_info_create(const char *name, int pages_per_area, int nr_areas,
+ int buf_size, umode_t mode)
+{
+ debug_info_t* rc;
+
+ rc = debug_info_alloc(name, pages_per_area, nr_areas, buf_size,
+ DEBUG_DEFAULT_LEVEL, ALL_AREAS);
+ if(!rc)
+ goto out;
+
+ rc->mode = mode & ~S_IFMT;
+
+ /* create root directory */
+ rc->debugfs_root_entry = debugfs_create_dir(rc->name,
+ debug_debugfs_root_entry);
+
+ /* append new element to linked list */
+ if (!debug_area_first) {
+ /* first element in list */
+ debug_area_first = rc;
+ rc->prev = NULL;
+ } else {
+ /* append element to end of list */
+ debug_area_last->next = rc;
+ rc->prev = debug_area_last;
+ }
+ debug_area_last = rc;
+ rc->next = NULL;
+
+ debug_info_get(rc);
+out:
+ return rc;
+}
+
+/*
+ * debug_info_copy
+ * - copy debug-info
+ */
+
+static debug_info_t*
+debug_info_copy(debug_info_t* in, int mode)
+{
+ int i,j;
+ debug_info_t* rc;
+ unsigned long flags;
+
+ /* get a consistent copy of the debug areas */
+ do {
+ rc = debug_info_alloc(in->name, in->pages_per_area,
+ in->nr_areas, in->buf_size, in->level, mode);
+ spin_lock_irqsave(&in->lock, flags);
+ if(!rc)
+ goto out;
+ /* has something changed in the meantime ? */
+ if((rc->pages_per_area == in->pages_per_area) &&
+ (rc->nr_areas == in->nr_areas)) {
+ break;
+ }
+ spin_unlock_irqrestore(&in->lock, flags);
+ debug_info_free(rc);
+ } while (1);
+
+ if (mode == NO_AREAS)
+ goto out;
+
+ for(i = 0; i < in->nr_areas; i++){
+ for(j = 0; j < in->pages_per_area; j++) {
+ memcpy(rc->areas[i][j], in->areas[i][j],PAGE_SIZE);
+ }
+ }
+out:
+ spin_unlock_irqrestore(&in->lock, flags);
+ return rc;
+}
+
+/*
+ * debug_info_get
+ * - increments reference count for debug-info
+ */
+
+static void
+debug_info_get(debug_info_t * db_info)
+{
+ if (db_info)
+ atomic_inc(&db_info->ref_count);
+}
+
+/*
+ * debug_info_put:
+ * - decreases reference count for debug-info and frees it if necessary
+ */
+
+static void
+debug_info_put(debug_info_t *db_info)
+{
+ int i;
+
+ if (!db_info)
+ return;
+ if (atomic_dec_and_test(&db_info->ref_count)) {
+ for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+ if (!db_info->views[i])
+ continue;
+ debugfs_remove(db_info->debugfs_entries[i]);
+ }
+ debugfs_remove(db_info->debugfs_root_entry);
+ if(db_info == debug_area_first)
+ debug_area_first = db_info->next;
+ if(db_info == debug_area_last)
+ debug_area_last = db_info->prev;
+ if(db_info->prev) db_info->prev->next = db_info->next;
+ if(db_info->next) db_info->next->prev = db_info->prev;
+ debug_info_free(db_info);
+ }
+}
+
+/*
+ * debug_format_entry:
+ * - format one debug entry and return size of formated data
+ */
+
+static int
+debug_format_entry(file_private_info_t *p_info)
+{
+ debug_info_t *id_snap = p_info->debug_info_snap;
+ struct debug_view *view = p_info->view;
+ debug_entry_t *act_entry;
+ size_t len = 0;
+ if(p_info->act_entry == DEBUG_PROLOG_ENTRY){
+ /* print prolog */
+ if (view->prolog_proc)
+ len += view->prolog_proc(id_snap,view,p_info->temp_buf);
+ goto out;
+ }
+ if (!id_snap->areas) /* this is true, if we have a prolog only view */
+ goto out; /* or if 'pages_per_area' is 0 */
+ act_entry = (debug_entry_t *) ((char*)id_snap->areas[p_info->act_area]
+ [p_info->act_page] + p_info->act_entry);
+
+ if (act_entry->id.stck == 0LL)
+ goto out; /* empty entry */
+ if (view->header_proc)
+ len += view->header_proc(id_snap, view, p_info->act_area,
+ act_entry, p_info->temp_buf + len);
+ if (view->format_proc)
+ len += view->format_proc(id_snap, view, p_info->temp_buf + len,
+ DEBUG_DATA(act_entry));
+out:
+ return len;
+}
+
+/*
+ * debug_next_entry:
+ * - goto next entry in p_info
+ */
+
+static inline int
+debug_next_entry(file_private_info_t *p_info)
+{
+ debug_info_t *id;
+
+ id = p_info->debug_info_snap;
+ if(p_info->act_entry == DEBUG_PROLOG_ENTRY){
+ p_info->act_entry = 0;
+ p_info->act_page = 0;
+ goto out;
+ }
+ if(!id->areas)
+ return 1;
+ p_info->act_entry += id->entry_size;
+ /* switch to next page, if we reached the end of the page */
+ if (p_info->act_entry > (PAGE_SIZE - id->entry_size)){
+ /* next page */
+ p_info->act_entry = 0;
+ p_info->act_page += 1;
+ if((p_info->act_page % id->pages_per_area) == 0) {
+ /* next area */
+ p_info->act_area++;
+ p_info->act_page=0;
+ }
+ if(p_info->act_area >= id->nr_areas)
+ return 1;
+ }
+out:
+ return 0;
+}
+
+/*
+ * debug_output:
+ * - called for user read()
+ * - copies formated debug entries to the user buffer
+ */
+
+static ssize_t
+debug_output(struct file *file, /* file descriptor */
+ char __user *user_buf, /* user buffer */
+ size_t len, /* length of buffer */
+ loff_t *offset) /* offset in the file */
+{
+ size_t count = 0;
+ size_t entry_offset;
+ file_private_info_t *p_info;
+
+ p_info = ((file_private_info_t *) file->private_data);
+ if (*offset != p_info->offset)
+ return -EPIPE;
+ if(p_info->act_area >= p_info->debug_info_snap->nr_areas)
+ return 0;
+ entry_offset = p_info->act_entry_offset;
+ while(count < len){
+ int formatted_line_size;
+ int formatted_line_residue;
+ int user_buf_residue;
+ size_t copy_size;
+
+ formatted_line_size = debug_format_entry(p_info);
+ formatted_line_residue = formatted_line_size - entry_offset;
+ user_buf_residue = len-count;
+ copy_size = min(user_buf_residue, formatted_line_residue);
+ if(copy_size){
+ if (copy_to_user(user_buf + count, p_info->temp_buf
+ + entry_offset, copy_size))
+ return -EFAULT;
+ count += copy_size;
+ entry_offset += copy_size;
+ }
+ if(copy_size == formatted_line_residue){
+ entry_offset = 0;
+ if(debug_next_entry(p_info))
+ goto out;
+ }
+ }
+out:
+ p_info->offset = *offset + count;
+ p_info->act_entry_offset = entry_offset;
+ *offset = p_info->offset;
+ return count;
+}
+
+/*
+ * debug_input:
+ * - called for user write()
+ * - calls input function of view
+ */
+
+static ssize_t
+debug_input(struct file *file, const char __user *user_buf, size_t length,
+ loff_t *offset)
+{
+ int rc = 0;
+ file_private_info_t *p_info;
+
+ mutex_lock(&debug_mutex);
+ p_info = ((file_private_info_t *) file->private_data);
+ if (p_info->view->input_proc)
+ rc = p_info->view->input_proc(p_info->debug_info_org,
+ p_info->view, file, user_buf,
+ length, offset);
+ else
+ rc = -EPERM;
+ mutex_unlock(&debug_mutex);
+ return rc; /* number of input characters */
+}
+
+/*
+ * debug_open:
+ * - called for user open()
+ * - copies formated output to private_data area of the file
+ * handle
+ */
+
+static int
+debug_open(struct inode *inode, struct file *file)
+{
+ int i, rc = 0;
+ file_private_info_t *p_info;
+ debug_info_t *debug_info, *debug_info_snapshot;
+
+ mutex_lock(&debug_mutex);
+ debug_info = file_inode(file)->i_private;
+ /* find debug view */
+ for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+ if (!debug_info->views[i])
+ continue;
+ else if (debug_info->debugfs_entries[i] ==
+ file->f_path.dentry) {
+ goto found; /* found view ! */
+ }
+ }
+ /* no entry found */
+ rc = -EINVAL;
+ goto out;
+
+found:
+
+ /* Make snapshot of current debug areas to get it consistent. */
+ /* To copy all the areas is only needed, if we have a view which */
+ /* formats the debug areas. */
+
+ if(!debug_info->views[i]->format_proc &&
+ !debug_info->views[i]->header_proc){
+ debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS);
+ } else {
+ debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS);
+ }
+
+ if(!debug_info_snapshot){
+ rc = -ENOMEM;
+ goto out;
+ }
+ p_info = kmalloc(sizeof(file_private_info_t),
+ GFP_KERNEL);
+ if(!p_info){
+ debug_info_free(debug_info_snapshot);
+ rc = -ENOMEM;
+ goto out;
+ }
+ p_info->offset = 0;
+ p_info->debug_info_snap = debug_info_snapshot;
+ p_info->debug_info_org = debug_info;
+ p_info->view = debug_info->views[i];
+ p_info->act_area = 0;
+ p_info->act_page = 0;
+ p_info->act_entry = DEBUG_PROLOG_ENTRY;
+ p_info->act_entry_offset = 0;
+ file->private_data = p_info;
+ debug_info_get(debug_info);
+ nonseekable_open(inode, file);
+out:
+ mutex_unlock(&debug_mutex);
+ return rc;
+}
+
+/*
+ * debug_close:
+ * - called for user close()
+ * - deletes private_data area of the file handle
+ */
+
+static int
+debug_close(struct inode *inode, struct file *file)
+{
+ file_private_info_t *p_info;
+ p_info = (file_private_info_t *) file->private_data;
+ if(p_info->debug_info_snap)
+ debug_info_free(p_info->debug_info_snap);
+ debug_info_put(p_info->debug_info_org);
+ kfree(file->private_data);
+ return 0; /* success */
+}
+
+/*
+ * debug_register_mode:
+ * - Creates and initializes debug area for the caller
+ * The mode parameter allows to specify access rights for the s390dbf files
+ * - Returns handle for debug area
+ */
+
+debug_info_t *debug_register_mode(const char *name, int pages_per_area,
+ int nr_areas, int buf_size, umode_t mode,
+ uid_t uid, gid_t gid)
+{
+ debug_info_t *rc = NULL;
+
+ /* Since debugfs currently does not support uid/gid other than root, */
+ /* we do not allow gid/uid != 0 until we get support for that. */
+ if ((uid != 0) || (gid != 0))
+ pr_warning("Root becomes the owner of all s390dbf files "
+ "in sysfs\n");
+ BUG_ON(!initialized);
+ mutex_lock(&debug_mutex);
+
+ /* create new debug_info */
+
+ rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode);
+ if(!rc)
+ goto out;
+ debug_register_view(rc, &debug_level_view);
+ debug_register_view(rc, &debug_flush_view);
+ debug_register_view(rc, &debug_pages_view);
+out:
+ if (!rc){
+ pr_err("Registering debug feature %s failed\n", name);
+ }
+ mutex_unlock(&debug_mutex);
+ return rc;
+}
+EXPORT_SYMBOL(debug_register_mode);
+
+/*
+ * debug_register:
+ * - creates and initializes debug area for the caller
+ * - returns handle for debug area
+ */
+
+debug_info_t *debug_register(const char *name, int pages_per_area,
+ int nr_areas, int buf_size)
+{
+ return debug_register_mode(name, pages_per_area, nr_areas, buf_size,
+ S_IRUSR | S_IWUSR, 0, 0);
+}
+EXPORT_SYMBOL(debug_register);
+
+/*
+ * debug_unregister:
+ * - give back debug area
+ */
+
+void
+debug_unregister(debug_info_t * id)
+{
+ if (!id)
+ goto out;
+ mutex_lock(&debug_mutex);
+ debug_info_put(id);
+ mutex_unlock(&debug_mutex);
+
+out:
+ return;
+}
+EXPORT_SYMBOL(debug_unregister);
+
+/*
+ * debug_set_size:
+ * - set area size (number of pages) and number of areas
+ */
+static int
+debug_set_size(debug_info_t* id, int nr_areas, int pages_per_area)
+{
+ unsigned long flags;
+ debug_entry_t *** new_areas;
+ int rc=0;
+
+ if(!id || (nr_areas <= 0) || (pages_per_area < 0))
+ return -EINVAL;
+ if(pages_per_area > 0){
+ new_areas = debug_areas_alloc(pages_per_area, nr_areas);
+ if(!new_areas) {
+ pr_info("Allocating memory for %i pages failed\n",
+ pages_per_area);
+ rc = -ENOMEM;
+ goto out;
+ }
+ } else {
+ new_areas = NULL;
+ }
+ spin_lock_irqsave(&id->lock,flags);
+ debug_areas_free(id);
+ id->areas = new_areas;
+ id->nr_areas = nr_areas;
+ id->pages_per_area = pages_per_area;
+ id->active_area = 0;
+ memset(id->active_entries,0,sizeof(int)*id->nr_areas);
+ memset(id->active_pages, 0, sizeof(int)*id->nr_areas);
+ spin_unlock_irqrestore(&id->lock,flags);
+ pr_info("%s: set new size (%i pages)\n" ,id->name, pages_per_area);
+out:
+ return rc;
+}
+
+/*
+ * debug_set_level:
+ * - set actual debug level
+ */
+
+void
+debug_set_level(debug_info_t* id, int new_level)
+{
+ unsigned long flags;
+ if(!id)
+ return;
+ spin_lock_irqsave(&id->lock,flags);
+ if(new_level == DEBUG_OFF_LEVEL){
+ id->level = DEBUG_OFF_LEVEL;
+ pr_info("%s: switched off\n",id->name);
+ } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) {
+ pr_info("%s: level %i is out of range (%i - %i)\n",
+ id->name, new_level, 0, DEBUG_MAX_LEVEL);
+ } else {
+ id->level = new_level;
+ }
+ spin_unlock_irqrestore(&id->lock,flags);
+}
+EXPORT_SYMBOL(debug_set_level);
+
+/*
+ * proceed_active_entry:
+ * - set active entry to next in the ring buffer
+ */
+
+static inline void
+proceed_active_entry(debug_info_t * id)
+{
+ if ((id->active_entries[id->active_area] += id->entry_size)
+ > (PAGE_SIZE - id->entry_size)){
+ id->active_entries[id->active_area] = 0;
+ id->active_pages[id->active_area] =
+ (id->active_pages[id->active_area] + 1) %
+ id->pages_per_area;
+ }
+}
+
+/*
+ * proceed_active_area:
+ * - set active area to next in the ring buffer
+ */
+
+static inline void
+proceed_active_area(debug_info_t * id)
+{
+ id->active_area++;
+ id->active_area = id->active_area % id->nr_areas;
+}
+
+/*
+ * get_active_entry:
+ */
+
+static inline debug_entry_t*
+get_active_entry(debug_info_t * id)
+{
+ return (debug_entry_t *) (((char *) id->areas[id->active_area]
+ [id->active_pages[id->active_area]]) +
+ id->active_entries[id->active_area]);
+}
+
+/*
+ * debug_finish_entry:
+ * - set timestamp, caller address, cpu number etc.
+ */
+
+static inline void
+debug_finish_entry(debug_info_t * id, debug_entry_t* active, int level,
+ int exception)
+{
+ active->id.stck = get_tod_clock_fast();
+ active->id.fields.cpuid = smp_processor_id();
+ active->caller = __builtin_return_address(0);
+ active->id.fields.exception = exception;
+ active->id.fields.level = level;
+ proceed_active_entry(id);
+ if(exception)
+ proceed_active_area(id);
+}
+
+static int debug_stoppable=1;
+static int debug_active=1;
+
+#define CTL_S390DBF_STOPPABLE 5678
+#define CTL_S390DBF_ACTIVE 5679
+
+/*
+ * proc handler for the running debug_active sysctl
+ * always allow read, allow write only if debug_stoppable is set or
+ * if debug_active is already off
+ */
+static int
+s390dbf_procactive(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ if (!write || debug_stoppable || !debug_active)
+ return proc_dointvec(table, write, buffer, lenp, ppos);
+ else
+ return 0;
+}
+
+
+static struct ctl_table s390dbf_table[] = {
+ {
+ .procname = "debug_stoppable",
+ .data = &debug_stoppable,
+ .maxlen = sizeof(int),
+ .mode = S_IRUGO | S_IWUSR,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "debug_active",
+ .data = &debug_active,
+ .maxlen = sizeof(int),
+ .mode = S_IRUGO | S_IWUSR,
+ .proc_handler = s390dbf_procactive,
+ },
+ { }
+};
+
+static struct ctl_table s390dbf_dir_table[] = {
+ {
+ .procname = "s390dbf",
+ .maxlen = 0,
+ .mode = S_IRUGO | S_IXUGO,
+ .child = s390dbf_table,
+ },
+ { }
+};
+
+static struct ctl_table_header *s390dbf_sysctl_header;
+
+void
+debug_stop_all(void)
+{
+ if (debug_stoppable)
+ debug_active = 0;
+}
+EXPORT_SYMBOL(debug_stop_all);
+
+void debug_set_critical(void)
+{
+ debug_critical = 1;
+}
+
+/*
+ * debug_event_common:
+ * - write debug entry with given size
+ */
+
+debug_entry_t*
+debug_event_common(debug_info_t * id, int level, const void *buf, int len)
+{
+ unsigned long flags;
+ debug_entry_t *active;
+
+ if (!debug_active || !id->areas)
+ return NULL;
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else
+ spin_lock_irqsave(&id->lock, flags);
+ active = get_active_entry(id);
+ memset(DEBUG_DATA(active), 0, id->buf_size);
+ memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size));
+ debug_finish_entry(id, active, level, 0);
+ spin_unlock_irqrestore(&id->lock, flags);
+
+ return active;
+}
+EXPORT_SYMBOL(debug_event_common);
+
+/*
+ * debug_exception_common:
+ * - write debug entry with given size and switch to next debug area
+ */
+
+debug_entry_t
+*debug_exception_common(debug_info_t * id, int level, const void *buf, int len)
+{
+ unsigned long flags;
+ debug_entry_t *active;
+
+ if (!debug_active || !id->areas)
+ return NULL;
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else
+ spin_lock_irqsave(&id->lock, flags);
+ active = get_active_entry(id);
+ memset(DEBUG_DATA(active), 0, id->buf_size);
+ memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size));
+ debug_finish_entry(id, active, level, 1);
+ spin_unlock_irqrestore(&id->lock, flags);
+
+ return active;
+}
+EXPORT_SYMBOL(debug_exception_common);
+
+/*
+ * counts arguments in format string for sprintf view
+ */
+
+static inline int
+debug_count_numargs(char *string)
+{
+ int numargs=0;
+
+ while(*string) {
+ if(*string++=='%')
+ numargs++;
+ }
+ return(numargs);
+}
+
+/*
+ * debug_sprintf_event:
+ */
+
+debug_entry_t*
+__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
+{
+ va_list ap;
+ int numargs,idx;
+ unsigned long flags;
+ debug_sprintf_entry_t *curr_event;
+ debug_entry_t *active;
+
+ if (!debug_active || !id->areas)
+ return NULL;
+ numargs=debug_count_numargs(string);
+
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else
+ spin_lock_irqsave(&id->lock, flags);
+ active = get_active_entry(id);
+ curr_event=(debug_sprintf_entry_t *) DEBUG_DATA(active);
+ va_start(ap,string);
+ curr_event->string=string;
+ for(idx=0;idx<min(numargs,(int)(id->buf_size / sizeof(long))-1);idx++)
+ curr_event->args[idx]=va_arg(ap,long);
+ va_end(ap);
+ debug_finish_entry(id, active, level, 0);
+ spin_unlock_irqrestore(&id->lock, flags);
+
+ return active;
+}
+EXPORT_SYMBOL(__debug_sprintf_event);
+
+/*
+ * debug_sprintf_exception:
+ */
+
+debug_entry_t*
+__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
+{
+ va_list ap;
+ int numargs,idx;
+ unsigned long flags;
+ debug_sprintf_entry_t *curr_event;
+ debug_entry_t *active;
+
+ if (!debug_active || !id->areas)
+ return NULL;
+
+ numargs=debug_count_numargs(string);
+
+ if (debug_critical) {
+ if (!spin_trylock_irqsave(&id->lock, flags))
+ return NULL;
+ } else
+ spin_lock_irqsave(&id->lock, flags);
+ active = get_active_entry(id);
+ curr_event=(debug_sprintf_entry_t *)DEBUG_DATA(active);
+ va_start(ap,string);
+ curr_event->string=string;
+ for(idx=0;idx<min(numargs,(int)(id->buf_size / sizeof(long))-1);idx++)
+ curr_event->args[idx]=va_arg(ap,long);
+ va_end(ap);
+ debug_finish_entry(id, active, level, 1);
+ spin_unlock_irqrestore(&id->lock, flags);
+
+ return active;
+}
+EXPORT_SYMBOL(__debug_sprintf_exception);
+
+/*
+ * debug_register_view:
+ */
+
+int
+debug_register_view(debug_info_t * id, struct debug_view *view)
+{
+ int rc = 0;
+ int i;
+ unsigned long flags;
+ umode_t mode;
+ struct dentry *pde;
+
+ if (!id)
+ goto out;
+ mode = (id->mode | S_IFREG) & ~S_IXUGO;
+ if (!(view->prolog_proc || view->format_proc || view->header_proc))
+ mode &= ~(S_IRUSR | S_IRGRP | S_IROTH);
+ if (!view->input_proc)
+ mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
+ pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry,
+ id , &debug_file_ops);
+ if (!pde){
+ pr_err("Registering view %s/%s failed due to out of "
+ "memory\n", id->name,view->name);
+ rc = -1;
+ goto out;
+ }
+ spin_lock_irqsave(&id->lock, flags);
+ for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+ if (!id->views[i])
+ break;
+ }
+ if (i == DEBUG_MAX_VIEWS) {
+ pr_err("Registering view %s/%s would exceed the maximum "
+ "number of views %i\n", id->name, view->name, i);
+ rc = -1;
+ } else {
+ id->views[i] = view;
+ id->debugfs_entries[i] = pde;
+ }
+ spin_unlock_irqrestore(&id->lock, flags);
+ if (rc)
+ debugfs_remove(pde);
+out:
+ return rc;
+}
+EXPORT_SYMBOL(debug_register_view);
+
+/*
+ * debug_unregister_view:
+ */
+
+int
+debug_unregister_view(debug_info_t * id, struct debug_view *view)
+{
+ struct dentry *dentry = NULL;
+ unsigned long flags;
+ int i, rc = 0;
+
+ if (!id)
+ goto out;
+ spin_lock_irqsave(&id->lock, flags);
+ for (i = 0; i < DEBUG_MAX_VIEWS; i++) {
+ if (id->views[i] == view)
+ break;
+ }
+ if (i == DEBUG_MAX_VIEWS)
+ rc = -1;
+ else {
+ dentry = id->debugfs_entries[i];
+ id->views[i] = NULL;
+ id->debugfs_entries[i] = NULL;
+ }
+ spin_unlock_irqrestore(&id->lock, flags);
+ debugfs_remove(dentry);
+out:
+ return rc;
+}
+EXPORT_SYMBOL(debug_unregister_view);
+
+static inline char *
+debug_get_user_string(const char __user *user_buf, size_t user_len)
+{
+ char* buffer;
+
+ buffer = kmalloc(user_len + 1, GFP_KERNEL);
+ if (!buffer)
+ return ERR_PTR(-ENOMEM);
+ if (copy_from_user(buffer, user_buf, user_len) != 0) {
+ kfree(buffer);
+ return ERR_PTR(-EFAULT);
+ }
+ /* got the string, now strip linefeed. */
+ if (buffer[user_len - 1] == '\n')
+ buffer[user_len - 1] = 0;
+ else
+ buffer[user_len] = 0;
+ return buffer;
+}
+
+static inline int
+debug_get_uint(char *buf)
+{
+ int rc;
+
+ buf = skip_spaces(buf);
+ rc = simple_strtoul(buf, &buf, 10);
+ if(*buf){
+ rc = -EINVAL;
+ }
+ return rc;
+}
+
+/*
+ * functions for debug-views
+ ***********************************
+*/
+
+/*
+ * prints out actual debug level
+ */
+
+static int
+debug_prolog_pages_fn(debug_info_t * id,
+ struct debug_view *view, char *out_buf)
+{
+ return sprintf(out_buf, "%i\n", id->pages_per_area);
+}
+
+/*
+ * reads new size (number of pages per debug area)
+ */
+
+static int
+debug_input_pages_fn(debug_info_t * id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_len, loff_t * offset)
+{
+ char *str;
+ int rc,new_pages;
+
+ if (user_len > 0x10000)
+ user_len = 0x10000;
+ if (*offset != 0){
+ rc = -EPIPE;
+ goto out;
+ }
+ str = debug_get_user_string(user_buf,user_len);
+ if(IS_ERR(str)){
+ rc = PTR_ERR(str);
+ goto out;
+ }
+ new_pages = debug_get_uint(str);
+ if(new_pages < 0){
+ rc = -EINVAL;
+ goto free_str;
+ }
+ rc = debug_set_size(id,id->nr_areas, new_pages);
+ if(rc != 0){
+ rc = -EINVAL;
+ goto free_str;
+ }
+ rc = user_len;
+free_str:
+ kfree(str);
+out:
+ *offset += user_len;
+ return rc; /* number of input characters */
+}
+
+/*
+ * prints out actual debug level
+ */
+
+static int
+debug_prolog_level_fn(debug_info_t * id, struct debug_view *view, char *out_buf)
+{
+ int rc = 0;
+
+ if(id->level == DEBUG_OFF_LEVEL) {
+ rc = sprintf(out_buf,"-\n");
+ }
+ else {
+ rc = sprintf(out_buf, "%i\n", id->level);
+ }
+ return rc;
+}
+
+/*
+ * reads new debug level
+ */
+
+static int
+debug_input_level_fn(debug_info_t * id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_len, loff_t * offset)
+{
+ char *str;
+ int rc,new_level;
+
+ if (user_len > 0x10000)
+ user_len = 0x10000;
+ if (*offset != 0){
+ rc = -EPIPE;
+ goto out;
+ }
+ str = debug_get_user_string(user_buf,user_len);
+ if(IS_ERR(str)){
+ rc = PTR_ERR(str);
+ goto out;
+ }
+ if(str[0] == '-'){
+ debug_set_level(id, DEBUG_OFF_LEVEL);
+ rc = user_len;
+ goto free_str;
+ } else {
+ new_level = debug_get_uint(str);
+ }
+ if(new_level < 0) {
+ pr_warning("%s is not a valid level for a debug "
+ "feature\n", str);
+ rc = -EINVAL;
+ } else {
+ debug_set_level(id, new_level);
+ rc = user_len;
+ }
+free_str:
+ kfree(str);
+out:
+ *offset += user_len;
+ return rc; /* number of input characters */
+}
+
+
+/*
+ * flushes debug areas
+ */
+
+static void debug_flush(debug_info_t* id, int area)
+{
+ unsigned long flags;
+ int i,j;
+
+ if(!id || !id->areas)
+ return;
+ spin_lock_irqsave(&id->lock,flags);
+ if(area == DEBUG_FLUSH_ALL){
+ id->active_area = 0;
+ memset(id->active_entries, 0, id->nr_areas * sizeof(int));
+ for (i = 0; i < id->nr_areas; i++) {
+ id->active_pages[i] = 0;
+ for(j = 0; j < id->pages_per_area; j++) {
+ memset(id->areas[i][j], 0, PAGE_SIZE);
+ }
+ }
+ } else if(area >= 0 && area < id->nr_areas) {
+ id->active_entries[area] = 0;
+ id->active_pages[area] = 0;
+ for(i = 0; i < id->pages_per_area; i++) {
+ memset(id->areas[area][i],0,PAGE_SIZE);
+ }
+ }
+ spin_unlock_irqrestore(&id->lock,flags);
+}
+
+/*
+ * view function: flushes debug areas
+ */
+
+static int
+debug_input_flush_fn(debug_info_t * id, struct debug_view *view,
+ struct file *file, const char __user *user_buf,
+ size_t user_len, loff_t * offset)
+{
+ char input_buf[1];
+ int rc = user_len;
+
+ if (user_len > 0x10000)
+ user_len = 0x10000;
+ if (*offset != 0){
+ rc = -EPIPE;
+ goto out;
+ }
+ if (copy_from_user(input_buf, user_buf, 1)){
+ rc = -EFAULT;
+ goto out;
+ }
+ if(input_buf[0] == '-') {
+ debug_flush(id, DEBUG_FLUSH_ALL);
+ goto out;
+ }
+ if (isdigit(input_buf[0])) {
+ int area = ((int) input_buf[0] - (int) '0');
+ debug_flush(id, area);
+ goto out;
+ }
+
+ pr_info("Flushing debug data failed because %c is not a valid "
+ "area\n", input_buf[0]);
+
+out:
+ *offset += user_len;
+ return rc; /* number of input characters */
+}
+
+/*
+ * prints debug header in raw format
+ */
+
+static int
+debug_raw_header_fn(debug_info_t * id, struct debug_view *view,
+ int area, debug_entry_t * entry, char *out_buf)
+{
+ int rc;
+
+ rc = sizeof(debug_entry_t);
+ memcpy(out_buf,entry,sizeof(debug_entry_t));
+ return rc;
+}
+
+/*
+ * prints debug data in raw format
+ */
+
+static int
+debug_raw_format_fn(debug_info_t * id, struct debug_view *view,
+ char *out_buf, const char *in_buf)
+{
+ int rc;
+
+ rc = id->buf_size;
+ memcpy(out_buf, in_buf, id->buf_size);
+ return rc;
+}
+
+/*
+ * prints debug data in hex/ascii format
+ */
+
+static int
+debug_hex_ascii_format_fn(debug_info_t * id, struct debug_view *view,
+ char *out_buf, const char *in_buf)
+{
+ int i, rc = 0;
+
+ for (i = 0; i < id->buf_size; i++) {
+ rc += sprintf(out_buf + rc, "%02x ",
+ ((unsigned char *) in_buf)[i]);
+ }
+ rc += sprintf(out_buf + rc, "| ");
+ for (i = 0; i < id->buf_size; i++) {
+ unsigned char c = in_buf[i];
+ if (isascii(c) && isprint(c))
+ rc += sprintf(out_buf + rc, "%c", c);
+ else
+ rc += sprintf(out_buf + rc, ".");
+ }
+ rc += sprintf(out_buf + rc, "\n");
+ return rc;
+}
+
+/*
+ * prints header for debug entry
+ */
+
+int
+debug_dflt_header_fn(debug_info_t * id, struct debug_view *view,
+ int area, debug_entry_t * entry, char *out_buf)
+{
+ struct timespec time_spec;
+ char *except_str;
+ unsigned long caller;
+ int rc = 0;
+ unsigned int level;
+
+ level = entry->id.fields.level;
+ stck_to_timespec(entry->id.stck, &time_spec);
+
+ if (entry->id.fields.exception)
+ except_str = "*";
+ else
+ except_str = "-";
+ caller = ((unsigned long) entry->caller) & PSW_ADDR_INSN;
+ rc += sprintf(out_buf, "%02i %011lu:%06lu %1u %1s %02i %p ",
+ area, time_spec.tv_sec, time_spec.tv_nsec / 1000, level,
+ except_str, entry->id.fields.cpuid, (void *) caller);
+ return rc;
+}
+EXPORT_SYMBOL(debug_dflt_header_fn);
+
+/*
+ * prints debug data sprintf-formated:
+ * debug_sprinf_event/exception calls must be used together with this view
+ */
+
+#define DEBUG_SPRINTF_MAX_ARGS 10
+
+static int
+debug_sprintf_format_fn(debug_info_t * id, struct debug_view *view,
+ char *out_buf, debug_sprintf_entry_t *curr_event)
+{
+ int num_longs, num_used_args = 0,i, rc = 0;
+ int index[DEBUG_SPRINTF_MAX_ARGS];
+
+ /* count of longs fit into one entry */
+ num_longs = id->buf_size / sizeof(long);
+
+ if(num_longs < 1)
+ goto out; /* bufsize of entry too small */
+ if(num_longs == 1) {
+ /* no args, we use only the string */
+ strcpy(out_buf, curr_event->string);
+ rc = strlen(curr_event->string);
+ goto out;
+ }
+
+ /* number of arguments used for sprintf (without the format string) */
+ num_used_args = min(DEBUG_SPRINTF_MAX_ARGS, (num_longs - 1));
+
+ memset(index,0, DEBUG_SPRINTF_MAX_ARGS * sizeof(int));
+
+ for(i = 0; i < num_used_args; i++)
+ index[i] = i;
+
+ rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]],
+ curr_event->args[index[1]], curr_event->args[index[2]],
+ curr_event->args[index[3]], curr_event->args[index[4]],
+ curr_event->args[index[5]], curr_event->args[index[6]],
+ curr_event->args[index[7]], curr_event->args[index[8]],
+ curr_event->args[index[9]]);
+
+out:
+
+ return rc;
+}
+
+/*
+ * debug_init:
+ * - is called exactly once to initialize the debug feature
+ */
+static int __init debug_init(void)
+{
+ s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table);
+ mutex_lock(&debug_mutex);
+ debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT, NULL);
+ initialized = 1;
+ mutex_unlock(&debug_mutex);
+ return 0;
+}
+postcore_initcall(debug_init);
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
new file mode 100644
index 000000000..2f69243bf
--- /dev/null
+++ b/arch/s390/kernel/diag.c
@@ -0,0 +1,66 @@
+/*
+ * Implementation of s390 diagnose codes
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <asm/diag.h>
+
+/*
+ * Diagnose 14: Input spool file manipulation
+ */
+int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
+{
+ register unsigned long _ry1 asm("2") = ry1;
+ register unsigned long _ry2 asm("3") = subcode;
+ int rc = 0;
+
+ asm volatile(
+ " sam31\n"
+ " diag %2,2,0x14\n"
+ " sam64\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (rc), "+d" (_ry2)
+ : "d" (rx), "d" (_ry1)
+ : "cc");
+
+ return rc;
+}
+EXPORT_SYMBOL(diag14);
+
+/*
+ * Diagnose 210: Get information about a virtual device
+ */
+int diag210(struct diag210 *addr)
+{
+ /*
+ * diag 210 needs its data below the 2GB border, so we
+ * use a static data area to be sure
+ */
+ static struct diag210 diag210_tmp;
+ static DEFINE_SPINLOCK(diag210_lock);
+ unsigned long flags;
+ int ccode;
+
+ spin_lock_irqsave(&diag210_lock, flags);
+ diag210_tmp = *addr;
+
+ asm volatile(
+ " lhi %0,-1\n"
+ " sam31\n"
+ " diag %1,0,0x210\n"
+ "0: ipm %0\n"
+ " srl %0,28\n"
+ "1: sam64\n"
+ EX_TABLE(0b, 1b)
+ : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory");
+
+ *addr = diag210_tmp;
+ spin_unlock_irqrestore(&diag210_lock, flags);
+
+ return ccode;
+}
+EXPORT_SYMBOL(diag210);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
new file mode 100644
index 000000000..8140d10c6
--- /dev/null
+++ b/arch/s390/kernel/dis.c
@@ -0,0 +1,2049 @@
+/*
+ * Disassemble s390 instructions.
+ *
+ * Copyright IBM Corp. 2007
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/reboot.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+
+#include <asm/uaccess.h>
+#include <asm/dis.h>
+#include <asm/io.h>
+#include <linux/atomic.h>
+#include <asm/mathemu.h>
+#include <asm/cpcmd.h>
+#include <asm/lowcore.h>
+#include <asm/debug.h>
+#include <asm/irq.h>
+
+enum {
+ UNUSED, /* Indicates the end of the operand list */
+ R_8, /* GPR starting at position 8 */
+ R_12, /* GPR starting at position 12 */
+ R_16, /* GPR starting at position 16 */
+ R_20, /* GPR starting at position 20 */
+ R_24, /* GPR starting at position 24 */
+ R_28, /* GPR starting at position 28 */
+ R_32, /* GPR starting at position 32 */
+ F_8, /* FPR starting at position 8 */
+ F_12, /* FPR starting at position 12 */
+ F_16, /* FPR starting at position 16 */
+ F_20, /* FPR starting at position 16 */
+ F_24, /* FPR starting at position 24 */
+ F_28, /* FPR starting at position 28 */
+ F_32, /* FPR starting at position 32 */
+ A_8, /* Access reg. starting at position 8 */
+ A_12, /* Access reg. starting at position 12 */
+ A_24, /* Access reg. starting at position 24 */
+ A_28, /* Access reg. starting at position 28 */
+ C_8, /* Control reg. starting at position 8 */
+ C_12, /* Control reg. starting at position 12 */
+ V_8, /* Vector reg. starting at position 8, extension bit at 36 */
+ V_12, /* Vector reg. starting at position 12, extension bit at 37 */
+ V_16, /* Vector reg. starting at position 16, extension bit at 38 */
+ V_32, /* Vector reg. starting at position 32, extension bit at 39 */
+ W_12, /* Vector reg. at bit 12, extension at bit 37, used as index */
+ B_16, /* Base register starting at position 16 */
+ B_32, /* Base register starting at position 32 */
+ X_12, /* Index register starting at position 12 */
+ D_20, /* Displacement starting at position 20 */
+ D_36, /* Displacement starting at position 36 */
+ D20_20, /* 20 bit displacement starting at 20 */
+ L4_8, /* 4 bit length starting at position 8 */
+ L4_12, /* 4 bit length starting at position 12 */
+ L8_8, /* 8 bit length starting at position 8 */
+ U4_8, /* 4 bit unsigned value starting at 8 */
+ U4_12, /* 4 bit unsigned value starting at 12 */
+ U4_16, /* 4 bit unsigned value starting at 16 */
+ U4_20, /* 4 bit unsigned value starting at 20 */
+ U4_24, /* 4 bit unsigned value starting at 24 */
+ U4_28, /* 4 bit unsigned value starting at 28 */
+ U4_32, /* 4 bit unsigned value starting at 32 */
+ U4_36, /* 4 bit unsigned value starting at 36 */
+ U8_8, /* 8 bit unsigned value starting at 8 */
+ U8_16, /* 8 bit unsigned value starting at 16 */
+ U8_24, /* 8 bit unsigned value starting at 24 */
+ U8_32, /* 8 bit unsigned value starting at 32 */
+ I8_8, /* 8 bit signed value starting at 8 */
+ I8_16, /* 8 bit signed value starting at 16 */
+ I8_24, /* 8 bit signed value starting at 24 */
+ I8_32, /* 8 bit signed value starting at 32 */
+ J12_12, /* PC relative offset at 12 */
+ I16_16, /* 16 bit signed value starting at 16 */
+ I16_32, /* 32 bit signed value starting at 16 */
+ U16_16, /* 16 bit unsigned value starting at 16 */
+ U16_32, /* 32 bit unsigned value starting at 16 */
+ J16_16, /* PC relative jump offset at 16 */
+ J16_32, /* PC relative offset at 16 */
+ I24_24, /* 24 bit signed value starting at 24 */
+ J32_16, /* PC relative long offset at 16 */
+ I32_16, /* 32 bit signed value starting at 16 */
+ U32_16, /* 32 bit unsigned value starting at 16 */
+ M_16, /* 4 bit optional mask starting at 16 */
+ M_20, /* 4 bit optional mask starting at 20 */
+ M_24, /* 4 bit optional mask starting at 24 */
+ M_28, /* 4 bit optional mask starting at 28 */
+ M_32, /* 4 bit optional mask starting at 32 */
+ RO_28, /* optional GPR starting at position 28 */
+};
+
+/*
+ * Enumeration of the different instruction formats.
+ * For details consult the principles of operation.
+ */
+enum {
+ INSTR_INVALID,
+ INSTR_E,
+ INSTR_IE_UU,
+ INSTR_MII_UPI,
+ INSTR_RIE_R0IU, INSTR_RIE_R0UU, INSTR_RIE_RRP, INSTR_RIE_RRPU,
+ INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, INSTR_RIE_RRI0,
+ INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU, INSTR_RIL_UP,
+ INSTR_RIS_R0RDU, INSTR_RIS_R0UU, INSTR_RIS_RURDI, INSTR_RIS_RURDU,
+ INSTR_RI_RI, INSTR_RI_RP, INSTR_RI_RU, INSTR_RI_UP,
+ INSTR_RRE_00, INSTR_RRE_0R, INSTR_RRE_AA, INSTR_RRE_AR, INSTR_RRE_F0,
+ INSTR_RRE_FF, INSTR_RRE_FR, INSTR_RRE_R0, INSTR_RRE_RA, INSTR_RRE_RF,
+ INSTR_RRE_RR, INSTR_RRE_RR_OPT,
+ INSTR_RRF_0UFF, INSTR_RRF_F0FF, INSTR_RRF_F0FF2, INSTR_RRF_F0FR,
+ INSTR_RRF_FFRU, INSTR_RRF_FUFF, INSTR_RRF_FUFF2, INSTR_RRF_M0RR,
+ INSTR_RRF_R0RR, INSTR_RRF_R0RR2, INSTR_RRF_RMRR, INSTR_RRF_RURR,
+ INSTR_RRF_U0FF, INSTR_RRF_U0RF, INSTR_RRF_U0RR, INSTR_RRF_UUFF,
+ INSTR_RRF_UUFR, INSTR_RRF_UURF,
+ INSTR_RRR_F0FF, INSTR_RRS_RRRDU,
+ INSTR_RR_FF, INSTR_RR_R0, INSTR_RR_RR, INSTR_RR_U0, INSTR_RR_UR,
+ INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD,
+ INSTR_RSI_RRP,
+ INSTR_RSL_LRDFU, INSTR_RSL_R0RD,
+ INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD,
+ INSTR_RSY_RDRM, INSTR_RSY_RMRD,
+ INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD,
+ INSTR_RS_RURD,
+ INSTR_RXE_FRRD, INSTR_RXE_RRRD, INSTR_RXE_RRRDM,
+ INSTR_RXF_FRRDF,
+ INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RXY_URRD,
+ INSTR_RX_FRRD, INSTR_RX_RRRD, INSTR_RX_URRD,
+ INSTR_SIL_RDI, INSTR_SIL_RDU,
+ INSTR_SIY_IRD, INSTR_SIY_URD,
+ INSTR_SI_URD,
+ INSTR_SMI_U0RDP,
+ INSTR_SSE_RDRD,
+ INSTR_SSF_RRDRD, INSTR_SSF_RRDRD2,
+ INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD,
+ INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3,
+ INSTR_S_00, INSTR_S_RD,
+ INSTR_VRI_V0IM, INSTR_VRI_V0I0, INSTR_VRI_V0IIM, INSTR_VRI_VVIM,
+ INSTR_VRI_VVV0IM, INSTR_VRI_VVV0I0, INSTR_VRI_VVIMM,
+ INSTR_VRR_VV00MMM, INSTR_VRR_VV000MM, INSTR_VRR_VV0000M,
+ INSTR_VRR_VV00000, INSTR_VRR_VVV0M0M, INSTR_VRR_VV00M0M,
+ INSTR_VRR_VVV000M, INSTR_VRR_VVV000V, INSTR_VRR_VVV0000,
+ INSTR_VRR_VVV0MMM, INSTR_VRR_VVV00MM, INSTR_VRR_VVVMM0V,
+ INSTR_VRR_VVVM0MV, INSTR_VRR_VVVM00V, INSTR_VRR_VRR0000,
+ INSTR_VRS_VVRDM, INSTR_VRS_VVRD0, INSTR_VRS_VRRDM, INSTR_VRS_VRRD0,
+ INSTR_VRS_RVRDM,
+ INSTR_VRV_VVRDM, INSTR_VRV_VWRDM,
+ INSTR_VRX_VRRDM, INSTR_VRX_VRRD0,
+};
+
+static const struct s390_operand operands[] =
+{
+ [UNUSED] = { 0, 0, 0 },
+ [R_8] = { 4, 8, OPERAND_GPR },
+ [R_12] = { 4, 12, OPERAND_GPR },
+ [R_16] = { 4, 16, OPERAND_GPR },
+ [R_20] = { 4, 20, OPERAND_GPR },
+ [R_24] = { 4, 24, OPERAND_GPR },
+ [R_28] = { 4, 28, OPERAND_GPR },
+ [R_32] = { 4, 32, OPERAND_GPR },
+ [F_8] = { 4, 8, OPERAND_FPR },
+ [F_12] = { 4, 12, OPERAND_FPR },
+ [F_16] = { 4, 16, OPERAND_FPR },
+ [F_20] = { 4, 16, OPERAND_FPR },
+ [F_24] = { 4, 24, OPERAND_FPR },
+ [F_28] = { 4, 28, OPERAND_FPR },
+ [F_32] = { 4, 32, OPERAND_FPR },
+ [A_8] = { 4, 8, OPERAND_AR },
+ [A_12] = { 4, 12, OPERAND_AR },
+ [A_24] = { 4, 24, OPERAND_AR },
+ [A_28] = { 4, 28, OPERAND_AR },
+ [C_8] = { 4, 8, OPERAND_CR },
+ [C_12] = { 4, 12, OPERAND_CR },
+ [V_8] = { 4, 8, OPERAND_VR },
+ [V_12] = { 4, 12, OPERAND_VR },
+ [V_16] = { 4, 16, OPERAND_VR },
+ [V_32] = { 4, 32, OPERAND_VR },
+ [W_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR },
+ [B_16] = { 4, 16, OPERAND_BASE | OPERAND_GPR },
+ [B_32] = { 4, 32, OPERAND_BASE | OPERAND_GPR },
+ [X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR },
+ [D_20] = { 12, 20, OPERAND_DISP },
+ [D_36] = { 12, 36, OPERAND_DISP },
+ [D20_20] = { 20, 20, OPERAND_DISP | OPERAND_SIGNED },
+ [L4_8] = { 4, 8, OPERAND_LENGTH },
+ [L4_12] = { 4, 12, OPERAND_LENGTH },
+ [L8_8] = { 8, 8, OPERAND_LENGTH },
+ [U4_8] = { 4, 8, 0 },
+ [U4_12] = { 4, 12, 0 },
+ [U4_16] = { 4, 16, 0 },
+ [U4_20] = { 4, 20, 0 },
+ [U4_24] = { 4, 24, 0 },
+ [U4_28] = { 4, 28, 0 },
+ [U4_32] = { 4, 32, 0 },
+ [U4_36] = { 4, 36, 0 },
+ [U8_8] = { 8, 8, 0 },
+ [U8_16] = { 8, 16, 0 },
+ [U8_24] = { 8, 24, 0 },
+ [U8_32] = { 8, 32, 0 },
+ [J12_12] = { 12, 12, OPERAND_PCREL },
+ [I8_8] = { 8, 8, OPERAND_SIGNED },
+ [I8_16] = { 8, 16, OPERAND_SIGNED },
+ [I8_24] = { 8, 24, OPERAND_SIGNED },
+ [I8_32] = { 8, 32, OPERAND_SIGNED },
+ [I16_32] = { 16, 32, OPERAND_SIGNED },
+ [I16_16] = { 16, 16, OPERAND_SIGNED },
+ [U16_16] = { 16, 16, 0 },
+ [U16_32] = { 16, 32, 0 },
+ [J16_16] = { 16, 16, OPERAND_PCREL },
+ [J16_32] = { 16, 32, OPERAND_PCREL },
+ [I24_24] = { 24, 24, OPERAND_SIGNED },
+ [J32_16] = { 32, 16, OPERAND_PCREL },
+ [I32_16] = { 32, 16, OPERAND_SIGNED },
+ [U32_16] = { 32, 16, 0 },
+ [M_16] = { 4, 16, 0 },
+ [M_20] = { 4, 20, 0 },
+ [M_24] = { 4, 24, 0 },
+ [M_28] = { 4, 28, 0 },
+ [M_32] = { 4, 32, 0 },
+ [RO_28] = { 4, 28, OPERAND_GPR }
+};
+
+static const unsigned char formats[][7] = {
+ [INSTR_E] = { 0xff, 0,0,0,0,0,0 },
+ [INSTR_IE_UU] = { 0xff, U4_24,U4_28,0,0,0,0 },
+ [INSTR_MII_UPI] = { 0xff, U4_8,J12_12,I24_24 },
+ [INSTR_RIE_R0IU] = { 0xff, R_8,I16_16,U4_32,0,0,0 },
+ [INSTR_RIE_R0UU] = { 0xff, R_8,U16_16,U4_32,0,0,0 },
+ [INSTR_RIE_RRI0] = { 0xff, R_8,R_12,I16_16,0,0,0 },
+ [INSTR_RIE_RRPU] = { 0xff, R_8,R_12,U4_32,J16_16,0,0 },
+ [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 },
+ [INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 },
+ [INSTR_RIE_RUPI] = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 },
+ [INSTR_RIE_RUPU] = { 0xff, R_8,U8_32,U4_12,J16_16,0,0 },
+ [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 },
+ [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 },
+ [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 },
+ [INSTR_RIL_UP] = { 0x0f, U4_8,J32_16,0,0,0,0 },
+ [INSTR_RIS_R0RDU] = { 0xff, R_8,U8_32,D_20,B_16,0,0 },
+ [INSTR_RIS_RURDI] = { 0xff, R_8,I8_32,U4_12,D_20,B_16,0 },
+ [INSTR_RIS_RURDU] = { 0xff, R_8,U8_32,U4_12,D_20,B_16,0 },
+ [INSTR_RI_RI] = { 0x0f, R_8,I16_16,0,0,0,0 },
+ [INSTR_RI_RP] = { 0x0f, R_8,J16_16,0,0,0,0 },
+ [INSTR_RI_RU] = { 0x0f, R_8,U16_16,0,0,0,0 },
+ [INSTR_RI_UP] = { 0x0f, U4_8,J16_16,0,0,0,0 },
+ [INSTR_RRE_00] = { 0xff, 0,0,0,0,0,0 },
+ [INSTR_RRE_0R] = { 0xff, R_28,0,0,0,0,0 },
+ [INSTR_RRE_AA] = { 0xff, A_24,A_28,0,0,0,0 },
+ [INSTR_RRE_AR] = { 0xff, A_24,R_28,0,0,0,0 },
+ [INSTR_RRE_F0] = { 0xff, F_24,0,0,0,0,0 },
+ [INSTR_RRE_FF] = { 0xff, F_24,F_28,0,0,0,0 },
+ [INSTR_RRE_FR] = { 0xff, F_24,R_28,0,0,0,0 },
+ [INSTR_RRE_R0] = { 0xff, R_24,0,0,0,0,0 },
+ [INSTR_RRE_RA] = { 0xff, R_24,A_28,0,0,0,0 },
+ [INSTR_RRE_RF] = { 0xff, R_24,F_28,0,0,0,0 },
+ [INSTR_RRE_RR] = { 0xff, R_24,R_28,0,0,0,0 },
+ [INSTR_RRE_RR_OPT]= { 0xff, R_24,RO_28,0,0,0,0 },
+ [INSTR_RRF_0UFF] = { 0xff, F_24,F_28,U4_20,0,0,0 },
+ [INSTR_RRF_F0FF2] = { 0xff, F_24,F_16,F_28,0,0,0 },
+ [INSTR_RRF_F0FF] = { 0xff, F_16,F_24,F_28,0,0,0 },
+ [INSTR_RRF_F0FR] = { 0xff, F_24,F_16,R_28,0,0,0 },
+ [INSTR_RRF_FFRU] = { 0xff, F_24,F_16,R_28,U4_20,0,0 },
+ [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 },
+ [INSTR_RRF_FUFF2] = { 0xff, F_24,F_28,F_16,U4_20,0,0 },
+ [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 },
+ [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 },
+ [INSTR_RRF_R0RR2] = { 0xff, R_24,R_28,R_16,0,0,0 },
+ [INSTR_RRF_RMRR] = { 0xff, R_24,R_16,R_28,M_20,0,0 },
+ [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 },
+ [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 },
+ [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 },
+ [INSTR_RRF_U0RR] = { 0xff, R_24,R_28,U4_16,0,0,0 },
+ [INSTR_RRF_UUFF] = { 0xff, F_24,U4_16,F_28,U4_20,0,0 },
+ [INSTR_RRF_UUFR] = { 0xff, F_24,U4_16,R_28,U4_20,0,0 },
+ [INSTR_RRF_UURF] = { 0xff, R_24,U4_16,F_28,U4_20,0,0 },
+ [INSTR_RRR_F0FF] = { 0xff, F_24,F_28,F_16,0,0,0 },
+ [INSTR_RRS_RRRDU] = { 0xff, R_8,R_12,U4_32,D_20,B_16,0 },
+ [INSTR_RR_FF] = { 0xff, F_8,F_12,0,0,0,0 },
+ [INSTR_RR_R0] = { 0xff, R_8, 0,0,0,0,0 },
+ [INSTR_RR_RR] = { 0xff, R_8,R_12,0,0,0,0 },
+ [INSTR_RR_U0] = { 0xff, U8_8, 0,0,0,0,0 },
+ [INSTR_RR_UR] = { 0xff, U4_8,R_12,0,0,0,0 },
+ [INSTR_RSE_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 },
+ [INSTR_RSE_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 },
+ [INSTR_RSE_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 },
+ [INSTR_RSI_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 },
+ [INSTR_RSL_LRDFU] = { 0xff, F_32,D_20,L4_8,B_16,U4_36,0 },
+ [INSTR_RSL_R0RD] = { 0xff, D_20,L4_8,B_16,0,0,0 },
+ [INSTR_RSY_AARD] = { 0xff, A_8,A_12,D20_20,B_16,0,0 },
+ [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 },
+ [INSTR_RSY_RDRM] = { 0xff, R_8,D20_20,B_16,U4_12,0,0 },
+ [INSTR_RSY_RMRD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 },
+ [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 },
+ [INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 },
+ [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 },
+ [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 },
+ [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 },
+ [INSTR_RS_RRRD] = { 0xff, R_8,R_12,D_20,B_16,0,0 },
+ [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 },
+ [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 },
+ [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 },
+ [INSTR_RXE_RRRDM] = { 0xff, R_8,D_20,X_12,B_16,M_32,0 },
+ [INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 },
+ [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 },
+ [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 },
+ [INSTR_RXY_URRD] = { 0xff, U4_8,D20_20,X_12,B_16,0,0 },
+ [INSTR_RX_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 },
+ [INSTR_RX_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 },
+ [INSTR_RX_URRD] = { 0xff, U4_8,D_20,X_12,B_16,0,0 },
+ [INSTR_SIL_RDI] = { 0xff, D_20,B_16,I16_32,0,0,0 },
+ [INSTR_SIL_RDU] = { 0xff, D_20,B_16,U16_32,0,0,0 },
+ [INSTR_SIY_IRD] = { 0xff, D20_20,B_16,I8_8,0,0,0 },
+ [INSTR_SIY_URD] = { 0xff, D20_20,B_16,U8_8,0,0,0 },
+ [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 },
+ [INSTR_SMI_U0RDP] = { 0xff, U4_8,J16_32,D_20,B_16,0,0 },
+ [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 },
+ [INSTR_SSF_RRDRD] = { 0x0f, D_20,B_16,D_36,B_32,R_8,0 },
+ [INSTR_SSF_RRDRD2]= { 0x0f, R_8,D_20,B_16,D_36,B_32,0 },
+ [INSTR_SS_L0RDRD] = { 0xff, D_20,L8_8,B_16,D_36,B_32,0 },
+ [INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 },
+ [INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 },
+ [INSTR_SS_RRRDRD2]= { 0xff, R_8,D_20,B_16,R_12,D_36,B_32 },
+ [INSTR_SS_RRRDRD3]= { 0xff, R_8,R_12,D_20,B_16,D_36,B_32 },
+ [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 },
+ [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 },
+ [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 },
+ [INSTR_VRI_V0IM] = { 0xff, V_8,I16_16,M_32,0,0,0 },
+ [INSTR_VRI_V0I0] = { 0xff, V_8,I16_16,0,0,0,0 },
+ [INSTR_VRI_V0IIM] = { 0xff, V_8,I8_16,I8_24,M_32,0,0 },
+ [INSTR_VRI_VVIM] = { 0xff, V_8,I16_16,V_12,M_32,0,0 },
+ [INSTR_VRI_VVV0IM]= { 0xff, V_8,V_12,V_16,I8_24,M_32,0 },
+ [INSTR_VRI_VVV0I0]= { 0xff, V_8,V_12,V_16,I8_24,0,0 },
+ [INSTR_VRI_VVIMM] = { 0xff, V_8,V_12,I16_16,M_32,M_28,0 },
+ [INSTR_VRR_VV00MMM]={ 0xff, V_8,V_12,M_32,M_28,M_24,0 },
+ [INSTR_VRR_VV000MM]={ 0xff, V_8,V_12,M_32,M_28,0,0 },
+ [INSTR_VRR_VV0000M]={ 0xff, V_8,V_12,M_32,0,0,0 },
+ [INSTR_VRR_VV00000]={ 0xff, V_8,V_12,0,0,0,0 },
+ [INSTR_VRR_VVV0M0M]={ 0xff, V_8,V_12,V_16,M_32,M_24,0 },
+ [INSTR_VRR_VV00M0M]={ 0xff, V_8,V_12,M_32,M_24,0,0 },
+ [INSTR_VRR_VVV000M]={ 0xff, V_8,V_12,V_16,M_32,0,0 },
+ [INSTR_VRR_VVV000V]={ 0xff, V_8,V_12,V_16,V_32,0,0 },
+ [INSTR_VRR_VVV0000]={ 0xff, V_8,V_12,V_16,0,0,0 },
+ [INSTR_VRR_VVV0MMM]={ 0xff, V_8,V_12,V_16,M_32,M_28,M_24 },
+ [INSTR_VRR_VVV00MM]={ 0xff, V_8,V_12,V_16,M_32,M_28,0 },
+ [INSTR_VRR_VVVMM0V]={ 0xff, V_8,V_12,V_16,V_32,M_20,M_24 },
+ [INSTR_VRR_VVVM0MV]={ 0xff, V_8,V_12,V_16,V_32,M_28,M_20 },
+ [INSTR_VRR_VVVM00V]={ 0xff, V_8,V_12,V_16,V_32,M_20,0 },
+ [INSTR_VRR_VRR0000]={ 0xff, V_8,R_12,R_16,0,0,0 },
+ [INSTR_VRS_VVRDM] = { 0xff, V_8,V_12,D_20,B_16,M_32,0 },
+ [INSTR_VRS_VVRD0] = { 0xff, V_8,V_12,D_20,B_16,0,0 },
+ [INSTR_VRS_VRRDM] = { 0xff, V_8,R_12,D_20,B_16,M_32,0 },
+ [INSTR_VRS_VRRD0] = { 0xff, V_8,R_12,D_20,B_16,0,0 },
+ [INSTR_VRS_RVRDM] = { 0xff, R_8,V_12,D_20,B_16,M_32,0 },
+ [INSTR_VRV_VVRDM] = { 0xff, V_8,V_12,D_20,B_16,M_32,0 },
+ [INSTR_VRV_VWRDM] = { 0xff, V_8,D_20,W_12,B_16,M_32,0 },
+ [INSTR_VRX_VRRDM] = { 0xff, V_8,D_20,X_12,B_16,M_32,0 },
+ [INSTR_VRX_VRRD0] = { 0xff, V_8,D_20,X_12,B_16,0,0 },
+};
+
+enum {
+ LONG_INSN_ALGHSIK,
+ LONG_INSN_ALHHHR,
+ LONG_INSN_ALHHLR,
+ LONG_INSN_ALHSIK,
+ LONG_INSN_ALSIHN,
+ LONG_INSN_CDFBRA,
+ LONG_INSN_CDGBRA,
+ LONG_INSN_CDGTRA,
+ LONG_INSN_CDLFBR,
+ LONG_INSN_CDLFTR,
+ LONG_INSN_CDLGBR,
+ LONG_INSN_CDLGTR,
+ LONG_INSN_CEFBRA,
+ LONG_INSN_CEGBRA,
+ LONG_INSN_CELFBR,
+ LONG_INSN_CELGBR,
+ LONG_INSN_CFDBRA,
+ LONG_INSN_CFEBRA,
+ LONG_INSN_CFXBRA,
+ LONG_INSN_CGDBRA,
+ LONG_INSN_CGDTRA,
+ LONG_INSN_CGEBRA,
+ LONG_INSN_CGXBRA,
+ LONG_INSN_CGXTRA,
+ LONG_INSN_CLFDBR,
+ LONG_INSN_CLFDTR,
+ LONG_INSN_CLFEBR,
+ LONG_INSN_CLFHSI,
+ LONG_INSN_CLFXBR,
+ LONG_INSN_CLFXTR,
+ LONG_INSN_CLGDBR,
+ LONG_INSN_CLGDTR,
+ LONG_INSN_CLGEBR,
+ LONG_INSN_CLGFRL,
+ LONG_INSN_CLGHRL,
+ LONG_INSN_CLGHSI,
+ LONG_INSN_CLGXBR,
+ LONG_INSN_CLGXTR,
+ LONG_INSN_CLHHSI,
+ LONG_INSN_CXFBRA,
+ LONG_INSN_CXGBRA,
+ LONG_INSN_CXGTRA,
+ LONG_INSN_CXLFBR,
+ LONG_INSN_CXLFTR,
+ LONG_INSN_CXLGBR,
+ LONG_INSN_CXLGTR,
+ LONG_INSN_FIDBRA,
+ LONG_INSN_FIEBRA,
+ LONG_INSN_FIXBRA,
+ LONG_INSN_LDXBRA,
+ LONG_INSN_LEDBRA,
+ LONG_INSN_LEXBRA,
+ LONG_INSN_LLGFAT,
+ LONG_INSN_LLGFRL,
+ LONG_INSN_LLGHRL,
+ LONG_INSN_LLGTAT,
+ LONG_INSN_POPCNT,
+ LONG_INSN_RIEMIT,
+ LONG_INSN_RINEXT,
+ LONG_INSN_RISBGN,
+ LONG_INSN_RISBHG,
+ LONG_INSN_RISBLG,
+ LONG_INSN_SLHHHR,
+ LONG_INSN_SLHHLR,
+ LONG_INSN_TABORT,
+ LONG_INSN_TBEGIN,
+ LONG_INSN_TBEGINC,
+ LONG_INSN_PCISTG,
+ LONG_INSN_MPCIFC,
+ LONG_INSN_STPCIFC,
+ LONG_INSN_PCISTB,
+ LONG_INSN_VPOPCT,
+ LONG_INSN_VERLLV,
+ LONG_INSN_VESRAV,
+ LONG_INSN_VESRLV,
+ LONG_INSN_VSBCBI,
+ LONG_INSN_STCCTM
+};
+
+static char *long_insn_name[] = {
+ [LONG_INSN_ALGHSIK] = "alghsik",
+ [LONG_INSN_ALHHHR] = "alhhhr",
+ [LONG_INSN_ALHHLR] = "alhhlr",
+ [LONG_INSN_ALHSIK] = "alhsik",
+ [LONG_INSN_ALSIHN] = "alsihn",
+ [LONG_INSN_CDFBRA] = "cdfbra",
+ [LONG_INSN_CDGBRA] = "cdgbra",
+ [LONG_INSN_CDGTRA] = "cdgtra",
+ [LONG_INSN_CDLFBR] = "cdlfbr",
+ [LONG_INSN_CDLFTR] = "cdlftr",
+ [LONG_INSN_CDLGBR] = "cdlgbr",
+ [LONG_INSN_CDLGTR] = "cdlgtr",
+ [LONG_INSN_CEFBRA] = "cefbra",
+ [LONG_INSN_CEGBRA] = "cegbra",
+ [LONG_INSN_CELFBR] = "celfbr",
+ [LONG_INSN_CELGBR] = "celgbr",
+ [LONG_INSN_CFDBRA] = "cfdbra",
+ [LONG_INSN_CFEBRA] = "cfebra",
+ [LONG_INSN_CFXBRA] = "cfxbra",
+ [LONG_INSN_CGDBRA] = "cgdbra",
+ [LONG_INSN_CGDTRA] = "cgdtra",
+ [LONG_INSN_CGEBRA] = "cgebra",
+ [LONG_INSN_CGXBRA] = "cgxbra",
+ [LONG_INSN_CGXTRA] = "cgxtra",
+ [LONG_INSN_CLFDBR] = "clfdbr",
+ [LONG_INSN_CLFDTR] = "clfdtr",
+ [LONG_INSN_CLFEBR] = "clfebr",
+ [LONG_INSN_CLFHSI] = "clfhsi",
+ [LONG_INSN_CLFXBR] = "clfxbr",
+ [LONG_INSN_CLFXTR] = "clfxtr",
+ [LONG_INSN_CLGDBR] = "clgdbr",
+ [LONG_INSN_CLGDTR] = "clgdtr",
+ [LONG_INSN_CLGEBR] = "clgebr",
+ [LONG_INSN_CLGFRL] = "clgfrl",
+ [LONG_INSN_CLGHRL] = "clghrl",
+ [LONG_INSN_CLGHSI] = "clghsi",
+ [LONG_INSN_CLGXBR] = "clgxbr",
+ [LONG_INSN_CLGXTR] = "clgxtr",
+ [LONG_INSN_CLHHSI] = "clhhsi",
+ [LONG_INSN_CXFBRA] = "cxfbra",
+ [LONG_INSN_CXGBRA] = "cxgbra",
+ [LONG_INSN_CXGTRA] = "cxgtra",
+ [LONG_INSN_CXLFBR] = "cxlfbr",
+ [LONG_INSN_CXLFTR] = "cxlftr",
+ [LONG_INSN_CXLGBR] = "cxlgbr",
+ [LONG_INSN_CXLGTR] = "cxlgtr",
+ [LONG_INSN_FIDBRA] = "fidbra",
+ [LONG_INSN_FIEBRA] = "fiebra",
+ [LONG_INSN_FIXBRA] = "fixbra",
+ [LONG_INSN_LDXBRA] = "ldxbra",
+ [LONG_INSN_LEDBRA] = "ledbra",
+ [LONG_INSN_LEXBRA] = "lexbra",
+ [LONG_INSN_LLGFAT] = "llgfat",
+ [LONG_INSN_LLGFRL] = "llgfrl",
+ [LONG_INSN_LLGHRL] = "llghrl",
+ [LONG_INSN_LLGTAT] = "llgtat",
+ [LONG_INSN_POPCNT] = "popcnt",
+ [LONG_INSN_RIEMIT] = "riemit",
+ [LONG_INSN_RINEXT] = "rinext",
+ [LONG_INSN_RISBGN] = "risbgn",
+ [LONG_INSN_RISBHG] = "risbhg",
+ [LONG_INSN_RISBLG] = "risblg",
+ [LONG_INSN_SLHHHR] = "slhhhr",
+ [LONG_INSN_SLHHLR] = "slhhlr",
+ [LONG_INSN_TABORT] = "tabort",
+ [LONG_INSN_TBEGIN] = "tbegin",
+ [LONG_INSN_TBEGINC] = "tbeginc",
+ [LONG_INSN_PCISTG] = "pcistg",
+ [LONG_INSN_MPCIFC] = "mpcifc",
+ [LONG_INSN_STPCIFC] = "stpcifc",
+ [LONG_INSN_PCISTB] = "pcistb",
+ [LONG_INSN_VPOPCT] = "vpopct",
+ [LONG_INSN_VERLLV] = "verllv",
+ [LONG_INSN_VESRAV] = "vesrav",
+ [LONG_INSN_VESRLV] = "vesrlv",
+ [LONG_INSN_VSBCBI] = "vsbcbi",
+ [LONG_INSN_STCCTM] = "stcctm",
+};
+
+static struct s390_insn opcode[] = {
+ { "bprp", 0xc5, INSTR_MII_UPI },
+ { "bpp", 0xc7, INSTR_SMI_U0RDP },
+ { "trtr", 0xd0, INSTR_SS_L0RDRD },
+ { "lmd", 0xef, INSTR_SS_RRRDRD3 },
+ { "spm", 0x04, INSTR_RR_R0 },
+ { "balr", 0x05, INSTR_RR_RR },
+ { "bctr", 0x06, INSTR_RR_RR },
+ { "bcr", 0x07, INSTR_RR_UR },
+ { "svc", 0x0a, INSTR_RR_U0 },
+ { "bsm", 0x0b, INSTR_RR_RR },
+ { "bassm", 0x0c, INSTR_RR_RR },
+ { "basr", 0x0d, INSTR_RR_RR },
+ { "mvcl", 0x0e, INSTR_RR_RR },
+ { "clcl", 0x0f, INSTR_RR_RR },
+ { "lpr", 0x10, INSTR_RR_RR },
+ { "lnr", 0x11, INSTR_RR_RR },
+ { "ltr", 0x12, INSTR_RR_RR },
+ { "lcr", 0x13, INSTR_RR_RR },
+ { "nr", 0x14, INSTR_RR_RR },
+ { "clr", 0x15, INSTR_RR_RR },
+ { "or", 0x16, INSTR_RR_RR },
+ { "xr", 0x17, INSTR_RR_RR },
+ { "lr", 0x18, INSTR_RR_RR },
+ { "cr", 0x19, INSTR_RR_RR },
+ { "ar", 0x1a, INSTR_RR_RR },
+ { "sr", 0x1b, INSTR_RR_RR },
+ { "mr", 0x1c, INSTR_RR_RR },
+ { "dr", 0x1d, INSTR_RR_RR },
+ { "alr", 0x1e, INSTR_RR_RR },
+ { "slr", 0x1f, INSTR_RR_RR },
+ { "lpdr", 0x20, INSTR_RR_FF },
+ { "lndr", 0x21, INSTR_RR_FF },
+ { "ltdr", 0x22, INSTR_RR_FF },
+ { "lcdr", 0x23, INSTR_RR_FF },
+ { "hdr", 0x24, INSTR_RR_FF },
+ { "ldxr", 0x25, INSTR_RR_FF },
+ { "mxr", 0x26, INSTR_RR_FF },
+ { "mxdr", 0x27, INSTR_RR_FF },
+ { "ldr", 0x28, INSTR_RR_FF },
+ { "cdr", 0x29, INSTR_RR_FF },
+ { "adr", 0x2a, INSTR_RR_FF },
+ { "sdr", 0x2b, INSTR_RR_FF },
+ { "mdr", 0x2c, INSTR_RR_FF },
+ { "ddr", 0x2d, INSTR_RR_FF },
+ { "awr", 0x2e, INSTR_RR_FF },
+ { "swr", 0x2f, INSTR_RR_FF },
+ { "lper", 0x30, INSTR_RR_FF },
+ { "lner", 0x31, INSTR_RR_FF },
+ { "lter", 0x32, INSTR_RR_FF },
+ { "lcer", 0x33, INSTR_RR_FF },
+ { "her", 0x34, INSTR_RR_FF },
+ { "ledr", 0x35, INSTR_RR_FF },
+ { "axr", 0x36, INSTR_RR_FF },
+ { "sxr", 0x37, INSTR_RR_FF },
+ { "ler", 0x38, INSTR_RR_FF },
+ { "cer", 0x39, INSTR_RR_FF },
+ { "aer", 0x3a, INSTR_RR_FF },
+ { "ser", 0x3b, INSTR_RR_FF },
+ { "mder", 0x3c, INSTR_RR_FF },
+ { "der", 0x3d, INSTR_RR_FF },
+ { "aur", 0x3e, INSTR_RR_FF },
+ { "sur", 0x3f, INSTR_RR_FF },
+ { "sth", 0x40, INSTR_RX_RRRD },
+ { "la", 0x41, INSTR_RX_RRRD },
+ { "stc", 0x42, INSTR_RX_RRRD },
+ { "ic", 0x43, INSTR_RX_RRRD },
+ { "ex", 0x44, INSTR_RX_RRRD },
+ { "bal", 0x45, INSTR_RX_RRRD },
+ { "bct", 0x46, INSTR_RX_RRRD },
+ { "bc", 0x47, INSTR_RX_URRD },
+ { "lh", 0x48, INSTR_RX_RRRD },
+ { "ch", 0x49, INSTR_RX_RRRD },
+ { "ah", 0x4a, INSTR_RX_RRRD },
+ { "sh", 0x4b, INSTR_RX_RRRD },
+ { "mh", 0x4c, INSTR_RX_RRRD },
+ { "bas", 0x4d, INSTR_RX_RRRD },
+ { "cvd", 0x4e, INSTR_RX_RRRD },
+ { "cvb", 0x4f, INSTR_RX_RRRD },
+ { "st", 0x50, INSTR_RX_RRRD },
+ { "lae", 0x51, INSTR_RX_RRRD },
+ { "n", 0x54, INSTR_RX_RRRD },
+ { "cl", 0x55, INSTR_RX_RRRD },
+ { "o", 0x56, INSTR_RX_RRRD },
+ { "x", 0x57, INSTR_RX_RRRD },
+ { "l", 0x58, INSTR_RX_RRRD },
+ { "c", 0x59, INSTR_RX_RRRD },
+ { "a", 0x5a, INSTR_RX_RRRD },
+ { "s", 0x5b, INSTR_RX_RRRD },
+ { "m", 0x5c, INSTR_RX_RRRD },
+ { "d", 0x5d, INSTR_RX_RRRD },
+ { "al", 0x5e, INSTR_RX_RRRD },
+ { "sl", 0x5f, INSTR_RX_RRRD },
+ { "std", 0x60, INSTR_RX_FRRD },
+ { "mxd", 0x67, INSTR_RX_FRRD },
+ { "ld", 0x68, INSTR_RX_FRRD },
+ { "cd", 0x69, INSTR_RX_FRRD },
+ { "ad", 0x6a, INSTR_RX_FRRD },
+ { "sd", 0x6b, INSTR_RX_FRRD },
+ { "md", 0x6c, INSTR_RX_FRRD },
+ { "dd", 0x6d, INSTR_RX_FRRD },
+ { "aw", 0x6e, INSTR_RX_FRRD },
+ { "sw", 0x6f, INSTR_RX_FRRD },
+ { "ste", 0x70, INSTR_RX_FRRD },
+ { "ms", 0x71, INSTR_RX_RRRD },
+ { "le", 0x78, INSTR_RX_FRRD },
+ { "ce", 0x79, INSTR_RX_FRRD },
+ { "ae", 0x7a, INSTR_RX_FRRD },
+ { "se", 0x7b, INSTR_RX_FRRD },
+ { "mde", 0x7c, INSTR_RX_FRRD },
+ { "de", 0x7d, INSTR_RX_FRRD },
+ { "au", 0x7e, INSTR_RX_FRRD },
+ { "su", 0x7f, INSTR_RX_FRRD },
+ { "ssm", 0x80, INSTR_S_RD },
+ { "lpsw", 0x82, INSTR_S_RD },
+ { "diag", 0x83, INSTR_RS_RRRD },
+ { "brxh", 0x84, INSTR_RSI_RRP },
+ { "brxle", 0x85, INSTR_RSI_RRP },
+ { "bxh", 0x86, INSTR_RS_RRRD },
+ { "bxle", 0x87, INSTR_RS_RRRD },
+ { "srl", 0x88, INSTR_RS_R0RD },
+ { "sll", 0x89, INSTR_RS_R0RD },
+ { "sra", 0x8a, INSTR_RS_R0RD },
+ { "sla", 0x8b, INSTR_RS_R0RD },
+ { "srdl", 0x8c, INSTR_RS_R0RD },
+ { "sldl", 0x8d, INSTR_RS_R0RD },
+ { "srda", 0x8e, INSTR_RS_R0RD },
+ { "slda", 0x8f, INSTR_RS_R0RD },
+ { "stm", 0x90, INSTR_RS_RRRD },
+ { "tm", 0x91, INSTR_SI_URD },
+ { "mvi", 0x92, INSTR_SI_URD },
+ { "ts", 0x93, INSTR_S_RD },
+ { "ni", 0x94, INSTR_SI_URD },
+ { "cli", 0x95, INSTR_SI_URD },
+ { "oi", 0x96, INSTR_SI_URD },
+ { "xi", 0x97, INSTR_SI_URD },
+ { "lm", 0x98, INSTR_RS_RRRD },
+ { "trace", 0x99, INSTR_RS_RRRD },
+ { "lam", 0x9a, INSTR_RS_AARD },
+ { "stam", 0x9b, INSTR_RS_AARD },
+ { "mvcle", 0xa8, INSTR_RS_RRRD },
+ { "clcle", 0xa9, INSTR_RS_RRRD },
+ { "stnsm", 0xac, INSTR_SI_URD },
+ { "stosm", 0xad, INSTR_SI_URD },
+ { "sigp", 0xae, INSTR_RS_RRRD },
+ { "mc", 0xaf, INSTR_SI_URD },
+ { "lra", 0xb1, INSTR_RX_RRRD },
+ { "stctl", 0xb6, INSTR_RS_CCRD },
+ { "lctl", 0xb7, INSTR_RS_CCRD },
+ { "cs", 0xba, INSTR_RS_RRRD },
+ { "cds", 0xbb, INSTR_RS_RRRD },
+ { "clm", 0xbd, INSTR_RS_RURD },
+ { "stcm", 0xbe, INSTR_RS_RURD },
+ { "icm", 0xbf, INSTR_RS_RURD },
+ { "mvn", 0xd1, INSTR_SS_L0RDRD },
+ { "mvc", 0xd2, INSTR_SS_L0RDRD },
+ { "mvz", 0xd3, INSTR_SS_L0RDRD },
+ { "nc", 0xd4, INSTR_SS_L0RDRD },
+ { "clc", 0xd5, INSTR_SS_L0RDRD },
+ { "oc", 0xd6, INSTR_SS_L0RDRD },
+ { "xc", 0xd7, INSTR_SS_L0RDRD },
+ { "mvck", 0xd9, INSTR_SS_RRRDRD },
+ { "mvcp", 0xda, INSTR_SS_RRRDRD },
+ { "mvcs", 0xdb, INSTR_SS_RRRDRD },
+ { "tr", 0xdc, INSTR_SS_L0RDRD },
+ { "trt", 0xdd, INSTR_SS_L0RDRD },
+ { "ed", 0xde, INSTR_SS_L0RDRD },
+ { "edmk", 0xdf, INSTR_SS_L0RDRD },
+ { "pku", 0xe1, INSTR_SS_L0RDRD },
+ { "unpku", 0xe2, INSTR_SS_L0RDRD },
+ { "mvcin", 0xe8, INSTR_SS_L0RDRD },
+ { "pka", 0xe9, INSTR_SS_L0RDRD },
+ { "unpka", 0xea, INSTR_SS_L0RDRD },
+ { "plo", 0xee, INSTR_SS_RRRDRD2 },
+ { "srp", 0xf0, INSTR_SS_LIRDRD },
+ { "mvo", 0xf1, INSTR_SS_LLRDRD },
+ { "pack", 0xf2, INSTR_SS_LLRDRD },
+ { "unpk", 0xf3, INSTR_SS_LLRDRD },
+ { "zap", 0xf8, INSTR_SS_LLRDRD },
+ { "cp", 0xf9, INSTR_SS_LLRDRD },
+ { "ap", 0xfa, INSTR_SS_LLRDRD },
+ { "sp", 0xfb, INSTR_SS_LLRDRD },
+ { "mp", 0xfc, INSTR_SS_LLRDRD },
+ { "dp", 0xfd, INSTR_SS_LLRDRD },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_01[] = {
+ { "ptff", 0x04, INSTR_E },
+ { "pfpo", 0x0a, INSTR_E },
+ { "sam64", 0x0e, INSTR_E },
+ { "pr", 0x01, INSTR_E },
+ { "upt", 0x02, INSTR_E },
+ { "sckpf", 0x07, INSTR_E },
+ { "tam", 0x0b, INSTR_E },
+ { "sam24", 0x0c, INSTR_E },
+ { "sam31", 0x0d, INSTR_E },
+ { "trap2", 0xff, INSTR_E },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_a5[] = {
+ { "iihh", 0x00, INSTR_RI_RU },
+ { "iihl", 0x01, INSTR_RI_RU },
+ { "iilh", 0x02, INSTR_RI_RU },
+ { "iill", 0x03, INSTR_RI_RU },
+ { "nihh", 0x04, INSTR_RI_RU },
+ { "nihl", 0x05, INSTR_RI_RU },
+ { "nilh", 0x06, INSTR_RI_RU },
+ { "nill", 0x07, INSTR_RI_RU },
+ { "oihh", 0x08, INSTR_RI_RU },
+ { "oihl", 0x09, INSTR_RI_RU },
+ { "oilh", 0x0a, INSTR_RI_RU },
+ { "oill", 0x0b, INSTR_RI_RU },
+ { "llihh", 0x0c, INSTR_RI_RU },
+ { "llihl", 0x0d, INSTR_RI_RU },
+ { "llilh", 0x0e, INSTR_RI_RU },
+ { "llill", 0x0f, INSTR_RI_RU },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_a7[] = {
+ { "tmhh", 0x02, INSTR_RI_RU },
+ { "tmhl", 0x03, INSTR_RI_RU },
+ { "brctg", 0x07, INSTR_RI_RP },
+ { "lghi", 0x09, INSTR_RI_RI },
+ { "aghi", 0x0b, INSTR_RI_RI },
+ { "mghi", 0x0d, INSTR_RI_RI },
+ { "cghi", 0x0f, INSTR_RI_RI },
+ { "tmlh", 0x00, INSTR_RI_RU },
+ { "tmll", 0x01, INSTR_RI_RU },
+ { "brc", 0x04, INSTR_RI_UP },
+ { "bras", 0x05, INSTR_RI_RP },
+ { "brct", 0x06, INSTR_RI_RP },
+ { "lhi", 0x08, INSTR_RI_RI },
+ { "ahi", 0x0a, INSTR_RI_RI },
+ { "mhi", 0x0c, INSTR_RI_RI },
+ { "chi", 0x0e, INSTR_RI_RI },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_aa[] = {
+ { { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI },
+ { "rion", 0x01, INSTR_RI_RI },
+ { "tric", 0x02, INSTR_RI_RI },
+ { "rioff", 0x03, INSTR_RI_RI },
+ { { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_b2[] = {
+ { "stckf", 0x7c, INSTR_S_RD },
+ { "lpp", 0x80, INSTR_S_RD },
+ { "lcctl", 0x84, INSTR_S_RD },
+ { "lpctl", 0x85, INSTR_S_RD },
+ { "qsi", 0x86, INSTR_S_RD },
+ { "lsctl", 0x87, INSTR_S_RD },
+ { "qctri", 0x8e, INSTR_S_RD },
+ { "stfle", 0xb0, INSTR_S_RD },
+ { "lpswe", 0xb2, INSTR_S_RD },
+ { "srnmb", 0xb8, INSTR_S_RD },
+ { "srnmt", 0xb9, INSTR_S_RD },
+ { "lfas", 0xbd, INSTR_S_RD },
+ { "scctr", 0xe0, INSTR_RRE_RR },
+ { "spctr", 0xe1, INSTR_RRE_RR },
+ { "ecctr", 0xe4, INSTR_RRE_RR },
+ { "epctr", 0xe5, INSTR_RRE_RR },
+ { "ppa", 0xe8, INSTR_RRF_U0RR },
+ { "etnd", 0xec, INSTR_RRE_R0 },
+ { "ecpga", 0xed, INSTR_RRE_RR },
+ { "tend", 0xf8, INSTR_S_00 },
+ { "niai", 0xfa, INSTR_IE_UU },
+ { { 0, LONG_INSN_TABORT }, 0xfc, INSTR_S_RD },
+ { "stidp", 0x02, INSTR_S_RD },
+ { "sck", 0x04, INSTR_S_RD },
+ { "stck", 0x05, INSTR_S_RD },
+ { "sckc", 0x06, INSTR_S_RD },
+ { "stckc", 0x07, INSTR_S_RD },
+ { "spt", 0x08, INSTR_S_RD },
+ { "stpt", 0x09, INSTR_S_RD },
+ { "spka", 0x0a, INSTR_S_RD },
+ { "ipk", 0x0b, INSTR_S_00 },
+ { "ptlb", 0x0d, INSTR_S_00 },
+ { "spx", 0x10, INSTR_S_RD },
+ { "stpx", 0x11, INSTR_S_RD },
+ { "stap", 0x12, INSTR_S_RD },
+ { "sie", 0x14, INSTR_S_RD },
+ { "pc", 0x18, INSTR_S_RD },
+ { "sac", 0x19, INSTR_S_RD },
+ { "cfc", 0x1a, INSTR_S_RD },
+ { "servc", 0x20, INSTR_RRE_RR },
+ { "ipte", 0x21, INSTR_RRE_RR },
+ { "ipm", 0x22, INSTR_RRE_R0 },
+ { "ivsk", 0x23, INSTR_RRE_RR },
+ { "iac", 0x24, INSTR_RRE_R0 },
+ { "ssar", 0x25, INSTR_RRE_R0 },
+ { "epar", 0x26, INSTR_RRE_R0 },
+ { "esar", 0x27, INSTR_RRE_R0 },
+ { "pt", 0x28, INSTR_RRE_RR },
+ { "iske", 0x29, INSTR_RRE_RR },
+ { "rrbe", 0x2a, INSTR_RRE_RR },
+ { "sske", 0x2b, INSTR_RRF_M0RR },
+ { "tb", 0x2c, INSTR_RRE_0R },
+ { "dxr", 0x2d, INSTR_RRE_FF },
+ { "pgin", 0x2e, INSTR_RRE_RR },
+ { "pgout", 0x2f, INSTR_RRE_RR },
+ { "csch", 0x30, INSTR_S_00 },
+ { "hsch", 0x31, INSTR_S_00 },
+ { "msch", 0x32, INSTR_S_RD },
+ { "ssch", 0x33, INSTR_S_RD },
+ { "stsch", 0x34, INSTR_S_RD },
+ { "tsch", 0x35, INSTR_S_RD },
+ { "tpi", 0x36, INSTR_S_RD },
+ { "sal", 0x37, INSTR_S_00 },
+ { "rsch", 0x38, INSTR_S_00 },
+ { "stcrw", 0x39, INSTR_S_RD },
+ { "stcps", 0x3a, INSTR_S_RD },
+ { "rchp", 0x3b, INSTR_S_00 },
+ { "schm", 0x3c, INSTR_S_00 },
+ { "bakr", 0x40, INSTR_RRE_RR },
+ { "cksm", 0x41, INSTR_RRE_RR },
+ { "sqdr", 0x44, INSTR_RRE_FF },
+ { "sqer", 0x45, INSTR_RRE_FF },
+ { "stura", 0x46, INSTR_RRE_RR },
+ { "msta", 0x47, INSTR_RRE_R0 },
+ { "palb", 0x48, INSTR_RRE_00 },
+ { "ereg", 0x49, INSTR_RRE_RR },
+ { "esta", 0x4a, INSTR_RRE_RR },
+ { "lura", 0x4b, INSTR_RRE_RR },
+ { "tar", 0x4c, INSTR_RRE_AR },
+ { "cpya", 0x4d, INSTR_RRE_AA },
+ { "sar", 0x4e, INSTR_RRE_AR },
+ { "ear", 0x4f, INSTR_RRE_RA },
+ { "csp", 0x50, INSTR_RRE_RR },
+ { "msr", 0x52, INSTR_RRE_RR },
+ { "mvpg", 0x54, INSTR_RRE_RR },
+ { "mvst", 0x55, INSTR_RRE_RR },
+ { "cuse", 0x57, INSTR_RRE_RR },
+ { "bsg", 0x58, INSTR_RRE_RR },
+ { "bsa", 0x5a, INSTR_RRE_RR },
+ { "clst", 0x5d, INSTR_RRE_RR },
+ { "srst", 0x5e, INSTR_RRE_RR },
+ { "cmpsc", 0x63, INSTR_RRE_RR },
+ { "siga", 0x74, INSTR_S_RD },
+ { "xsch", 0x76, INSTR_S_00 },
+ { "rp", 0x77, INSTR_S_RD },
+ { "stcke", 0x78, INSTR_S_RD },
+ { "sacf", 0x79, INSTR_S_RD },
+ { "stsi", 0x7d, INSTR_S_RD },
+ { "srnm", 0x99, INSTR_S_RD },
+ { "stfpc", 0x9c, INSTR_S_RD },
+ { "lfpc", 0x9d, INSTR_S_RD },
+ { "tre", 0xa5, INSTR_RRE_RR },
+ { "cuutf", 0xa6, INSTR_RRF_M0RR },
+ { "cutfu", 0xa7, INSTR_RRF_M0RR },
+ { "stfl", 0xb1, INSTR_S_RD },
+ { "trap4", 0xff, INSTR_S_RD },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_b3[] = {
+ { "maylr", 0x38, INSTR_RRF_F0FF },
+ { "mylr", 0x39, INSTR_RRF_F0FF },
+ { "mayr", 0x3a, INSTR_RRF_F0FF },
+ { "myr", 0x3b, INSTR_RRF_F0FF },
+ { "mayhr", 0x3c, INSTR_RRF_F0FF },
+ { "myhr", 0x3d, INSTR_RRF_F0FF },
+ { "lpdfr", 0x70, INSTR_RRE_FF },
+ { "lndfr", 0x71, INSTR_RRE_FF },
+ { "cpsdr", 0x72, INSTR_RRF_F0FF2 },
+ { "lcdfr", 0x73, INSTR_RRE_FF },
+ { "sfasr", 0x85, INSTR_RRE_R0 },
+ { { 0, LONG_INSN_CELFBR }, 0x90, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CDLFBR }, 0x91, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CXLFBR }, 0x92, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CEFBRA }, 0x94, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CDFBRA }, 0x95, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CXFBRA }, 0x96, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CFEBRA }, 0x98, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CFDBRA }, 0x99, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CFXBRA }, 0x9a, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CLFEBR }, 0x9c, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CLFDBR }, 0x9d, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CLFXBR }, 0x9e, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CELGBR }, 0xa0, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CDLGBR }, 0xa1, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CXLGBR }, 0xa2, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CEGBRA }, 0xa4, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CDGBRA }, 0xa5, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CXGBRA }, 0xa6, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CGEBRA }, 0xa8, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CGDBRA }, 0xa9, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CGXBRA }, 0xaa, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CLGEBR }, 0xac, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CLGDBR }, 0xad, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CLGXBR }, 0xae, INSTR_RRF_UUFR },
+ { "ldgr", 0xc1, INSTR_RRE_FR },
+ { "cegr", 0xc4, INSTR_RRE_FR },
+ { "cdgr", 0xc5, INSTR_RRE_FR },
+ { "cxgr", 0xc6, INSTR_RRE_FR },
+ { "cger", 0xc8, INSTR_RRF_U0RF },
+ { "cgdr", 0xc9, INSTR_RRF_U0RF },
+ { "cgxr", 0xca, INSTR_RRF_U0RF },
+ { "lgdr", 0xcd, INSTR_RRE_RF },
+ { "mdtra", 0xd0, INSTR_RRF_FUFF2 },
+ { "ddtra", 0xd1, INSTR_RRF_FUFF2 },
+ { "adtra", 0xd2, INSTR_RRF_FUFF2 },
+ { "sdtra", 0xd3, INSTR_RRF_FUFF2 },
+ { "ldetr", 0xd4, INSTR_RRF_0UFF },
+ { "ledtr", 0xd5, INSTR_RRF_UUFF },
+ { "ltdtr", 0xd6, INSTR_RRE_FF },
+ { "fidtr", 0xd7, INSTR_RRF_UUFF },
+ { "mxtra", 0xd8, INSTR_RRF_FUFF2 },
+ { "dxtra", 0xd9, INSTR_RRF_FUFF2 },
+ { "axtra", 0xda, INSTR_RRF_FUFF2 },
+ { "sxtra", 0xdb, INSTR_RRF_FUFF2 },
+ { "lxdtr", 0xdc, INSTR_RRF_0UFF },
+ { "ldxtr", 0xdd, INSTR_RRF_UUFF },
+ { "ltxtr", 0xde, INSTR_RRE_FF },
+ { "fixtr", 0xdf, INSTR_RRF_UUFF },
+ { "kdtr", 0xe0, INSTR_RRE_FF },
+ { { 0, LONG_INSN_CGDTRA }, 0xe1, INSTR_RRF_UURF },
+ { "cudtr", 0xe2, INSTR_RRE_RF },
+ { "csdtr", 0xe3, INSTR_RRE_RF },
+ { "cdtr", 0xe4, INSTR_RRE_FF },
+ { "eedtr", 0xe5, INSTR_RRE_RF },
+ { "esdtr", 0xe7, INSTR_RRE_RF },
+ { "kxtr", 0xe8, INSTR_RRE_FF },
+ { { 0, LONG_INSN_CGXTRA }, 0xe9, INSTR_RRF_UUFR },
+ { "cuxtr", 0xea, INSTR_RRE_RF },
+ { "csxtr", 0xeb, INSTR_RRE_RF },
+ { "cxtr", 0xec, INSTR_RRE_FF },
+ { "eextr", 0xed, INSTR_RRE_RF },
+ { "esxtr", 0xef, INSTR_RRE_RF },
+ { { 0, LONG_INSN_CDGTRA }, 0xf1, INSTR_RRF_UUFR },
+ { "cdutr", 0xf2, INSTR_RRE_FR },
+ { "cdstr", 0xf3, INSTR_RRE_FR },
+ { "cedtr", 0xf4, INSTR_RRE_FF },
+ { "qadtr", 0xf5, INSTR_RRF_FUFF },
+ { "iedtr", 0xf6, INSTR_RRF_F0FR },
+ { "rrdtr", 0xf7, INSTR_RRF_FFRU },
+ { { 0, LONG_INSN_CXGTRA }, 0xf9, INSTR_RRF_UURF },
+ { "cxutr", 0xfa, INSTR_RRE_FR },
+ { "cxstr", 0xfb, INSTR_RRE_FR },
+ { "cextr", 0xfc, INSTR_RRE_FF },
+ { "qaxtr", 0xfd, INSTR_RRF_FUFF },
+ { "iextr", 0xfe, INSTR_RRF_F0FR },
+ { "rrxtr", 0xff, INSTR_RRF_FFRU },
+ { "lpebr", 0x00, INSTR_RRE_FF },
+ { "lnebr", 0x01, INSTR_RRE_FF },
+ { "ltebr", 0x02, INSTR_RRE_FF },
+ { "lcebr", 0x03, INSTR_RRE_FF },
+ { "ldebr", 0x04, INSTR_RRE_FF },
+ { "lxdbr", 0x05, INSTR_RRE_FF },
+ { "lxebr", 0x06, INSTR_RRE_FF },
+ { "mxdbr", 0x07, INSTR_RRE_FF },
+ { "kebr", 0x08, INSTR_RRE_FF },
+ { "cebr", 0x09, INSTR_RRE_FF },
+ { "aebr", 0x0a, INSTR_RRE_FF },
+ { "sebr", 0x0b, INSTR_RRE_FF },
+ { "mdebr", 0x0c, INSTR_RRE_FF },
+ { "debr", 0x0d, INSTR_RRE_FF },
+ { "maebr", 0x0e, INSTR_RRF_F0FF },
+ { "msebr", 0x0f, INSTR_RRF_F0FF },
+ { "lpdbr", 0x10, INSTR_RRE_FF },
+ { "lndbr", 0x11, INSTR_RRE_FF },
+ { "ltdbr", 0x12, INSTR_RRE_FF },
+ { "lcdbr", 0x13, INSTR_RRE_FF },
+ { "sqebr", 0x14, INSTR_RRE_FF },
+ { "sqdbr", 0x15, INSTR_RRE_FF },
+ { "sqxbr", 0x16, INSTR_RRE_FF },
+ { "meebr", 0x17, INSTR_RRE_FF },
+ { "kdbr", 0x18, INSTR_RRE_FF },
+ { "cdbr", 0x19, INSTR_RRE_FF },
+ { "adbr", 0x1a, INSTR_RRE_FF },
+ { "sdbr", 0x1b, INSTR_RRE_FF },
+ { "mdbr", 0x1c, INSTR_RRE_FF },
+ { "ddbr", 0x1d, INSTR_RRE_FF },
+ { "madbr", 0x1e, INSTR_RRF_F0FF },
+ { "msdbr", 0x1f, INSTR_RRF_F0FF },
+ { "lder", 0x24, INSTR_RRE_FF },
+ { "lxdr", 0x25, INSTR_RRE_FF },
+ { "lxer", 0x26, INSTR_RRE_FF },
+ { "maer", 0x2e, INSTR_RRF_F0FF },
+ { "mser", 0x2f, INSTR_RRF_F0FF },
+ { "sqxr", 0x36, INSTR_RRE_FF },
+ { "meer", 0x37, INSTR_RRE_FF },
+ { "madr", 0x3e, INSTR_RRF_F0FF },
+ { "msdr", 0x3f, INSTR_RRF_F0FF },
+ { "lpxbr", 0x40, INSTR_RRE_FF },
+ { "lnxbr", 0x41, INSTR_RRE_FF },
+ { "ltxbr", 0x42, INSTR_RRE_FF },
+ { "lcxbr", 0x43, INSTR_RRE_FF },
+ { { 0, LONG_INSN_LEDBRA }, 0x44, INSTR_RRF_UUFF },
+ { { 0, LONG_INSN_LDXBRA }, 0x45, INSTR_RRF_UUFF },
+ { { 0, LONG_INSN_LEXBRA }, 0x46, INSTR_RRF_UUFF },
+ { { 0, LONG_INSN_FIXBRA }, 0x47, INSTR_RRF_UUFF },
+ { "kxbr", 0x48, INSTR_RRE_FF },
+ { "cxbr", 0x49, INSTR_RRE_FF },
+ { "axbr", 0x4a, INSTR_RRE_FF },
+ { "sxbr", 0x4b, INSTR_RRE_FF },
+ { "mxbr", 0x4c, INSTR_RRE_FF },
+ { "dxbr", 0x4d, INSTR_RRE_FF },
+ { "tbedr", 0x50, INSTR_RRF_U0FF },
+ { "tbdr", 0x51, INSTR_RRF_U0FF },
+ { "diebr", 0x53, INSTR_RRF_FUFF },
+ { { 0, LONG_INSN_FIEBRA }, 0x57, INSTR_RRF_UUFF },
+ { "thder", 0x58, INSTR_RRE_FF },
+ { "thdr", 0x59, INSTR_RRE_FF },
+ { "didbr", 0x5b, INSTR_RRF_FUFF },
+ { { 0, LONG_INSN_FIDBRA }, 0x5f, INSTR_RRF_UUFF },
+ { "lpxr", 0x60, INSTR_RRE_FF },
+ { "lnxr", 0x61, INSTR_RRE_FF },
+ { "ltxr", 0x62, INSTR_RRE_FF },
+ { "lcxr", 0x63, INSTR_RRE_FF },
+ { "lxr", 0x65, INSTR_RRE_FF },
+ { "lexr", 0x66, INSTR_RRE_FF },
+ { "fixr", 0x67, INSTR_RRE_FF },
+ { "cxr", 0x69, INSTR_RRE_FF },
+ { "lzer", 0x74, INSTR_RRE_F0 },
+ { "lzdr", 0x75, INSTR_RRE_F0 },
+ { "lzxr", 0x76, INSTR_RRE_F0 },
+ { "fier", 0x77, INSTR_RRE_FF },
+ { "fidr", 0x7f, INSTR_RRE_FF },
+ { "sfpc", 0x84, INSTR_RRE_RR_OPT },
+ { "efpc", 0x8c, INSTR_RRE_RR_OPT },
+ { "cefbr", 0x94, INSTR_RRE_RF },
+ { "cdfbr", 0x95, INSTR_RRE_RF },
+ { "cxfbr", 0x96, INSTR_RRE_RF },
+ { "cfebr", 0x98, INSTR_RRF_U0RF },
+ { "cfdbr", 0x99, INSTR_RRF_U0RF },
+ { "cfxbr", 0x9a, INSTR_RRF_U0RF },
+ { "cefr", 0xb4, INSTR_RRE_FR },
+ { "cdfr", 0xb5, INSTR_RRE_FR },
+ { "cxfr", 0xb6, INSTR_RRE_FR },
+ { "cfer", 0xb8, INSTR_RRF_U0RF },
+ { "cfdr", 0xb9, INSTR_RRF_U0RF },
+ { "cfxr", 0xba, INSTR_RRF_U0RF },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_b9[] = {
+ { "lpgr", 0x00, INSTR_RRE_RR },
+ { "lngr", 0x01, INSTR_RRE_RR },
+ { "ltgr", 0x02, INSTR_RRE_RR },
+ { "lcgr", 0x03, INSTR_RRE_RR },
+ { "lgr", 0x04, INSTR_RRE_RR },
+ { "lurag", 0x05, INSTR_RRE_RR },
+ { "lgbr", 0x06, INSTR_RRE_RR },
+ { "lghr", 0x07, INSTR_RRE_RR },
+ { "agr", 0x08, INSTR_RRE_RR },
+ { "sgr", 0x09, INSTR_RRE_RR },
+ { "algr", 0x0a, INSTR_RRE_RR },
+ { "slgr", 0x0b, INSTR_RRE_RR },
+ { "msgr", 0x0c, INSTR_RRE_RR },
+ { "dsgr", 0x0d, INSTR_RRE_RR },
+ { "eregg", 0x0e, INSTR_RRE_RR },
+ { "lrvgr", 0x0f, INSTR_RRE_RR },
+ { "lpgfr", 0x10, INSTR_RRE_RR },
+ { "lngfr", 0x11, INSTR_RRE_RR },
+ { "ltgfr", 0x12, INSTR_RRE_RR },
+ { "lcgfr", 0x13, INSTR_RRE_RR },
+ { "lgfr", 0x14, INSTR_RRE_RR },
+ { "llgfr", 0x16, INSTR_RRE_RR },
+ { "llgtr", 0x17, INSTR_RRE_RR },
+ { "agfr", 0x18, INSTR_RRE_RR },
+ { "sgfr", 0x19, INSTR_RRE_RR },
+ { "algfr", 0x1a, INSTR_RRE_RR },
+ { "slgfr", 0x1b, INSTR_RRE_RR },
+ { "msgfr", 0x1c, INSTR_RRE_RR },
+ { "dsgfr", 0x1d, INSTR_RRE_RR },
+ { "cgr", 0x20, INSTR_RRE_RR },
+ { "clgr", 0x21, INSTR_RRE_RR },
+ { "sturg", 0x25, INSTR_RRE_RR },
+ { "lbr", 0x26, INSTR_RRE_RR },
+ { "lhr", 0x27, INSTR_RRE_RR },
+ { "cgfr", 0x30, INSTR_RRE_RR },
+ { "clgfr", 0x31, INSTR_RRE_RR },
+ { "cfdtr", 0x41, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CLGDTR }, 0x42, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CLFDTR }, 0x43, INSTR_RRF_UURF },
+ { "bctgr", 0x46, INSTR_RRE_RR },
+ { "cfxtr", 0x49, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CLGXTR }, 0x4a, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CLFXTR }, 0x4b, INSTR_RRF_UUFR },
+ { "cdftr", 0x51, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CDLGTR }, 0x52, INSTR_RRF_UUFR },
+ { { 0, LONG_INSN_CDLFTR }, 0x53, INSTR_RRF_UUFR },
+ { "cxftr", 0x59, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CXLGTR }, 0x5a, INSTR_RRF_UURF },
+ { { 0, LONG_INSN_CXLFTR }, 0x5b, INSTR_RRF_UUFR },
+ { "cgrt", 0x60, INSTR_RRF_U0RR },
+ { "clgrt", 0x61, INSTR_RRF_U0RR },
+ { "crt", 0x72, INSTR_RRF_U0RR },
+ { "clrt", 0x73, INSTR_RRF_U0RR },
+ { "ngr", 0x80, INSTR_RRE_RR },
+ { "ogr", 0x81, INSTR_RRE_RR },
+ { "xgr", 0x82, INSTR_RRE_RR },
+ { "flogr", 0x83, INSTR_RRE_RR },
+ { "llgcr", 0x84, INSTR_RRE_RR },
+ { "llghr", 0x85, INSTR_RRE_RR },
+ { "mlgr", 0x86, INSTR_RRE_RR },
+ { "dlgr", 0x87, INSTR_RRE_RR },
+ { "alcgr", 0x88, INSTR_RRE_RR },
+ { "slbgr", 0x89, INSTR_RRE_RR },
+ { "cspg", 0x8a, INSTR_RRE_RR },
+ { "idte", 0x8e, INSTR_RRF_R0RR },
+ { "crdte", 0x8f, INSTR_RRF_RMRR },
+ { "llcr", 0x94, INSTR_RRE_RR },
+ { "llhr", 0x95, INSTR_RRE_RR },
+ { "esea", 0x9d, INSTR_RRE_R0 },
+ { "ptf", 0xa2, INSTR_RRE_R0 },
+ { "lptea", 0xaa, INSTR_RRF_RURR },
+ { "rrbm", 0xae, INSTR_RRE_RR },
+ { "pfmf", 0xaf, INSTR_RRE_RR },
+ { "cu14", 0xb0, INSTR_RRF_M0RR },
+ { "cu24", 0xb1, INSTR_RRF_M0RR },
+ { "cu41", 0xb2, INSTR_RRE_RR },
+ { "cu42", 0xb3, INSTR_RRE_RR },
+ { "trtre", 0xbd, INSTR_RRF_M0RR },
+ { "srstu", 0xbe, INSTR_RRE_RR },
+ { "trte", 0xbf, INSTR_RRF_M0RR },
+ { "ahhhr", 0xc8, INSTR_RRF_R0RR2 },
+ { "shhhr", 0xc9, INSTR_RRF_R0RR2 },
+ { { 0, LONG_INSN_ALHHHR }, 0xca, INSTR_RRF_R0RR2 },
+ { { 0, LONG_INSN_SLHHHR }, 0xcb, INSTR_RRF_R0RR2 },
+ { "chhr", 0xcd, INSTR_RRE_RR },
+ { "clhhr", 0xcf, INSTR_RRE_RR },
+ { { 0, LONG_INSN_PCISTG }, 0xd0, INSTR_RRE_RR },
+ { "pcilg", 0xd2, INSTR_RRE_RR },
+ { "rpcit", 0xd3, INSTR_RRE_RR },
+ { "ahhlr", 0xd8, INSTR_RRF_R0RR2 },
+ { "shhlr", 0xd9, INSTR_RRF_R0RR2 },
+ { { 0, LONG_INSN_ALHHLR }, 0xda, INSTR_RRF_R0RR2 },
+ { { 0, LONG_INSN_SLHHLR }, 0xdb, INSTR_RRF_R0RR2 },
+ { "chlr", 0xdd, INSTR_RRE_RR },
+ { "clhlr", 0xdf, INSTR_RRE_RR },
+ { { 0, LONG_INSN_POPCNT }, 0xe1, INSTR_RRE_RR },
+ { "locgr", 0xe2, INSTR_RRF_M0RR },
+ { "ngrk", 0xe4, INSTR_RRF_R0RR2 },
+ { "ogrk", 0xe6, INSTR_RRF_R0RR2 },
+ { "xgrk", 0xe7, INSTR_RRF_R0RR2 },
+ { "agrk", 0xe8, INSTR_RRF_R0RR2 },
+ { "sgrk", 0xe9, INSTR_RRF_R0RR2 },
+ { "algrk", 0xea, INSTR_RRF_R0RR2 },
+ { "slgrk", 0xeb, INSTR_RRF_R0RR2 },
+ { "locr", 0xf2, INSTR_RRF_M0RR },
+ { "nrk", 0xf4, INSTR_RRF_R0RR2 },
+ { "ork", 0xf6, INSTR_RRF_R0RR2 },
+ { "xrk", 0xf7, INSTR_RRF_R0RR2 },
+ { "ark", 0xf8, INSTR_RRF_R0RR2 },
+ { "srk", 0xf9, INSTR_RRF_R0RR2 },
+ { "alrk", 0xfa, INSTR_RRF_R0RR2 },
+ { "slrk", 0xfb, INSTR_RRF_R0RR2 },
+ { "kmac", 0x1e, INSTR_RRE_RR },
+ { "lrvr", 0x1f, INSTR_RRE_RR },
+ { "km", 0x2e, INSTR_RRE_RR },
+ { "kmc", 0x2f, INSTR_RRE_RR },
+ { "kimd", 0x3e, INSTR_RRE_RR },
+ { "klmd", 0x3f, INSTR_RRE_RR },
+ { "epsw", 0x8d, INSTR_RRE_RR },
+ { "trtt", 0x90, INSTR_RRF_M0RR },
+ { "trto", 0x91, INSTR_RRF_M0RR },
+ { "trot", 0x92, INSTR_RRF_M0RR },
+ { "troo", 0x93, INSTR_RRF_M0RR },
+ { "mlr", 0x96, INSTR_RRE_RR },
+ { "dlr", 0x97, INSTR_RRE_RR },
+ { "alcr", 0x98, INSTR_RRE_RR },
+ { "slbr", 0x99, INSTR_RRE_RR },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_c0[] = {
+ { "lgfi", 0x01, INSTR_RIL_RI },
+ { "xihf", 0x06, INSTR_RIL_RU },
+ { "xilf", 0x07, INSTR_RIL_RU },
+ { "iihf", 0x08, INSTR_RIL_RU },
+ { "iilf", 0x09, INSTR_RIL_RU },
+ { "nihf", 0x0a, INSTR_RIL_RU },
+ { "nilf", 0x0b, INSTR_RIL_RU },
+ { "oihf", 0x0c, INSTR_RIL_RU },
+ { "oilf", 0x0d, INSTR_RIL_RU },
+ { "llihf", 0x0e, INSTR_RIL_RU },
+ { "llilf", 0x0f, INSTR_RIL_RU },
+ { "larl", 0x00, INSTR_RIL_RP },
+ { "brcl", 0x04, INSTR_RIL_UP },
+ { "brasl", 0x05, INSTR_RIL_RP },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_c2[] = {
+ { "msgfi", 0x00, INSTR_RIL_RI },
+ { "msfi", 0x01, INSTR_RIL_RI },
+ { "slgfi", 0x04, INSTR_RIL_RU },
+ { "slfi", 0x05, INSTR_RIL_RU },
+ { "agfi", 0x08, INSTR_RIL_RI },
+ { "afi", 0x09, INSTR_RIL_RI },
+ { "algfi", 0x0a, INSTR_RIL_RU },
+ { "alfi", 0x0b, INSTR_RIL_RU },
+ { "cgfi", 0x0c, INSTR_RIL_RI },
+ { "cfi", 0x0d, INSTR_RIL_RI },
+ { "clgfi", 0x0e, INSTR_RIL_RU },
+ { "clfi", 0x0f, INSTR_RIL_RU },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_c4[] = {
+ { "llhrl", 0x02, INSTR_RIL_RP },
+ { "lghrl", 0x04, INSTR_RIL_RP },
+ { "lhrl", 0x05, INSTR_RIL_RP },
+ { { 0, LONG_INSN_LLGHRL }, 0x06, INSTR_RIL_RP },
+ { "sthrl", 0x07, INSTR_RIL_RP },
+ { "lgrl", 0x08, INSTR_RIL_RP },
+ { "stgrl", 0x0b, INSTR_RIL_RP },
+ { "lgfrl", 0x0c, INSTR_RIL_RP },
+ { "lrl", 0x0d, INSTR_RIL_RP },
+ { { 0, LONG_INSN_LLGFRL }, 0x0e, INSTR_RIL_RP },
+ { "strl", 0x0f, INSTR_RIL_RP },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_c6[] = {
+ { "exrl", 0x00, INSTR_RIL_RP },
+ { "pfdrl", 0x02, INSTR_RIL_UP },
+ { "cghrl", 0x04, INSTR_RIL_RP },
+ { "chrl", 0x05, INSTR_RIL_RP },
+ { { 0, LONG_INSN_CLGHRL }, 0x06, INSTR_RIL_RP },
+ { "clhrl", 0x07, INSTR_RIL_RP },
+ { "cgrl", 0x08, INSTR_RIL_RP },
+ { "clgrl", 0x0a, INSTR_RIL_RP },
+ { "cgfrl", 0x0c, INSTR_RIL_RP },
+ { "crl", 0x0d, INSTR_RIL_RP },
+ { { 0, LONG_INSN_CLGFRL }, 0x0e, INSTR_RIL_RP },
+ { "clrl", 0x0f, INSTR_RIL_RP },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_c8[] = {
+ { "mvcos", 0x00, INSTR_SSF_RRDRD },
+ { "ectg", 0x01, INSTR_SSF_RRDRD },
+ { "csst", 0x02, INSTR_SSF_RRDRD },
+ { "lpd", 0x04, INSTR_SSF_RRDRD2 },
+ { "lpdg", 0x05, INSTR_SSF_RRDRD2 },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_cc[] = {
+ { "brcth", 0x06, INSTR_RIL_RP },
+ { "aih", 0x08, INSTR_RIL_RI },
+ { "alsih", 0x0a, INSTR_RIL_RI },
+ { { 0, LONG_INSN_ALSIHN }, 0x0b, INSTR_RIL_RI },
+ { "cih", 0x0d, INSTR_RIL_RI },
+ { "clih", 0x0f, INSTR_RIL_RI },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_e3[] = {
+ { "ltg", 0x02, INSTR_RXY_RRRD },
+ { "lrag", 0x03, INSTR_RXY_RRRD },
+ { "lg", 0x04, INSTR_RXY_RRRD },
+ { "cvby", 0x06, INSTR_RXY_RRRD },
+ { "ag", 0x08, INSTR_RXY_RRRD },
+ { "sg", 0x09, INSTR_RXY_RRRD },
+ { "alg", 0x0a, INSTR_RXY_RRRD },
+ { "slg", 0x0b, INSTR_RXY_RRRD },
+ { "msg", 0x0c, INSTR_RXY_RRRD },
+ { "dsg", 0x0d, INSTR_RXY_RRRD },
+ { "cvbg", 0x0e, INSTR_RXY_RRRD },
+ { "lrvg", 0x0f, INSTR_RXY_RRRD },
+ { "lt", 0x12, INSTR_RXY_RRRD },
+ { "lray", 0x13, INSTR_RXY_RRRD },
+ { "lgf", 0x14, INSTR_RXY_RRRD },
+ { "lgh", 0x15, INSTR_RXY_RRRD },
+ { "llgf", 0x16, INSTR_RXY_RRRD },
+ { "llgt", 0x17, INSTR_RXY_RRRD },
+ { "agf", 0x18, INSTR_RXY_RRRD },
+ { "sgf", 0x19, INSTR_RXY_RRRD },
+ { "algf", 0x1a, INSTR_RXY_RRRD },
+ { "slgf", 0x1b, INSTR_RXY_RRRD },
+ { "msgf", 0x1c, INSTR_RXY_RRRD },
+ { "dsgf", 0x1d, INSTR_RXY_RRRD },
+ { "cg", 0x20, INSTR_RXY_RRRD },
+ { "clg", 0x21, INSTR_RXY_RRRD },
+ { "stg", 0x24, INSTR_RXY_RRRD },
+ { "ntstg", 0x25, INSTR_RXY_RRRD },
+ { "cvdy", 0x26, INSTR_RXY_RRRD },
+ { "cvdg", 0x2e, INSTR_RXY_RRRD },
+ { "strvg", 0x2f, INSTR_RXY_RRRD },
+ { "cgf", 0x30, INSTR_RXY_RRRD },
+ { "clgf", 0x31, INSTR_RXY_RRRD },
+ { "ltgf", 0x32, INSTR_RXY_RRRD },
+ { "cgh", 0x34, INSTR_RXY_RRRD },
+ { "pfd", 0x36, INSTR_RXY_URRD },
+ { "strvh", 0x3f, INSTR_RXY_RRRD },
+ { "bctg", 0x46, INSTR_RXY_RRRD },
+ { "sty", 0x50, INSTR_RXY_RRRD },
+ { "msy", 0x51, INSTR_RXY_RRRD },
+ { "ny", 0x54, INSTR_RXY_RRRD },
+ { "cly", 0x55, INSTR_RXY_RRRD },
+ { "oy", 0x56, INSTR_RXY_RRRD },
+ { "xy", 0x57, INSTR_RXY_RRRD },
+ { "ly", 0x58, INSTR_RXY_RRRD },
+ { "cy", 0x59, INSTR_RXY_RRRD },
+ { "ay", 0x5a, INSTR_RXY_RRRD },
+ { "sy", 0x5b, INSTR_RXY_RRRD },
+ { "mfy", 0x5c, INSTR_RXY_RRRD },
+ { "aly", 0x5e, INSTR_RXY_RRRD },
+ { "sly", 0x5f, INSTR_RXY_RRRD },
+ { "sthy", 0x70, INSTR_RXY_RRRD },
+ { "lay", 0x71, INSTR_RXY_RRRD },
+ { "stcy", 0x72, INSTR_RXY_RRRD },
+ { "icy", 0x73, INSTR_RXY_RRRD },
+ { "laey", 0x75, INSTR_RXY_RRRD },
+ { "lb", 0x76, INSTR_RXY_RRRD },
+ { "lgb", 0x77, INSTR_RXY_RRRD },
+ { "lhy", 0x78, INSTR_RXY_RRRD },
+ { "chy", 0x79, INSTR_RXY_RRRD },
+ { "ahy", 0x7a, INSTR_RXY_RRRD },
+ { "shy", 0x7b, INSTR_RXY_RRRD },
+ { "mhy", 0x7c, INSTR_RXY_RRRD },
+ { "ng", 0x80, INSTR_RXY_RRRD },
+ { "og", 0x81, INSTR_RXY_RRRD },
+ { "xg", 0x82, INSTR_RXY_RRRD },
+ { "lgat", 0x85, INSTR_RXY_RRRD },
+ { "mlg", 0x86, INSTR_RXY_RRRD },
+ { "dlg", 0x87, INSTR_RXY_RRRD },
+ { "alcg", 0x88, INSTR_RXY_RRRD },
+ { "slbg", 0x89, INSTR_RXY_RRRD },
+ { "stpq", 0x8e, INSTR_RXY_RRRD },
+ { "lpq", 0x8f, INSTR_RXY_RRRD },
+ { "llgc", 0x90, INSTR_RXY_RRRD },
+ { "llgh", 0x91, INSTR_RXY_RRRD },
+ { "llc", 0x94, INSTR_RXY_RRRD },
+ { "llh", 0x95, INSTR_RXY_RRRD },
+ { { 0, LONG_INSN_LLGTAT }, 0x9c, INSTR_RXY_RRRD },
+ { { 0, LONG_INSN_LLGFAT }, 0x9d, INSTR_RXY_RRRD },
+ { "lat", 0x9f, INSTR_RXY_RRRD },
+ { "lbh", 0xc0, INSTR_RXY_RRRD },
+ { "llch", 0xc2, INSTR_RXY_RRRD },
+ { "stch", 0xc3, INSTR_RXY_RRRD },
+ { "lhh", 0xc4, INSTR_RXY_RRRD },
+ { "llhh", 0xc6, INSTR_RXY_RRRD },
+ { "sthh", 0xc7, INSTR_RXY_RRRD },
+ { "lfhat", 0xc8, INSTR_RXY_RRRD },
+ { "lfh", 0xca, INSTR_RXY_RRRD },
+ { "stfh", 0xcb, INSTR_RXY_RRRD },
+ { "chf", 0xcd, INSTR_RXY_RRRD },
+ { "clhf", 0xcf, INSTR_RXY_RRRD },
+ { { 0, LONG_INSN_MPCIFC }, 0xd0, INSTR_RXY_RRRD },
+ { { 0, LONG_INSN_STPCIFC }, 0xd4, INSTR_RXY_RRRD },
+ { "lrv", 0x1e, INSTR_RXY_RRRD },
+ { "lrvh", 0x1f, INSTR_RXY_RRRD },
+ { "strv", 0x3e, INSTR_RXY_RRRD },
+ { "ml", 0x96, INSTR_RXY_RRRD },
+ { "dl", 0x97, INSTR_RXY_RRRD },
+ { "alc", 0x98, INSTR_RXY_RRRD },
+ { "slb", 0x99, INSTR_RXY_RRRD },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_e5[] = {
+ { "strag", 0x02, INSTR_SSE_RDRD },
+ { "mvhhi", 0x44, INSTR_SIL_RDI },
+ { "mvghi", 0x48, INSTR_SIL_RDI },
+ { "mvhi", 0x4c, INSTR_SIL_RDI },
+ { "chhsi", 0x54, INSTR_SIL_RDI },
+ { { 0, LONG_INSN_CLHHSI }, 0x55, INSTR_SIL_RDU },
+ { "cghsi", 0x58, INSTR_SIL_RDI },
+ { { 0, LONG_INSN_CLGHSI }, 0x59, INSTR_SIL_RDU },
+ { "chsi", 0x5c, INSTR_SIL_RDI },
+ { { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU },
+ { { 0, LONG_INSN_TBEGIN }, 0x60, INSTR_SIL_RDU },
+ { { 0, LONG_INSN_TBEGINC }, 0x61, INSTR_SIL_RDU },
+ { "lasp", 0x00, INSTR_SSE_RDRD },
+ { "tprot", 0x01, INSTR_SSE_RDRD },
+ { "mvcsk", 0x0e, INSTR_SSE_RDRD },
+ { "mvcdk", 0x0f, INSTR_SSE_RDRD },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_e7[] = {
+ { "lcbb", 0x27, INSTR_RXE_RRRDM },
+ { "vgef", 0x13, INSTR_VRV_VVRDM },
+ { "vgeg", 0x12, INSTR_VRV_VVRDM },
+ { "vgbm", 0x44, INSTR_VRI_V0I0 },
+ { "vgm", 0x46, INSTR_VRI_V0IIM },
+ { "vl", 0x06, INSTR_VRX_VRRD0 },
+ { "vlr", 0x56, INSTR_VRR_VV00000 },
+ { "vlrp", 0x05, INSTR_VRX_VRRDM },
+ { "vleb", 0x00, INSTR_VRX_VRRDM },
+ { "vleh", 0x01, INSTR_VRX_VRRDM },
+ { "vlef", 0x03, INSTR_VRX_VRRDM },
+ { "vleg", 0x02, INSTR_VRX_VRRDM },
+ { "vleib", 0x40, INSTR_VRI_V0IM },
+ { "vleih", 0x41, INSTR_VRI_V0IM },
+ { "vleif", 0x43, INSTR_VRI_V0IM },
+ { "vleig", 0x42, INSTR_VRI_V0IM },
+ { "vlgv", 0x21, INSTR_VRS_RVRDM },
+ { "vllez", 0x04, INSTR_VRX_VRRDM },
+ { "vlm", 0x36, INSTR_VRS_VVRD0 },
+ { "vlbb", 0x07, INSTR_VRX_VRRDM },
+ { "vlvg", 0x22, INSTR_VRS_VRRDM },
+ { "vlvgp", 0x62, INSTR_VRR_VRR0000 },
+ { "vll", 0x37, INSTR_VRS_VRRD0 },
+ { "vmrh", 0x61, INSTR_VRR_VVV000M },
+ { "vmrl", 0x60, INSTR_VRR_VVV000M },
+ { "vpk", 0x94, INSTR_VRR_VVV000M },
+ { "vpks", 0x97, INSTR_VRR_VVV0M0M },
+ { "vpkls", 0x95, INSTR_VRR_VVV0M0M },
+ { "vperm", 0x8c, INSTR_VRR_VVV000V },
+ { "vpdi", 0x84, INSTR_VRR_VVV000M },
+ { "vrep", 0x4d, INSTR_VRI_VVIM },
+ { "vrepi", 0x45, INSTR_VRI_V0IM },
+ { "vscef", 0x1b, INSTR_VRV_VWRDM },
+ { "vsceg", 0x1a, INSTR_VRV_VWRDM },
+ { "vsel", 0x8d, INSTR_VRR_VVV000V },
+ { "vseg", 0x5f, INSTR_VRR_VV0000M },
+ { "vst", 0x0e, INSTR_VRX_VRRD0 },
+ { "vsteb", 0x08, INSTR_VRX_VRRDM },
+ { "vsteh", 0x09, INSTR_VRX_VRRDM },
+ { "vstef", 0x0b, INSTR_VRX_VRRDM },
+ { "vsteg", 0x0a, INSTR_VRX_VRRDM },
+ { "vstm", 0x3e, INSTR_VRS_VVRD0 },
+ { "vstl", 0x3f, INSTR_VRS_VRRD0 },
+ { "vuph", 0xd7, INSTR_VRR_VV0000M },
+ { "vuplh", 0xd5, INSTR_VRR_VV0000M },
+ { "vupl", 0xd6, INSTR_VRR_VV0000M },
+ { "vupll", 0xd4, INSTR_VRR_VV0000M },
+ { "va", 0xf3, INSTR_VRR_VVV000M },
+ { "vacc", 0xf1, INSTR_VRR_VVV000M },
+ { "vac", 0xbb, INSTR_VRR_VVVM00V },
+ { "vaccc", 0xb9, INSTR_VRR_VVVM00V },
+ { "vn", 0x68, INSTR_VRR_VVV0000 },
+ { "vnc", 0x69, INSTR_VRR_VVV0000 },
+ { "vavg", 0xf2, INSTR_VRR_VVV000M },
+ { "vavgl", 0xf0, INSTR_VRR_VVV000M },
+ { "vcksm", 0x66, INSTR_VRR_VVV0000 },
+ { "vec", 0xdb, INSTR_VRR_VV0000M },
+ { "vecl", 0xd9, INSTR_VRR_VV0000M },
+ { "vceq", 0xf8, INSTR_VRR_VVV0M0M },
+ { "vch", 0xfb, INSTR_VRR_VVV0M0M },
+ { "vchl", 0xf9, INSTR_VRR_VVV0M0M },
+ { "vclz", 0x53, INSTR_VRR_VV0000M },
+ { "vctz", 0x52, INSTR_VRR_VV0000M },
+ { "vx", 0x6d, INSTR_VRR_VVV0000 },
+ { "vgfm", 0xb4, INSTR_VRR_VVV000M },
+ { "vgfma", 0xbc, INSTR_VRR_VVVM00V },
+ { "vlc", 0xde, INSTR_VRR_VV0000M },
+ { "vlp", 0xdf, INSTR_VRR_VV0000M },
+ { "vmx", 0xff, INSTR_VRR_VVV000M },
+ { "vmxl", 0xfd, INSTR_VRR_VVV000M },
+ { "vmn", 0xfe, INSTR_VRR_VVV000M },
+ { "vmnl", 0xfc, INSTR_VRR_VVV000M },
+ { "vmal", 0xaa, INSTR_VRR_VVVM00V },
+ { "vmae", 0xae, INSTR_VRR_VVVM00V },
+ { "vmale", 0xac, INSTR_VRR_VVVM00V },
+ { "vmah", 0xab, INSTR_VRR_VVVM00V },
+ { "vmalh", 0xa9, INSTR_VRR_VVVM00V },
+ { "vmao", 0xaf, INSTR_VRR_VVVM00V },
+ { "vmalo", 0xad, INSTR_VRR_VVVM00V },
+ { "vmh", 0xa3, INSTR_VRR_VVV000M },
+ { "vmlh", 0xa1, INSTR_VRR_VVV000M },
+ { "vml", 0xa2, INSTR_VRR_VVV000M },
+ { "vme", 0xa6, INSTR_VRR_VVV000M },
+ { "vmle", 0xa4, INSTR_VRR_VVV000M },
+ { "vmo", 0xa7, INSTR_VRR_VVV000M },
+ { "vmlo", 0xa5, INSTR_VRR_VVV000M },
+ { "vno", 0x6b, INSTR_VRR_VVV0000 },
+ { "vo", 0x6a, INSTR_VRR_VVV0000 },
+ { { 0, LONG_INSN_VPOPCT }, 0x50, INSTR_VRR_VV0000M },
+ { { 0, LONG_INSN_VERLLV }, 0x73, INSTR_VRR_VVV000M },
+ { "verll", 0x33, INSTR_VRS_VVRDM },
+ { "verim", 0x72, INSTR_VRI_VVV0IM },
+ { "veslv", 0x70, INSTR_VRR_VVV000M },
+ { "vesl", 0x30, INSTR_VRS_VVRDM },
+ { { 0, LONG_INSN_VESRAV }, 0x7a, INSTR_VRR_VVV000M },
+ { "vesra", 0x3a, INSTR_VRS_VVRDM },
+ { { 0, LONG_INSN_VESRLV }, 0x78, INSTR_VRR_VVV000M },
+ { "vesrl", 0x38, INSTR_VRS_VVRDM },
+ { "vsl", 0x74, INSTR_VRR_VVV0000 },
+ { "vslb", 0x75, INSTR_VRR_VVV0000 },
+ { "vsldb", 0x77, INSTR_VRI_VVV0I0 },
+ { "vsra", 0x7e, INSTR_VRR_VVV0000 },
+ { "vsrab", 0x7f, INSTR_VRR_VVV0000 },
+ { "vsrl", 0x7c, INSTR_VRR_VVV0000 },
+ { "vsrlb", 0x7d, INSTR_VRR_VVV0000 },
+ { "vs", 0xf7, INSTR_VRR_VVV000M },
+ { "vscb", 0xf5, INSTR_VRR_VVV000M },
+ { "vsb", 0xbf, INSTR_VRR_VVVM00V },
+ { { 0, LONG_INSN_VSBCBI }, 0xbd, INSTR_VRR_VVVM00V },
+ { "vsumg", 0x65, INSTR_VRR_VVV000M },
+ { "vsumq", 0x67, INSTR_VRR_VVV000M },
+ { "vsum", 0x64, INSTR_VRR_VVV000M },
+ { "vtm", 0xd8, INSTR_VRR_VV00000 },
+ { "vfae", 0x82, INSTR_VRR_VVV0M0M },
+ { "vfee", 0x80, INSTR_VRR_VVV0M0M },
+ { "vfene", 0x81, INSTR_VRR_VVV0M0M },
+ { "vistr", 0x5c, INSTR_VRR_VV00M0M },
+ { "vstrc", 0x8a, INSTR_VRR_VVVMM0V },
+ { "vfa", 0xe3, INSTR_VRR_VVV00MM },
+ { "wfc", 0xcb, INSTR_VRR_VV000MM },
+ { "wfk", 0xca, INSTR_VRR_VV000MM },
+ { "vfce", 0xe8, INSTR_VRR_VVV0MMM },
+ { "vfch", 0xeb, INSTR_VRR_VVV0MMM },
+ { "vfche", 0xea, INSTR_VRR_VVV0MMM },
+ { "vcdg", 0xc3, INSTR_VRR_VV00MMM },
+ { "vcdlg", 0xc1, INSTR_VRR_VV00MMM },
+ { "vcgd", 0xc2, INSTR_VRR_VV00MMM },
+ { "vclgd", 0xc0, INSTR_VRR_VV00MMM },
+ { "vfd", 0xe5, INSTR_VRR_VVV00MM },
+ { "vfi", 0xc7, INSTR_VRR_VV00MMM },
+ { "vlde", 0xc4, INSTR_VRR_VV000MM },
+ { "vled", 0xc5, INSTR_VRR_VV00MMM },
+ { "vfm", 0xe7, INSTR_VRR_VVV00MM },
+ { "vfma", 0x8f, INSTR_VRR_VVVM0MV },
+ { "vfms", 0x8e, INSTR_VRR_VVVM0MV },
+ { "vfpso", 0xcc, INSTR_VRR_VV00MMM },
+ { "vfsq", 0xce, INSTR_VRR_VV000MM },
+ { "vfs", 0xe2, INSTR_VRR_VVV00MM },
+ { "vftci", 0x4a, INSTR_VRI_VVIMM },
+};
+
+static struct s390_insn opcode_eb[] = {
+ { "lmg", 0x04, INSTR_RSY_RRRD },
+ { "srag", 0x0a, INSTR_RSY_RRRD },
+ { "slag", 0x0b, INSTR_RSY_RRRD },
+ { "srlg", 0x0c, INSTR_RSY_RRRD },
+ { "sllg", 0x0d, INSTR_RSY_RRRD },
+ { "tracg", 0x0f, INSTR_RSY_RRRD },
+ { "csy", 0x14, INSTR_RSY_RRRD },
+ { "rllg", 0x1c, INSTR_RSY_RRRD },
+ { "clmh", 0x20, INSTR_RSY_RURD },
+ { "clmy", 0x21, INSTR_RSY_RURD },
+ { "clt", 0x23, INSTR_RSY_RURD },
+ { "stmg", 0x24, INSTR_RSY_RRRD },
+ { "stctg", 0x25, INSTR_RSY_CCRD },
+ { "stmh", 0x26, INSTR_RSY_RRRD },
+ { "clgt", 0x2b, INSTR_RSY_RURD },
+ { "stcmh", 0x2c, INSTR_RSY_RURD },
+ { "stcmy", 0x2d, INSTR_RSY_RURD },
+ { "lctlg", 0x2f, INSTR_RSY_CCRD },
+ { "csg", 0x30, INSTR_RSY_RRRD },
+ { "cdsy", 0x31, INSTR_RSY_RRRD },
+ { "cdsg", 0x3e, INSTR_RSY_RRRD },
+ { "bxhg", 0x44, INSTR_RSY_RRRD },
+ { "bxleg", 0x45, INSTR_RSY_RRRD },
+ { "ecag", 0x4c, INSTR_RSY_RRRD },
+ { "tmy", 0x51, INSTR_SIY_URD },
+ { "mviy", 0x52, INSTR_SIY_URD },
+ { "niy", 0x54, INSTR_SIY_URD },
+ { "cliy", 0x55, INSTR_SIY_URD },
+ { "oiy", 0x56, INSTR_SIY_URD },
+ { "xiy", 0x57, INSTR_SIY_URD },
+ { "asi", 0x6a, INSTR_SIY_IRD },
+ { "alsi", 0x6e, INSTR_SIY_IRD },
+ { "agsi", 0x7a, INSTR_SIY_IRD },
+ { "algsi", 0x7e, INSTR_SIY_IRD },
+ { "icmh", 0x80, INSTR_RSY_RURD },
+ { "icmy", 0x81, INSTR_RSY_RURD },
+ { "clclu", 0x8f, INSTR_RSY_RRRD },
+ { "stmy", 0x90, INSTR_RSY_RRRD },
+ { "lmh", 0x96, INSTR_RSY_RRRD },
+ { "lmy", 0x98, INSTR_RSY_RRRD },
+ { "lamy", 0x9a, INSTR_RSY_AARD },
+ { "stamy", 0x9b, INSTR_RSY_AARD },
+ { { 0, LONG_INSN_PCISTB }, 0xd0, INSTR_RSY_RRRD },
+ { "sic", 0xd1, INSTR_RSY_RRRD },
+ { "srak", 0xdc, INSTR_RSY_RRRD },
+ { "slak", 0xdd, INSTR_RSY_RRRD },
+ { "srlk", 0xde, INSTR_RSY_RRRD },
+ { "sllk", 0xdf, INSTR_RSY_RRRD },
+ { "locg", 0xe2, INSTR_RSY_RDRM },
+ { "stocg", 0xe3, INSTR_RSY_RDRM },
+ { "lang", 0xe4, INSTR_RSY_RRRD },
+ { "laog", 0xe6, INSTR_RSY_RRRD },
+ { "laxg", 0xe7, INSTR_RSY_RRRD },
+ { "laag", 0xe8, INSTR_RSY_RRRD },
+ { "laalg", 0xea, INSTR_RSY_RRRD },
+ { "loc", 0xf2, INSTR_RSY_RDRM },
+ { "stoc", 0xf3, INSTR_RSY_RDRM },
+ { "lan", 0xf4, INSTR_RSY_RRRD },
+ { "lao", 0xf6, INSTR_RSY_RRRD },
+ { "lax", 0xf7, INSTR_RSY_RRRD },
+ { "laa", 0xf8, INSTR_RSY_RRRD },
+ { "laal", 0xfa, INSTR_RSY_RRRD },
+ { "lric", 0x60, INSTR_RSY_RDRM },
+ { "stric", 0x61, INSTR_RSY_RDRM },
+ { "mric", 0x62, INSTR_RSY_RDRM },
+ { { 0, LONG_INSN_STCCTM }, 0x17, INSTR_RSY_RMRD },
+ { "rll", 0x1d, INSTR_RSY_RRRD },
+ { "mvclu", 0x8e, INSTR_RSY_RRRD },
+ { "tp", 0xc0, INSTR_RSL_R0RD },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_ec[] = {
+ { "brxhg", 0x44, INSTR_RIE_RRP },
+ { "brxlg", 0x45, INSTR_RIE_RRP },
+ { { 0, LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU },
+ { "rnsbg", 0x54, INSTR_RIE_RRUUU },
+ { "risbg", 0x55, INSTR_RIE_RRUUU },
+ { "rosbg", 0x56, INSTR_RIE_RRUUU },
+ { "rxsbg", 0x57, INSTR_RIE_RRUUU },
+ { { 0, LONG_INSN_RISBGN }, 0x59, INSTR_RIE_RRUUU },
+ { { 0, LONG_INSN_RISBHG }, 0x5D, INSTR_RIE_RRUUU },
+ { "cgrj", 0x64, INSTR_RIE_RRPU },
+ { "clgrj", 0x65, INSTR_RIE_RRPU },
+ { "cgit", 0x70, INSTR_RIE_R0IU },
+ { "clgit", 0x71, INSTR_RIE_R0UU },
+ { "cit", 0x72, INSTR_RIE_R0IU },
+ { "clfit", 0x73, INSTR_RIE_R0UU },
+ { "crj", 0x76, INSTR_RIE_RRPU },
+ { "clrj", 0x77, INSTR_RIE_RRPU },
+ { "cgij", 0x7c, INSTR_RIE_RUPI },
+ { "clgij", 0x7d, INSTR_RIE_RUPU },
+ { "cij", 0x7e, INSTR_RIE_RUPI },
+ { "clij", 0x7f, INSTR_RIE_RUPU },
+ { "ahik", 0xd8, INSTR_RIE_RRI0 },
+ { "aghik", 0xd9, INSTR_RIE_RRI0 },
+ { { 0, LONG_INSN_ALHSIK }, 0xda, INSTR_RIE_RRI0 },
+ { { 0, LONG_INSN_ALGHSIK }, 0xdb, INSTR_RIE_RRI0 },
+ { "cgrb", 0xe4, INSTR_RRS_RRRDU },
+ { "clgrb", 0xe5, INSTR_RRS_RRRDU },
+ { "crb", 0xf6, INSTR_RRS_RRRDU },
+ { "clrb", 0xf7, INSTR_RRS_RRRDU },
+ { "cgib", 0xfc, INSTR_RIS_RURDI },
+ { "clgib", 0xfd, INSTR_RIS_RURDU },
+ { "cib", 0xfe, INSTR_RIS_RURDI },
+ { "clib", 0xff, INSTR_RIS_RURDU },
+ { "", 0, INSTR_INVALID }
+};
+
+static struct s390_insn opcode_ed[] = {
+ { "mayl", 0x38, INSTR_RXF_FRRDF },
+ { "myl", 0x39, INSTR_RXF_FRRDF },
+ { "may", 0x3a, INSTR_RXF_FRRDF },
+ { "my", 0x3b, INSTR_RXF_FRRDF },
+ { "mayh", 0x3c, INSTR_RXF_FRRDF },
+ { "myh", 0x3d, INSTR_RXF_FRRDF },
+ { "sldt", 0x40, INSTR_RXF_FRRDF },
+ { "srdt", 0x41, INSTR_RXF_FRRDF },
+ { "slxt", 0x48, INSTR_RXF_FRRDF },
+ { "srxt", 0x49, INSTR_RXF_FRRDF },
+ { "tdcet", 0x50, INSTR_RXE_FRRD },
+ { "tdget", 0x51, INSTR_RXE_FRRD },
+ { "tdcdt", 0x54, INSTR_RXE_FRRD },
+ { "tdgdt", 0x55, INSTR_RXE_FRRD },
+ { "tdcxt", 0x58, INSTR_RXE_FRRD },
+ { "tdgxt", 0x59, INSTR_RXE_FRRD },
+ { "ley", 0x64, INSTR_RXY_FRRD },
+ { "ldy", 0x65, INSTR_RXY_FRRD },
+ { "stey", 0x66, INSTR_RXY_FRRD },
+ { "stdy", 0x67, INSTR_RXY_FRRD },
+ { "czdt", 0xa8, INSTR_RSL_LRDFU },
+ { "czxt", 0xa9, INSTR_RSL_LRDFU },
+ { "cdzt", 0xaa, INSTR_RSL_LRDFU },
+ { "cxzt", 0xab, INSTR_RSL_LRDFU },
+ { "ldeb", 0x04, INSTR_RXE_FRRD },
+ { "lxdb", 0x05, INSTR_RXE_FRRD },
+ { "lxeb", 0x06, INSTR_RXE_FRRD },
+ { "mxdb", 0x07, INSTR_RXE_FRRD },
+ { "keb", 0x08, INSTR_RXE_FRRD },
+ { "ceb", 0x09, INSTR_RXE_FRRD },
+ { "aeb", 0x0a, INSTR_RXE_FRRD },
+ { "seb", 0x0b, INSTR_RXE_FRRD },
+ { "mdeb", 0x0c, INSTR_RXE_FRRD },
+ { "deb", 0x0d, INSTR_RXE_FRRD },
+ { "maeb", 0x0e, INSTR_RXF_FRRDF },
+ { "mseb", 0x0f, INSTR_RXF_FRRDF },
+ { "tceb", 0x10, INSTR_RXE_FRRD },
+ { "tcdb", 0x11, INSTR_RXE_FRRD },
+ { "tcxb", 0x12, INSTR_RXE_FRRD },
+ { "sqeb", 0x14, INSTR_RXE_FRRD },
+ { "sqdb", 0x15, INSTR_RXE_FRRD },
+ { "meeb", 0x17, INSTR_RXE_FRRD },
+ { "kdb", 0x18, INSTR_RXE_FRRD },
+ { "cdb", 0x19, INSTR_RXE_FRRD },
+ { "adb", 0x1a, INSTR_RXE_FRRD },
+ { "sdb", 0x1b, INSTR_RXE_FRRD },
+ { "mdb", 0x1c, INSTR_RXE_FRRD },
+ { "ddb", 0x1d, INSTR_RXE_FRRD },
+ { "madb", 0x1e, INSTR_RXF_FRRDF },
+ { "msdb", 0x1f, INSTR_RXF_FRRDF },
+ { "lde", 0x24, INSTR_RXE_FRRD },
+ { "lxd", 0x25, INSTR_RXE_FRRD },
+ { "lxe", 0x26, INSTR_RXE_FRRD },
+ { "mae", 0x2e, INSTR_RXF_FRRDF },
+ { "mse", 0x2f, INSTR_RXF_FRRDF },
+ { "sqe", 0x34, INSTR_RXE_FRRD },
+ { "sqd", 0x35, INSTR_RXE_FRRD },
+ { "mee", 0x37, INSTR_RXE_FRRD },
+ { "mad", 0x3e, INSTR_RXF_FRRDF },
+ { "msd", 0x3f, INSTR_RXF_FRRDF },
+ { "", 0, INSTR_INVALID }
+};
+
+/* Extracts an operand value from an instruction. */
+static unsigned int extract_operand(unsigned char *code,
+ const struct s390_operand *operand)
+{
+ unsigned char *cp;
+ unsigned int val;
+ int bits;
+
+ /* Extract fragments of the operand byte for byte. */
+ cp = code + operand->shift / 8;
+ bits = (operand->shift & 7) + operand->bits;
+ val = 0;
+ do {
+ val <<= 8;
+ val |= (unsigned int) *cp++;
+ bits -= 8;
+ } while (bits > 0);
+ val >>= -bits;
+ val &= ((1U << (operand->bits - 1)) << 1) - 1;
+
+ /* Check for special long displacement case. */
+ if (operand->bits == 20 && operand->shift == 20)
+ val = (val & 0xff) << 12 | (val & 0xfff00) >> 8;
+
+ /* Check for register extensions bits for vector registers. */
+ if (operand->flags & OPERAND_VR) {
+ if (operand->shift == 8)
+ val |= (code[4] & 8) << 1;
+ else if (operand->shift == 12)
+ val |= (code[4] & 4) << 2;
+ else if (operand->shift == 16)
+ val |= (code[4] & 2) << 3;
+ else if (operand->shift == 32)
+ val |= (code[4] & 1) << 4;
+ }
+
+ /* Sign extend value if the operand is signed or pc relative. */
+ if ((operand->flags & (OPERAND_SIGNED | OPERAND_PCREL)) &&
+ (val & (1U << (operand->bits - 1))))
+ val |= (-1U << (operand->bits - 1)) << 1;
+
+ /* Double value if the operand is pc relative. */
+ if (operand->flags & OPERAND_PCREL)
+ val <<= 1;
+
+ /* Length x in an instructions has real length x + 1. */
+ if (operand->flags & OPERAND_LENGTH)
+ val++;
+ return val;
+}
+
+struct s390_insn *find_insn(unsigned char *code)
+{
+ unsigned char opfrag = code[1];
+ unsigned char opmask;
+ struct s390_insn *table;
+
+ switch (code[0]) {
+ case 0x01:
+ table = opcode_01;
+ break;
+ case 0xa5:
+ table = opcode_a5;
+ break;
+ case 0xa7:
+ table = opcode_a7;
+ break;
+ case 0xaa:
+ table = opcode_aa;
+ break;
+ case 0xb2:
+ table = opcode_b2;
+ break;
+ case 0xb3:
+ table = opcode_b3;
+ break;
+ case 0xb9:
+ table = opcode_b9;
+ break;
+ case 0xc0:
+ table = opcode_c0;
+ break;
+ case 0xc2:
+ table = opcode_c2;
+ break;
+ case 0xc4:
+ table = opcode_c4;
+ break;
+ case 0xc6:
+ table = opcode_c6;
+ break;
+ case 0xc8:
+ table = opcode_c8;
+ break;
+ case 0xcc:
+ table = opcode_cc;
+ break;
+ case 0xe3:
+ table = opcode_e3;
+ opfrag = code[5];
+ break;
+ case 0xe5:
+ table = opcode_e5;
+ break;
+ case 0xe7:
+ table = opcode_e7;
+ opfrag = code[5];
+ break;
+ case 0xeb:
+ table = opcode_eb;
+ opfrag = code[5];
+ break;
+ case 0xec:
+ table = opcode_ec;
+ opfrag = code[5];
+ break;
+ case 0xed:
+ table = opcode_ed;
+ opfrag = code[5];
+ break;
+ default:
+ table = opcode;
+ opfrag = code[0];
+ break;
+ }
+ while (table->format != INSTR_INVALID) {
+ opmask = formats[table->format][0];
+ if (table->opfrag == (opfrag & opmask))
+ return table;
+ table++;
+ }
+ return NULL;
+}
+
+/**
+ * insn_to_mnemonic - decode an s390 instruction
+ * @instruction: instruction to decode
+ * @buf: buffer to fill with mnemonic
+ * @len: length of buffer
+ *
+ * Decode the instruction at @instruction and store the corresponding
+ * mnemonic into @buf of length @len.
+ * @buf is left unchanged if the instruction could not be decoded.
+ * Returns:
+ * %0 on success, %-ENOENT if the instruction was not found.
+ */
+int insn_to_mnemonic(unsigned char *instruction, char *buf, unsigned int len)
+{
+ struct s390_insn *insn;
+
+ insn = find_insn(instruction);
+ if (!insn)
+ return -ENOENT;
+ if (insn->name[0] == '\0')
+ snprintf(buf, len, "%s",
+ long_insn_name[(int) insn->name[1]]);
+ else
+ snprintf(buf, len, "%.5s", insn->name);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(insn_to_mnemonic);
+
+static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
+{
+ struct s390_insn *insn;
+ const unsigned char *ops;
+ const struct s390_operand *operand;
+ unsigned int value;
+ char separator;
+ char *ptr;
+ int i;
+
+ ptr = buffer;
+ insn = find_insn(code);
+ if (insn) {
+ if (insn->name[0] == '\0')
+ ptr += sprintf(ptr, "%s\t",
+ long_insn_name[(int) insn->name[1]]);
+ else
+ ptr += sprintf(ptr, "%.5s\t", insn->name);
+ /* Extract the operands. */
+ separator = 0;
+ for (ops = formats[insn->format] + 1, i = 0;
+ *ops != 0 && i < 6; ops++, i++) {
+ operand = operands + *ops;
+ value = extract_operand(code, operand);
+ if ((operand->flags & OPERAND_INDEX) && value == 0)
+ continue;
+ if ((operand->flags & OPERAND_BASE) &&
+ value == 0 && separator == '(') {
+ separator = ',';
+ continue;
+ }
+ if (separator)
+ ptr += sprintf(ptr, "%c", separator);
+ if (operand->flags & OPERAND_GPR)
+ ptr += sprintf(ptr, "%%r%i", value);
+ else if (operand->flags & OPERAND_FPR)
+ ptr += sprintf(ptr, "%%f%i", value);
+ else if (operand->flags & OPERAND_AR)
+ ptr += sprintf(ptr, "%%a%i", value);
+ else if (operand->flags & OPERAND_CR)
+ ptr += sprintf(ptr, "%%c%i", value);
+ else if (operand->flags & OPERAND_VR)
+ ptr += sprintf(ptr, "%%v%i", value);
+ else if (operand->flags & OPERAND_PCREL)
+ ptr += sprintf(ptr, "%lx", (signed int) value
+ + addr);
+ else if (operand->flags & OPERAND_SIGNED)
+ ptr += sprintf(ptr, "%i", value);
+ else
+ ptr += sprintf(ptr, "%u", value);
+ if (operand->flags & OPERAND_DISP)
+ separator = '(';
+ else if (operand->flags & OPERAND_BASE) {
+ ptr += sprintf(ptr, ")");
+ separator = ',';
+ } else
+ separator = ',';
+ }
+ } else
+ ptr += sprintf(ptr, "unknown");
+ return (int) (ptr - buffer);
+}
+
+void show_code(struct pt_regs *regs)
+{
+ char *mode = user_mode(regs) ? "User" : "Krnl";
+ unsigned char code[64];
+ char buffer[64], *ptr;
+ mm_segment_t old_fs;
+ unsigned long addr;
+ int start, end, opsize, hops, i;
+
+ /* Get a snapshot of the 64 bytes surrounding the fault address. */
+ old_fs = get_fs();
+ set_fs(user_mode(regs) ? USER_DS : KERNEL_DS);
+ for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) {
+ addr = regs->psw.addr - 34 + start;
+ if (__copy_from_user(code + start - 2,
+ (char __user *) addr, 2))
+ break;
+ }
+ for (end = 32; end < 64; end += 2) {
+ addr = regs->psw.addr + end - 32;
+ if (__copy_from_user(code + end,
+ (char __user *) addr, 2))
+ break;
+ }
+ set_fs(old_fs);
+ /* Code snapshot useable ? */
+ if ((regs->psw.addr & 1) || start >= end) {
+ printk("%s Code: Bad PSW.\n", mode);
+ return;
+ }
+ /* Find a starting point for the disassembly. */
+ while (start < 32) {
+ for (i = 0, hops = 0; start + i < 32 && hops < 3; hops++) {
+ if (!find_insn(code + start + i))
+ break;
+ i += insn_length(code[start + i]);
+ }
+ if (start + i == 32)
+ /* Looks good, sequence ends at PSW. */
+ break;
+ start += 2;
+ }
+ /* Decode the instructions. */
+ ptr = buffer;
+ ptr += sprintf(ptr, "%s Code:", mode);
+ hops = 0;
+ while (start < end && hops < 8) {
+ opsize = insn_length(code[start]);
+ if (start + opsize == 32)
+ *ptr++ = '#';
+ else if (start == 32)
+ *ptr++ = '>';
+ else
+ *ptr++ = ' ';
+ addr = regs->psw.addr + start - 32;
+ ptr += sprintf(ptr, "%016lx: ", addr);
+ if (start + opsize >= end)
+ break;
+ for (i = 0; i < opsize; i++)
+ ptr += sprintf(ptr, "%02x", code[start + i]);
+ *ptr++ = '\t';
+ if (i < 6)
+ *ptr++ = '\t';
+ ptr += print_insn(ptr, code + start, addr);
+ start += opsize;
+ printk(buffer);
+ ptr = buffer;
+ ptr += sprintf(ptr, "\n ");
+ hops++;
+ }
+ printk("\n");
+}
+
+void print_fn_code(unsigned char *code, unsigned long len)
+{
+ char buffer[64], *ptr;
+ int opsize, i;
+
+ while (len) {
+ ptr = buffer;
+ opsize = insn_length(*code);
+ if (opsize > len)
+ break;
+ ptr += sprintf(ptr, "%p: ", code);
+ for (i = 0; i < opsize; i++)
+ ptr += sprintf(ptr, "%02x", code[i]);
+ *ptr++ = '\t';
+ if (i < 4)
+ *ptr++ = '\t';
+ ptr += print_insn(ptr, code, (unsigned long) code);
+ *ptr++ = '\n';
+ *ptr++ = 0;
+ printk(buffer);
+ code += opsize;
+ len -= opsize;
+ }
+}
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
new file mode 100644
index 000000000..dc8e20473
--- /dev/null
+++ b/arch/s390/kernel/dumpstack.c
@@ -0,0 +1,204 @@
+/*
+ * Stack dumping functions
+ *
+ * Copyright IBM Corp. 1999, 2013
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/hardirq.h>
+#include <linux/kprobes.h>
+#include <linux/utsname.h>
+#include <linux/export.h>
+#include <linux/kdebug.h>
+#include <linux/ptrace.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <asm/processor.h>
+#include <asm/debug.h>
+#include <asm/dis.h>
+#include <asm/ipl.h>
+
+/*
+ * For show_trace we have tree different stack to consider:
+ * - the panic stack which is used if the kernel stack has overflown
+ * - the asynchronous interrupt stack (cpu related)
+ * - the synchronous kernel stack (process related)
+ * The stack trace can start at any of the three stack and can potentially
+ * touch all of them. The order is: panic stack, async stack, sync stack.
+ */
+static unsigned long
+__show_trace(unsigned long sp, unsigned long low, unsigned long high)
+{
+ struct stack_frame *sf;
+ struct pt_regs *regs;
+ unsigned long addr;
+
+ while (1) {
+ sp = sp & PSW_ADDR_INSN;
+ if (sp < low || sp > high - sizeof(*sf))
+ return sp;
+ sf = (struct stack_frame *) sp;
+ addr = sf->gprs[8] & PSW_ADDR_INSN;
+ printk("([<%016lx>] %pSR)\n", addr, (void *)addr);
+ /* Follow the backchain. */
+ while (1) {
+ low = sp;
+ sp = sf->back_chain & PSW_ADDR_INSN;
+ if (!sp)
+ break;
+ if (sp <= low || sp > high - sizeof(*sf))
+ return sp;
+ sf = (struct stack_frame *) sp;
+ addr = sf->gprs[8] & PSW_ADDR_INSN;
+ printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
+ }
+ /* Zero backchain detected, check for interrupt frame. */
+ sp = (unsigned long) (sf + 1);
+ if (sp <= low || sp > high - sizeof(*regs))
+ return sp;
+ regs = (struct pt_regs *) sp;
+ addr = regs->psw.addr & PSW_ADDR_INSN;
+ printk(" [<%016lx>] %pSR\n", addr, (void *)addr);
+ low = sp;
+ sp = regs->gprs[15];
+ }
+}
+
+static void show_trace(struct task_struct *task, unsigned long *stack)
+{
+ const unsigned long frame_size =
+ STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+ register unsigned long __r15 asm ("15");
+ unsigned long sp;
+
+ sp = (unsigned long) stack;
+ if (!sp)
+ sp = task ? task->thread.ksp : __r15;
+ printk("Call Trace:\n");
+#ifdef CONFIG_CHECK_STACK
+ sp = __show_trace(sp,
+ S390_lowcore.panic_stack + frame_size - 4096,
+ S390_lowcore.panic_stack + frame_size);
+#endif
+ sp = __show_trace(sp,
+ S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
+ S390_lowcore.async_stack + frame_size);
+ if (task)
+ __show_trace(sp, (unsigned long) task_stack_page(task),
+ (unsigned long) task_stack_page(task) + THREAD_SIZE);
+ else
+ __show_trace(sp, S390_lowcore.thread_info,
+ S390_lowcore.thread_info + THREAD_SIZE);
+ if (!task)
+ task = current;
+ debug_show_held_locks(task);
+}
+
+void show_stack(struct task_struct *task, unsigned long *sp)
+{
+ register unsigned long *__r15 asm ("15");
+ unsigned long *stack;
+ int i;
+
+ if (!sp)
+ stack = task ? (unsigned long *) task->thread.ksp : __r15;
+ else
+ stack = sp;
+
+ for (i = 0; i < 20; i++) {
+ if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
+ break;
+ if ((i * sizeof(long) % 32) == 0)
+ printk("%s ", i == 0 ? "" : "\n");
+ printk("%016lx ", *stack++);
+ }
+ printk("\n");
+ show_trace(task, sp);
+}
+
+static void show_last_breaking_event(struct pt_regs *regs)
+{
+ printk("Last Breaking-Event-Address:\n");
+ printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]);
+}
+
+static inline int mask_bits(struct pt_regs *regs, unsigned long bits)
+{
+ return (regs->psw.mask & bits) / ((~bits + 1) & bits);
+}
+
+void show_registers(struct pt_regs *regs)
+{
+ char *mode;
+
+ mode = user_mode(regs) ? "User" : "Krnl";
+ printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
+ if (!user_mode(regs))
+ printk(" (%pSR)", (void *)regs->psw.addr);
+ printk("\n");
+ printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
+ "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
+ mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
+ mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY),
+ mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT),
+ mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC),
+ mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM));
+ printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA));
+ printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
+ regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
+ printk(" %016lx %016lx %016lx %016lx\n",
+ regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]);
+ printk(" %016lx %016lx %016lx %016lx\n",
+ regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]);
+ printk(" %016lx %016lx %016lx %016lx\n",
+ regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]);
+ show_code(regs);
+}
+
+void show_regs(struct pt_regs *regs)
+{
+ show_regs_print_info(KERN_DEFAULT);
+ show_registers(regs);
+ /* Show stack backtrace if pt_regs is from kernel mode */
+ if (!user_mode(regs))
+ show_trace(NULL, (unsigned long *) regs->gprs[15]);
+ show_last_breaking_event(regs);
+}
+
+static DEFINE_SPINLOCK(die_lock);
+
+void die(struct pt_regs *regs, const char *str)
+{
+ static int die_counter;
+
+ oops_enter();
+ lgr_info_log();
+ debug_stop_all();
+ console_verbose();
+ spin_lock_irq(&die_lock);
+ bust_spinlocks(1);
+ printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff,
+ regs->int_code >> 17, ++die_counter);
+#ifdef CONFIG_PREEMPT
+ printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+ printk("SMP ");
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ printk("DEBUG_PAGEALLOC");
+#endif
+ printk("\n");
+ notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
+ print_modules();
+ show_regs(regs);
+ bust_spinlocks(0);
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+ spin_unlock_irq(&die_lock);
+ if (in_interrupt())
+ panic("Fatal exception in interrupt");
+ if (panic_on_oops)
+ panic("Fatal exception: panic_on_oops");
+ oops_exit();
+ do_exit(SIGSEGV);
+}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
new file mode 100644
index 000000000..549a73a4b
--- /dev/null
+++ b/arch/s390/kernel/early.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright IBM Corp. 2007, 2009
+ * Author(s): Hongjie Yang <hongjie@us.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "setup"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/compiler.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/lockdep.h>
+#include <linux/module.h>
+#include <linux/pfn.h>
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <asm/ebcdic.h>
+#include <asm/ipl.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/sysinfo.h>
+#include <asm/cpcmd.h>
+#include <asm/sclp.h>
+#include <asm/facility.h>
+#include "entry.h"
+
+/*
+ * Create a Kernel NSS if the SAVESYS= parameter is defined
+ */
+#define DEFSYS_CMD_SIZE 128
+#define SAVESYS_CMD_SIZE 32
+
+char kernel_nss_name[NSS_NAME_SIZE + 1];
+
+static void __init setup_boot_command_line(void);
+
+/*
+ * Get the TOD clock running.
+ */
+static void __init reset_tod_clock(void)
+{
+ u64 time;
+
+ if (store_tod_clock(&time) == 0)
+ return;
+ /* TOD clock not running. Set the clock to Unix Epoch. */
+ if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
+ disabled_wait(0);
+
+ sched_clock_base_cc = TOD_UNIX_EPOCH;
+ S390_lowcore.last_update_clock = sched_clock_base_cc;
+}
+
+#ifdef CONFIG_SHARED_KERNEL
+int __init savesys_ipl_nss(char *cmd, const int cmdlen);
+
+asm(
+ " .section .init.text,\"ax\",@progbits\n"
+ " .align 4\n"
+ " .type savesys_ipl_nss, @function\n"
+ "savesys_ipl_nss:\n"
+ " stmg 6,15,48(15)\n"
+ " lgr 14,3\n"
+ " sam31\n"
+ " diag 2,14,0x8\n"
+ " sam64\n"
+ " lgr 2,14\n"
+ " lmg 6,15,48(15)\n"
+ " br 14\n"
+ " .size savesys_ipl_nss, .-savesys_ipl_nss\n"
+ " .previous\n");
+
+static __initdata char upper_command_line[COMMAND_LINE_SIZE];
+
+static noinline __init void create_kernel_nss(void)
+{
+ unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size;
+#ifdef CONFIG_BLK_DEV_INITRD
+ unsigned int sinitrd_pfn, einitrd_pfn;
+#endif
+ int response;
+ int hlen;
+ size_t len;
+ char *savesys_ptr;
+ char defsys_cmd[DEFSYS_CMD_SIZE];
+ char savesys_cmd[SAVESYS_CMD_SIZE];
+
+ /* Do nothing if we are not running under VM */
+ if (!MACHINE_IS_VM)
+ return;
+
+ /* Convert COMMAND_LINE to upper case */
+ for (i = 0; i < strlen(boot_command_line); i++)
+ upper_command_line[i] = toupper(boot_command_line[i]);
+
+ savesys_ptr = strstr(upper_command_line, "SAVESYS=");
+
+ if (!savesys_ptr)
+ return;
+
+ savesys_ptr += 8; /* Point to the beginning of the NSS name */
+ for (i = 0; i < NSS_NAME_SIZE; i++) {
+ if (savesys_ptr[i] == ' ' || savesys_ptr[i] == '\0')
+ break;
+ kernel_nss_name[i] = savesys_ptr[i];
+ }
+
+ stext_pfn = PFN_DOWN(__pa(&_stext));
+ eshared_pfn = PFN_DOWN(__pa(&_eshared));
+ end_pfn = PFN_UP(__pa(&_end));
+ min_size = end_pfn << 2;
+
+ hlen = snprintf(defsys_cmd, DEFSYS_CMD_SIZE,
+ "DEFSYS %s 00000-%.5X EW %.5X-%.5X SR %.5X-%.5X",
+ kernel_nss_name, stext_pfn - 1, stext_pfn,
+ eshared_pfn - 1, eshared_pfn, end_pfn);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (INITRD_START && INITRD_SIZE) {
+ sinitrd_pfn = PFN_DOWN(__pa(INITRD_START));
+ einitrd_pfn = PFN_UP(__pa(INITRD_START + INITRD_SIZE));
+ min_size = einitrd_pfn << 2;
+ hlen += snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen,
+ " EW %.5X-%.5X", sinitrd_pfn, einitrd_pfn);
+ }
+#endif
+
+ snprintf(defsys_cmd + hlen, DEFSYS_CMD_SIZE - hlen,
+ " EW MINSIZE=%.7iK PARMREGS=0-13", min_size);
+ defsys_cmd[DEFSYS_CMD_SIZE - 1] = '\0';
+ snprintf(savesys_cmd, SAVESYS_CMD_SIZE, "SAVESYS %s \n IPL %s",
+ kernel_nss_name, kernel_nss_name);
+ savesys_cmd[SAVESYS_CMD_SIZE - 1] = '\0';
+
+ __cpcmd(defsys_cmd, NULL, 0, &response);
+
+ if (response != 0) {
+ pr_err("Defining the Linux kernel NSS failed with rc=%d\n",
+ response);
+ kernel_nss_name[0] = '\0';
+ return;
+ }
+
+ len = strlen(savesys_cmd);
+ ASCEBC(savesys_cmd, len);
+ response = savesys_ipl_nss(savesys_cmd, len);
+
+ /* On success: response is equal to the command size,
+ * max SAVESYS_CMD_SIZE
+ * On error: response contains the numeric portion of cp error message.
+ * for SAVESYS it will be >= 263
+ * for missing privilege class, it will be 1
+ */
+ if (response > SAVESYS_CMD_SIZE || response == 1) {
+ pr_err("Saving the Linux kernel NSS failed with rc=%d\n",
+ response);
+ kernel_nss_name[0] = '\0';
+ return;
+ }
+
+ /* re-initialize cputime accounting. */
+ sched_clock_base_cc = get_tod_clock();
+ S390_lowcore.last_update_clock = sched_clock_base_cc;
+ S390_lowcore.last_update_timer = 0x7fffffffffffffffULL;
+ S390_lowcore.user_timer = 0;
+ S390_lowcore.system_timer = 0;
+ asm volatile("SPT 0(%0)" : : "a" (&S390_lowcore.last_update_timer));
+
+ /* re-setup boot command line with new ipl vm parms */
+ ipl_update_parameters();
+ setup_boot_command_line();
+
+ ipl_flags = IPL_NSS_VALID;
+}
+
+#else /* CONFIG_SHARED_KERNEL */
+
+static inline void create_kernel_nss(void) { }
+
+#endif /* CONFIG_SHARED_KERNEL */
+
+/*
+ * Clear bss memory
+ */
+static noinline __init void clear_bss_section(void)
+{
+ memset(__bss_start, 0, __bss_stop - __bss_start);
+}
+
+/*
+ * Initialize storage key for kernel pages
+ */
+static noinline __init void init_kernel_storage_key(void)
+{
+#if PAGE_DEFAULT_KEY
+ unsigned long end_pfn, init_pfn;
+
+ end_pfn = PFN_UP(__pa(&_end));
+
+ for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++)
+ page_set_storage_key(init_pfn << PAGE_SHIFT,
+ PAGE_DEFAULT_KEY, 0);
+#endif
+}
+
+static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+static noinline __init void detect_machine_type(void)
+{
+ struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page;
+
+ /* Check current-configuration-level */
+ if (stsi(NULL, 0, 0, 0) <= 2) {
+ S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR;
+ return;
+ }
+ /* Get virtual-machine cpu information. */
+ if (stsi(vmms, 3, 2, 2) || !vmms->count)
+ return;
+
+ /* Running under KVM? If not we assume z/VM */
+ if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
+ else
+ S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
+}
+
+static __init void setup_topology(void)
+{
+ int max_mnest;
+
+ if (!test_facility(11))
+ return;
+ S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY;
+ for (max_mnest = 6; max_mnest > 1; max_mnest--) {
+ if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0)
+ break;
+ }
+ topology_max_mnest = max_mnest;
+}
+
+static void early_pgm_check_handler(void)
+{
+ const struct exception_table_entry *fixup;
+ unsigned long cr0, cr0_new;
+ unsigned long addr;
+
+ addr = S390_lowcore.program_old_psw.addr;
+ fixup = search_exception_tables(addr & PSW_ADDR_INSN);
+ if (!fixup)
+ disabled_wait(0);
+ /* Disable low address protection before storing into lowcore. */
+ __ctl_store(cr0, 0, 0);
+ cr0_new = cr0 & ~(1UL << 28);
+ __ctl_load(cr0_new, 0, 0);
+ S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
+ __ctl_load(cr0, 0, 0);
+}
+
+static noinline __init void setup_lowcore_early(void)
+{
+ psw_t psw;
+
+ psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
+ psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler;
+ S390_lowcore.external_new_psw = psw;
+ psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+ S390_lowcore.program_new_psw = psw;
+ s390_base_pgm_handler_fn = early_pgm_check_handler;
+}
+
+static noinline __init void setup_facility_list(void)
+{
+ stfle(S390_lowcore.stfle_fac_list,
+ ARRAY_SIZE(S390_lowcore.stfle_fac_list));
+}
+
+static __init void detect_diag9c(void)
+{
+ unsigned int cpu_address;
+ int rc;
+
+ cpu_address = stap();
+ asm volatile(
+ " diag %2,0,0x9c\n"
+ "0: la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
+ if (!rc)
+ S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
+}
+
+static __init void detect_diag44(void)
+{
+ int rc;
+
+ asm volatile(
+ " diag 0,0,0x44\n"
+ "0: la %0,0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
+ if (!rc)
+ S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
+}
+
+static __init void detect_machine_facilities(void)
+{
+ if (test_facility(8)) {
+ S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
+ __ctl_set_bit(0, 23);
+ }
+ if (test_facility(78))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
+ if (test_facility(3))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
+ if (test_facility(40))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_LPP;
+ if (test_facility(50) && test_facility(73))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
+ if (test_facility(51))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
+ if (test_facility(129))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
+}
+
+static int __init cad_setup(char *str)
+{
+ int val;
+
+ get_option(&str, &val);
+ if (val && test_facility(128))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_CAD;
+ return 0;
+}
+early_param("cad", cad_setup);
+
+static int __init cad_init(void)
+{
+ if (MACHINE_HAS_CAD)
+ /* Enable problem state CAD. */
+ __ctl_set_bit(2, 3);
+ return 0;
+}
+early_initcall(cad_init);
+
+static __init void rescue_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20);
+ /*
+ * Just like in case of IPL from VM reader we make sure there is a
+ * gap of 4MB between end of kernel and start of initrd.
+ * That way we can also be sure that saving an NSS will succeed,
+ * which however only requires different segments.
+ */
+ if (!INITRD_START || !INITRD_SIZE)
+ return;
+ if (INITRD_START >= min_initrd_addr)
+ return;
+ memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE);
+ INITRD_START = min_initrd_addr;
+#endif
+}
+
+/* Set up boot command line */
+static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t))
+{
+ char *parm, *delim;
+ size_t rc, len;
+
+ len = strlen(boot_command_line);
+
+ delim = boot_command_line + len; /* '\0' character position */
+ parm = boot_command_line + len + 1; /* append right after '\0' */
+
+ rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1);
+ if (rc) {
+ if (*parm == '=')
+ memmove(boot_command_line, parm + 1, rc);
+ else
+ *delim = ' '; /* replace '\0' with space */
+ }
+}
+
+static inline int has_ebcdic_char(const char *str)
+{
+ int i;
+
+ for (i = 0; str[i]; i++)
+ if (str[i] & 0x80)
+ return 1;
+ return 0;
+}
+
+static void __init setup_boot_command_line(void)
+{
+ COMMAND_LINE[ARCH_COMMAND_LINE_SIZE - 1] = 0;
+ /* convert arch command line to ascii if necessary */
+ if (has_ebcdic_char(COMMAND_LINE))
+ EBCASC(COMMAND_LINE, ARCH_COMMAND_LINE_SIZE);
+ /* copy arch command line */
+ strlcpy(boot_command_line, strstrip(COMMAND_LINE),
+ ARCH_COMMAND_LINE_SIZE);
+
+ /* append IPL PARM data to the boot command line */
+ if (MACHINE_IS_VM)
+ append_to_cmdline(append_ipl_vmparm);
+
+ append_to_cmdline(append_ipl_scpdata);
+}
+
+/*
+ * Save ipl parameters, clear bss memory, initialize storage keys
+ * and create a kernel NSS at startup if the SAVESYS= parm is defined
+ */
+void __init startup_init(void)
+{
+ reset_tod_clock();
+ ipl_save_parameters();
+ rescue_initrd();
+ clear_bss_section();
+ init_kernel_storage_key();
+ lockdep_init();
+ lockdep_off();
+ setup_lowcore_early();
+ setup_facility_list();
+ detect_machine_type();
+ ipl_update_parameters();
+ setup_boot_command_line();
+ create_kernel_nss();
+ detect_diag9c();
+ detect_diag44();
+ detect_machine_facilities();
+ setup_topology();
+ sclp_early_detect();
+ lockdep_on();
+}
diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c
new file mode 100644
index 000000000..b971c6be6
--- /dev/null
+++ b/arch/s390/kernel/ebcdic.c
@@ -0,0 +1,400 @@
+/*
+ * ECBDIC -> ASCII, ASCII -> ECBDIC,
+ * upper to lower case (EBCDIC) conversion tables.
+ *
+ * S390 version
+ * Copyright IBM Corp. 1999
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Martin Peschke <peschke@fh-brandenburg.de>
+ */
+
+#include <linux/module.h>
+#include <asm/types.h>
+#include <asm/ebcdic.h>
+
+/*
+ * ASCII (IBM PC 437) -> EBCDIC 037
+ */
+__u8 _ascebc[256] =
+{
+ /*00 NUL SOH STX ETX EOT ENQ ACK BEL */
+ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F,
+ /*08 BS HT LF VT FF CR SO SI */
+ /* ->NL */
+ 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /*10 DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
+ 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26,
+ /*18 CAN EM SUB ESC FS GS RS US */
+ /* ->IGS ->IRS ->IUS */
+ 0x18, 0x19, 0x3F, 0x27, 0x22, 0x1D, 0x1E, 0x1F,
+ /*20 SP ! " # $ % & ' */
+ 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D,
+ /*28 ( ) * + , - . / */
+ 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
+ /*30 0 1 2 3 4 5 6 7 */
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ /*38 8 9 : ; < = > ? */
+ 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
+ /*40 @ A B C D E F G */
+ 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ /*48 H I J K L M N O */
+ 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
+ /*50 P Q R S T U V W */
+ 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6,
+ /*58 X Y Z [ \ ] ^ _ */
+ 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D,
+ /*60 ` a b c d e f g */
+ 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ /*68 h i j k l m n o */
+ 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+ /*70 p q r s t u v w */
+ 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6,
+ /*78 x y z { | } ~ DL */
+ 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
+ /*80*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*88*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*90*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*98*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E0 sz */
+ 0x3F, 0x59, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F8*/
+ 0x90, 0x3F, 0x3F, 0x3F, 0x3F, 0xEA, 0x3F, 0xFF
+};
+
+/*
+ * EBCDIC 037 -> ASCII (IBM PC 437)
+ */
+__u8 _ebcasc[256] =
+{
+ /* 0x00 NUL SOH STX ETX *SEL HT *RNL DEL */
+ 0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F,
+ /* 0x08 -GE -SPS -RPT VT FF CR SO SI */
+ 0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /* 0x10 DLE DC1 DC2 DC3 -RES -NL BS -POC
+ -ENP ->LF */
+ 0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07,
+ /* 0x18 CAN EM -UBS -CU1 -IFS -IGS -IRS -ITB
+ -IUS */
+ 0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x20 -DS -SOS FS -WUS -BYP LF ETB ESC
+ -INP */
+ 0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B,
+ /* 0x28 -SA -SFE -SM -CSP -MFA ENQ ACK BEL
+ -SW */
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07,
+ /* 0x30 ---- ---- SYN -IR -PP -TRN -NBS EOT */
+ 0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04,
+ /* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */
+ 0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A,
+ /* 0x40 SP RSP ä ---- */
+ 0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86,
+ /* 0x48 . < ( + | */
+ 0x87, 0xA4, 0x9B, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
+ /* 0x50 & ---- */
+ 0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07,
+ /* 0x58 ß ! $ * ) ; */
+ 0x8D, 0xE1, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAA,
+ /* 0x60 - / ---- Ä ---- ---- ---- */
+ 0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F,
+ /* 0x68 ---- , % _ > ? */
+ 0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
+ /* 0x70 ---- ---- ---- ---- ---- ---- ---- */
+ 0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x78 * ` : # @ ' = " */
+ 0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
+ /* 0x80 * a b c d e f g */
+ 0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ /* 0x88 h i ---- ---- ---- */
+ 0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1,
+ /* 0x90 ° j k l m n o p */
+ 0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
+ /* 0x98 q r ---- ---- */
+ 0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07,
+ /* 0xA0 ~ s t u v w x */
+ 0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+ /* 0xA8 y z ---- ---- ---- ---- */
+ 0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07,
+ /* 0xB0 ^ ---- § ---- */
+ 0x5E, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC,
+ /* 0xB8 ---- [ ] ---- ---- ---- ---- */
+ 0xAB, 0x07, 0x5B, 0x5D, 0x07, 0x07, 0x07, 0x07,
+ /* 0xC0 { A B C D E F G */
+ 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ /* 0xC8 H I ---- ö ---- */
+ 0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07,
+ /* 0xD0 } J K L M N O P */
+ 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
+ /* 0xD8 Q R ---- ü */
+ 0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98,
+ /* 0xE0 \ S T U V W X */
+ 0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ /* 0xE8 Y Z ---- Ö ---- ---- ---- */
+ 0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07,
+ /* 0xF0 0 1 2 3 4 5 6 7 */
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */
+ 0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07
+};
+
+
+/*
+ * ASCII (IBM PC 437) -> EBCDIC 500
+ */
+__u8 _ascebc_500[256] =
+{
+ /*00 NUL SOH STX ETX EOT ENQ ACK BEL */
+ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F,
+ /*08 BS HT LF VT FF CR SO SI */
+ /* ->NL */
+ 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /*10 DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
+ 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26,
+ /*18 CAN EM SUB ESC FS GS RS US */
+ /* ->IGS ->IRS ->IUS */
+ 0x18, 0x19, 0x3F, 0x27, 0x22, 0x1D, 0x1E, 0x1F,
+ /*20 SP ! " # $ % & ' */
+ 0x40, 0x4F, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D,
+ /*28 ( ) * + , - . / */
+ 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
+ /*30 0 1 2 3 4 5 6 7 */
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ /*38 8 9 : ; < = > ? */
+ 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
+ /*40 @ A B C D E F G */
+ 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ /*48 H I J K L M N O */
+ 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
+ /*50 P Q R S T U V W */
+ 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6,
+ /*58 X Y Z [ \ ] ^ _ */
+ 0xE7, 0xE8, 0xE9, 0x4A, 0xE0, 0x5A, 0x5F, 0x6D,
+ /*60 ` a b c d e f g */
+ 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ /*68 h i j k l m n o */
+ 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
+ /*70 p q r s t u v w */
+ 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6,
+ /*78 x y z { | } ~ DL */
+ 0xA7, 0xA8, 0xA9, 0xC0, 0xBB, 0xD0, 0xA1, 0x07,
+ /*80*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*88*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*90*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*98*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*A8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*B8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*C8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*D8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E0 sz */
+ 0x3F, 0x59, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*E8*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F0*/
+ 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F,
+ /*F8*/
+ 0x90, 0x3F, 0x3F, 0x3F, 0x3F, 0xEA, 0x3F, 0xFF
+};
+
+/*
+ * EBCDIC 500 -> ASCII (IBM PC 437)
+ */
+__u8 _ebcasc_500[256] =
+{
+ /* 0x00 NUL SOH STX ETX *SEL HT *RNL DEL */
+ 0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F,
+ /* 0x08 -GE -SPS -RPT VT FF CR SO SI */
+ 0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ /* 0x10 DLE DC1 DC2 DC3 -RES -NL BS -POC
+ -ENP ->LF */
+ 0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07,
+ /* 0x18 CAN EM -UBS -CU1 -IFS -IGS -IRS -ITB
+ -IUS */
+ 0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x20 -DS -SOS FS -WUS -BYP LF ETB ESC
+ -INP */
+ 0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B,
+ /* 0x28 -SA -SFE -SM -CSP -MFA ENQ ACK BEL
+ -SW */
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07,
+ /* 0x30 ---- ---- SYN -IR -PP -TRN -NBS EOT */
+ 0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04,
+ /* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */
+ 0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A,
+ /* 0x40 SP RSP ä ---- */
+ 0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86,
+ /* 0x48 [ . < ( + ! */
+ 0x87, 0xA4, 0x5B, 0x2E, 0x3C, 0x28, 0x2B, 0x21,
+ /* 0x50 & ---- */
+ 0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07,
+ /* 0x58 ß ] $ * ) ; ^ */
+ 0x8D, 0xE1, 0x5D, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
+ /* 0x60 - / ---- Ä ---- ---- ---- */
+ 0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F,
+ /* 0x68 ---- , % _ > ? */
+ 0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
+ /* 0x70 ---- ---- ---- ---- ---- ---- ---- */
+ 0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ /* 0x78 * ` : # @ ' = " */
+ 0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
+ /* 0x80 * a b c d e f g */
+ 0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ /* 0x88 h i ---- ---- ---- */
+ 0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1,
+ /* 0x90 ° j k l m n o p */
+ 0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
+ /* 0x98 q r ---- ---- */
+ 0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07,
+ /* 0xA0 ~ s t u v w x */
+ 0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+ /* 0xA8 y z ---- ---- ---- ---- */
+ 0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07,
+ /* 0xB0 ---- § ---- */
+ 0x9B, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC,
+ /* 0xB8 ---- | ---- ---- ---- ---- */
+ 0xAB, 0x07, 0xAA, 0x7C, 0x07, 0x07, 0x07, 0x07,
+ /* 0xC0 { A B C D E F G */
+ 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ /* 0xC8 H I ---- ö ---- */
+ 0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07,
+ /* 0xD0 } J K L M N O P */
+ 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
+ /* 0xD8 Q R ---- ü */
+ 0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98,
+ /* 0xE0 \ S T U V W X */
+ 0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ /* 0xE8 Y Z ---- Ö ---- ---- ---- */
+ 0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07,
+ /* 0xF0 0 1 2 3 4 5 6 7 */
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ /* 0xF8 8 9 ---- ---- Ü ---- ---- ---- */
+ 0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07
+};
+
+
+/*
+ * EBCDIC 037/500 conversion table:
+ * from upper to lower case
+ */
+__u8 _ebc_tolower[256] =
+{
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
+ 0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ 0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+ 0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F,
+ 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
+ 0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0x8D, 0x8E, 0xAF,
+ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
+ 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
+ 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
+ 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
+ 0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
+ 0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ 0xF8, 0xF9, 0xFA, 0xDB, 0xDC, 0xDD, 0xDE, 0xFF
+};
+
+
+/*
+ * EBCDIC 037/500 conversion table:
+ * from lower to upper case
+ */
+__u8 _ebc_toupper[256] =
+{
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
+ 0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
+ 0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
+ 0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ 0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xAD, 0xAE, 0x8F,
+ 0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
+ 0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F,
+ 0xA0, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
+ 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
+ 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
+ 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
+ 0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
+ 0xD8, 0xD9, 0xDA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
+ 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
+ 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
+ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
+ 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
+};
+
+EXPORT_SYMBOL(_ascebc_500);
+EXPORT_SYMBOL(_ebcasc_500);
+EXPORT_SYMBOL(_ascebc);
+EXPORT_SYMBOL(_ebcasc);
+EXPORT_SYMBOL(_ebc_tolower);
+EXPORT_SYMBOL(_ebc_toupper);
+
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
new file mode 100644
index 000000000..99b44acbf
--- /dev/null
+++ b/arch/s390/kernel/entry.S
@@ -0,0 +1,1059 @@
+/*
+ * S390 low-level entry points.
+ *
+ * Copyright IBM Corp. 1999, 2012
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Hartmut Penner (hp@de.ibm.com),
+ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+#include <asm/page.h>
+#include <asm/sigp.h>
+#include <asm/irq.h>
+
+__PT_R0 = __PT_GPRS
+__PT_R1 = __PT_GPRS + 8
+__PT_R2 = __PT_GPRS + 16
+__PT_R3 = __PT_GPRS + 24
+__PT_R4 = __PT_GPRS + 32
+__PT_R5 = __PT_GPRS + 40
+__PT_R6 = __PT_GPRS + 48
+__PT_R7 = __PT_GPRS + 56
+__PT_R8 = __PT_GPRS + 64
+__PT_R9 = __PT_GPRS + 72
+__PT_R10 = __PT_GPRS + 80
+__PT_R11 = __PT_GPRS + 88
+__PT_R12 = __PT_GPRS + 96
+__PT_R13 = __PT_GPRS + 104
+__PT_R14 = __PT_GPRS + 112
+__PT_R15 = __PT_GPRS + 120
+
+STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
+STACK_SIZE = 1 << STACK_SHIFT
+STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
+
+_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
+ _TIF_UPROBE)
+_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+ _TIF_SYSCALL_TRACEPOINT)
+_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE)
+_PIF_WORK = (_PIF_PER_TRAP)
+
+#define BASED(name) name-system_call(%r13)
+
+ .macro TRACE_IRQS_ON
+#ifdef CONFIG_TRACE_IRQFLAGS
+ basr %r2,%r0
+ brasl %r14,trace_hardirqs_on_caller
+#endif
+ .endm
+
+ .macro TRACE_IRQS_OFF
+#ifdef CONFIG_TRACE_IRQFLAGS
+ basr %r2,%r0
+ brasl %r14,trace_hardirqs_off_caller
+#endif
+ .endm
+
+ .macro LOCKDEP_SYS_EXIT
+#ifdef CONFIG_LOCKDEP
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jz .+10
+ brasl %r14,lockdep_sys_exit
+#endif
+ .endm
+
+ .macro LPP newpp
+#if IS_ENABLED(CONFIG_KVM)
+ tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP
+ jz .+8
+ .insn s,0xb2800000,\newpp
+#endif
+ .endm
+
+ .macro HANDLE_SIE_INTERCEPT scratch,reason
+#if IS_ENABLED(CONFIG_KVM)
+ tmhh %r8,0x0001 # interrupting from user ?
+ jnz .+62
+ lgr \scratch,%r9
+ slg \scratch,BASED(.Lsie_critical)
+ clg \scratch,BASED(.Lsie_critical_length)
+ .if \reason==1
+ # Some program interrupts are suppressing (e.g. protection).
+ # We must also check the instruction after SIE in that case.
+ # do_protection_exception will rewind to .Lrewind_pad
+ jh .+42
+ .else
+ jhe .+42
+ .endif
+ lg %r14,__SF_EMPTY(%r15) # get control block pointer
+ LPP __SF_EMPTY+16(%r15) # set host id
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ larl %r9,sie_exit # skip forward to sie_exit
+ mvi __SF_EMPTY+31(%r15),\reason # set exit reason
+#endif
+ .endm
+
+ .macro CHECK_STACK stacksize,savearea
+#ifdef CONFIG_CHECK_STACK
+ tml %r15,\stacksize - CONFIG_STACK_GUARD
+ lghi %r14,\savearea
+ jz stack_overflow
+#endif
+ .endm
+
+ .macro SWITCH_ASYNC savearea,stack,shift
+ tmhh %r8,0x0001 # interrupting from user ?
+ jnz 1f
+ lgr %r14,%r9
+ slg %r14,BASED(.Lcritical_start)
+ clg %r14,BASED(.Lcritical_length)
+ jhe 0f
+ lghi %r11,\savearea # inside critical section, do cleanup
+ brasl %r14,cleanup_critical
+ tmhh %r8,0x0001 # retest problem state after cleanup
+ jnz 1f
+0: lg %r14,\stack # are we already on the target stack?
+ slgr %r14,%r15
+ srag %r14,%r14,\shift
+ jnz 1f
+ CHECK_STACK 1<<\shift,\savearea
+ aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ j 2f
+1: lg %r15,\stack # load target stack
+2: la %r11,STACK_FRAME_OVERHEAD(%r15)
+ .endm
+
+ .macro UPDATE_VTIME scratch,enter_timer
+ lg \scratch,__LC_EXIT_TIMER
+ slg \scratch,\enter_timer
+ alg \scratch,__LC_USER_TIMER
+ stg \scratch,__LC_USER_TIMER
+ lg \scratch,__LC_LAST_UPDATE_TIMER
+ slg \scratch,__LC_EXIT_TIMER
+ alg \scratch,__LC_SYSTEM_TIMER
+ stg \scratch,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer
+ .endm
+
+ .macro LAST_BREAK scratch
+ srag \scratch,%r10,23
+ jz .+10
+ stg %r10,__TI_last_break(%r12)
+ .endm
+
+ .macro REENABLE_IRQS
+ stg %r8,__LC_RETURN_PSW
+ ni __LC_RETURN_PSW,0xbf
+ ssm __LC_RETURN_PSW
+ .endm
+
+ .macro STCK savearea
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+ .insn s,0xb27c0000,\savearea # store clock fast
+#else
+ .insn s,0xb2050000,\savearea # store clock
+#endif
+ .endm
+
+ .section .kprobes.text, "ax"
+
+/*
+ * Scheduler resume function, called by switch_to
+ * gpr2 = (task_struct *) prev
+ * gpr3 = (task_struct *) next
+ * Returns:
+ * gpr2 = prev
+ */
+ENTRY(__switch_to)
+ stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
+ stg %r15,__THREAD_ksp(%r2) # store kernel stack of prev
+ lg %r4,__THREAD_info(%r2) # get thread_info of prev
+ lg %r5,__THREAD_info(%r3) # get thread_info of next
+ lgr %r15,%r5
+ aghi %r15,STACK_INIT # end of kernel stack of next
+ stg %r3,__LC_CURRENT # store task struct of next
+ stg %r5,__LC_THREAD_INFO # store thread info of next
+ stg %r15,__LC_KERNEL_STACK # store end of kernel stack
+ lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
+ mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next
+ lg %r15,__THREAD_ksp(%r3) # load kernel stack of next
+ lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
+ br %r14
+
+.L__critical_start:
+/*
+ * SVC interrupt handler routine. System calls are synchronous events and
+ * are executed with interrupts enabled.
+ */
+
+ENTRY(system_call)
+ stpt __LC_SYNC_ENTER_TIMER
+.Lsysc_stmg:
+ stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_THREAD_INFO
+ lghi %r14,_PIF_SYSCALL
+.Lsysc_per:
+ lg %r15,__LC_KERNEL_STACK
+ la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
+.Lsysc_vtime:
+ UPDATE_VTIME %r13,__LC_SYNC_ENTER_TIMER
+ LAST_BREAK %r13
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+ mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
+ mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
+ stg %r14,__PT_FLAGS(%r11)
+.Lsysc_do_svc:
+ lg %r10,__TI_sysc_table(%r12) # address of system call table
+ llgh %r8,__PT_INT_CODE+2(%r11)
+ slag %r8,%r8,2 # shift and test for svc 0
+ jnz .Lsysc_nr_ok
+ # svc 0: system call number in %r1
+ llgfr %r1,%r1 # clear high word in r1
+ cghi %r1,NR_syscalls
+ jnl .Lsysc_nr_ok
+ sth %r1,__PT_INT_CODE+2(%r11)
+ slag %r8,%r1,2
+.Lsysc_nr_ok:
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ stg %r2,__PT_ORIG_GPR2(%r11)
+ stg %r7,STACK_FRAME_OVERHEAD(%r15)
+ lgf %r9,0(%r8,%r10) # get system call add.
+ tm __TI_flags+7(%r12),_TIF_TRACE
+ jnz .Lsysc_tracesys
+ basr %r14,%r9 # call sys_xxxx
+ stg %r2,__PT_R2(%r11) # store return value
+
+.Lsysc_return:
+ LOCKDEP_SYS_EXIT
+.Lsysc_tif:
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jno .Lsysc_restore
+ tm __PT_FLAGS+7(%r11),_PIF_WORK
+ jnz .Lsysc_work
+ tm __TI_flags+7(%r12),_TIF_WORK
+ jnz .Lsysc_work # check for work
+ tm __LC_CPU_FLAGS+7,_CIF_WORK
+ jnz .Lsysc_work
+.Lsysc_restore:
+ lg %r14,__LC_VDSO_PER_CPU
+ lmg %r0,%r10,__PT_R0(%r11)
+ mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+ stpt __LC_EXIT_TIMER
+ mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+ lmg %r11,%r15,__PT_R11(%r11)
+ lpswe __LC_RETURN_PSW
+.Lsysc_done:
+
+#
+# One of the work bits is on. Find out which one.
+#
+.Lsysc_work:
+ tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
+ jo .Lsysc_mcck_pending
+ tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
+ jo .Lsysc_reschedule
+#ifdef CONFIG_UPROBES
+ tm __TI_flags+7(%r12),_TIF_UPROBE
+ jo .Lsysc_uprobe_notify
+#endif
+ tm __PT_FLAGS+7(%r11),_PIF_PER_TRAP
+ jo .Lsysc_singlestep
+ tm __TI_flags+7(%r12),_TIF_SIGPENDING
+ jo .Lsysc_sigpending
+ tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
+ jo .Lsysc_notify_resume
+ tm __LC_CPU_FLAGS+7,_CIF_ASCE
+ jo .Lsysc_uaccess
+ j .Lsysc_return # beware of critical section cleanup
+
+#
+# _TIF_NEED_RESCHED is set, call schedule
+#
+.Lsysc_reschedule:
+ larl %r14,.Lsysc_return
+ jg schedule
+
+#
+# _CIF_MCCK_PENDING is set, call handler
+#
+.Lsysc_mcck_pending:
+ larl %r14,.Lsysc_return
+ jg s390_handle_mcck # TIF bit will be cleared by handler
+
+#
+# _CIF_ASCE is set, load user space asce
+#
+.Lsysc_uaccess:
+ ni __LC_CPU_FLAGS+7,255-_CIF_ASCE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ j .Lsysc_return
+
+#
+# _TIF_SIGPENDING is set, call do_signal
+#
+.Lsysc_sigpending:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,do_signal
+ tm __PT_FLAGS+7(%r11),_PIF_SYSCALL
+ jno .Lsysc_return
+ lmg %r2,%r7,__PT_R2(%r11) # load svc arguments
+ lg %r10,__TI_sysc_table(%r12) # address of system call table
+ lghi %r8,0 # svc 0 returns -ENOSYS
+ llgh %r1,__PT_INT_CODE+2(%r11) # load new svc number
+ cghi %r1,NR_syscalls
+ jnl .Lsysc_nr_ok # invalid svc number -> do svc 0
+ slag %r8,%r1,2
+ j .Lsysc_nr_ok # restart svc
+
+#
+# _TIF_NOTIFY_RESUME is set, call do_notify_resume
+#
+.Lsysc_notify_resume:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,.Lsysc_return
+ jg do_notify_resume
+
+#
+# _TIF_UPROBE is set, call uprobe_notify_resume
+#
+#ifdef CONFIG_UPROBES
+.Lsysc_uprobe_notify:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,.Lsysc_return
+ jg uprobe_notify_resume
+#endif
+
+#
+# _PIF_PER_TRAP is set, call do_per_trap
+#
+.Lsysc_singlestep:
+ ni __PT_FLAGS+7(%r11),255-_PIF_PER_TRAP
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,.Lsysc_return
+ jg do_per_trap
+
+#
+# call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
+# and after the system call
+#
+.Lsysc_tracesys:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ la %r3,0
+ llgh %r0,__PT_INT_CODE+2(%r11)
+ stg %r0,__PT_R2(%r11)
+ brasl %r14,do_syscall_trace_enter
+ lghi %r0,NR_syscalls
+ clgr %r0,%r2
+ jnh .Lsysc_tracenogo
+ sllg %r8,%r2,2
+ lgf %r9,0(%r8,%r10)
+.Lsysc_tracego:
+ lmg %r3,%r7,__PT_R3(%r11)
+ stg %r7,STACK_FRAME_OVERHEAD(%r15)
+ lg %r2,__PT_ORIG_GPR2(%r11)
+ basr %r14,%r9 # call sys_xxx
+ stg %r2,__PT_R2(%r11) # store return value
+.Lsysc_tracenogo:
+ tm __TI_flags+7(%r12),_TIF_TRACE
+ jz .Lsysc_return
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,.Lsysc_return
+ jg do_syscall_trace_exit
+
+#
+# a new process exits the kernel with ret_from_fork
+#
+ENTRY(ret_from_fork)
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ lg %r12,__LC_THREAD_INFO
+ brasl %r14,schedule_tail
+ TRACE_IRQS_ON
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ?
+ jne .Lsysc_tracenogo
+ # it's a kernel thread
+ lmg %r9,%r10,__PT_R9(%r11) # load gprs
+ENTRY(kernel_thread_starter)
+ la %r2,0(%r10)
+ basr %r14,%r9
+ j .Lsysc_tracenogo
+
+/*
+ * Program check handler routine
+ */
+
+ENTRY(pgm_check_handler)
+ stpt __LC_SYNC_ENTER_TIMER
+ stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_THREAD_INFO
+ larl %r13,system_call
+ lmg %r8,%r9,__LC_PGM_OLD_PSW
+ HANDLE_SIE_INTERCEPT %r14,1
+ tmhh %r8,0x0001 # test problem state bit
+ jnz 1f # -> fault in user space
+ tmhh %r8,0x4000 # PER bit set in old PSW ?
+ jnz 0f # -> enabled, can't be a double fault
+ tm __LC_PGM_ILC+3,0x80 # check for per exception
+ jnz .Lpgm_svcper # -> single stepped svc
+0: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+ aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ j 2f
+1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
+ LAST_BREAK %r14
+ lg %r15,__LC_KERNEL_STACK
+ lg %r14,__TI_task(%r12)
+ lghi %r13,__LC_PGM_TDB
+ tm __LC_PGM_ILC+2,0x02 # check for transaction abort
+ jz 2f
+ mvc __THREAD_trap_tdb(256,%r14),0(%r13)
+2: la %r11,STACK_FRAME_OVERHEAD(%r15)
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+ stmg %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC
+ mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ stg %r10,__PT_ARGS(%r11)
+ tm __LC_PGM_ILC+3,0x80 # check for per exception
+ jz 0f
+ tmhh %r8,0x0001 # kernel per event ?
+ jz .Lpgm_kprobe
+ oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP
+ mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
+ mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
+ mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
+0: REENABLE_IRQS
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ larl %r1,pgm_check_table
+ llgh %r10,__PT_INT_CODE+2(%r11)
+ nill %r10,0x007f
+ sll %r10,2
+ je .Lsysc_return
+ lgf %r1,0(%r10,%r1) # load address of handler routine
+ lgr %r2,%r11 # pass pointer to pt_regs
+ basr %r14,%r1 # branch to interrupt-handler
+ j .Lsysc_return
+
+#
+# PER event in supervisor state, must be kprobes
+#
+.Lpgm_kprobe:
+ REENABLE_IRQS
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,do_per_trap
+ j .Lsysc_return
+
+#
+# single stepped system call
+#
+.Lpgm_svcper:
+ mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
+ larl %r14,.Lsysc_per
+ stg %r14,__LC_RETURN_PSW+8
+ lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP
+ lpswe __LC_RETURN_PSW # branch to .Lsysc_per and enable irqs
+
+/*
+ * IO interrupt handler routine
+ */
+ENTRY(io_int_handler)
+ STCK __LC_INT_CLOCK
+ stpt __LC_ASYNC_ENTER_TIMER
+ stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_THREAD_INFO
+ larl %r13,system_call
+ lmg %r8,%r9,__LC_IO_OLD_PSW
+ HANDLE_SIE_INTERCEPT %r14,2
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
+ tmhh %r8,0x0001 # interrupting from user?
+ jz .Lio_skip
+ UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
+ LAST_BREAK %r14
+.Lio_skip:
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+ stmg %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ TRACE_IRQS_OFF
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+.Lio_loop:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ lghi %r3,IO_INTERRUPT
+ tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ?
+ jz .Lio_call
+ lghi %r3,THIN_INTERRUPT
+.Lio_call:
+ brasl %r14,do_IRQ
+ tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR
+ jz .Lio_return
+ tpi 0
+ jz .Lio_return
+ mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+ j .Lio_loop
+.Lio_return:
+ LOCKDEP_SYS_EXIT
+ TRACE_IRQS_ON
+.Lio_tif:
+ tm __TI_flags+7(%r12),_TIF_WORK
+ jnz .Lio_work # there is work to do (signals etc.)
+ tm __LC_CPU_FLAGS+7,_CIF_WORK
+ jnz .Lio_work
+.Lio_restore:
+ lg %r14,__LC_VDSO_PER_CPU
+ lmg %r0,%r10,__PT_R0(%r11)
+ mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+ stpt __LC_EXIT_TIMER
+ mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+ lmg %r11,%r15,__PT_R11(%r11)
+ lpswe __LC_RETURN_PSW
+.Lio_done:
+
+#
+# There is work todo, find out in which context we have been interrupted:
+# 1) if we return to user space we can do all _TIF_WORK work
+# 2) if we return to kernel code and kvm is enabled check if we need to
+# modify the psw to leave SIE
+# 3) if we return to kernel code and preemptive scheduling is enabled check
+# the preemption counter and if it is zero call preempt_schedule_irq
+# Before any work can be done, a switch to the kernel stack is required.
+#
+.Lio_work:
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jo .Lio_work_user # yes -> do resched & signal
+#ifdef CONFIG_PREEMPT
+ # check for preemptive scheduling
+ icm %r0,15,__TI_precount(%r12)
+ jnz .Lio_restore # preemption is disabled
+ tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
+ jno .Lio_restore
+ # switch to kernel stack
+ lg %r1,__PT_R15(%r11)
+ aghi %r1,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
+ xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
+ la %r11,STACK_FRAME_OVERHEAD(%r1)
+ lgr %r15,%r1
+ # TRACE_IRQS_ON already done at .Lio_return, call
+ # TRACE_IRQS_OFF to keep things symmetrical
+ TRACE_IRQS_OFF
+ brasl %r14,preempt_schedule_irq
+ j .Lio_return
+#else
+ j .Lio_restore
+#endif
+
+#
+# Need to do work before returning to userspace, switch to kernel stack
+#
+.Lio_work_user:
+ lg %r1,__LC_KERNEL_STACK
+ mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
+ xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
+ la %r11,STACK_FRAME_OVERHEAD(%r1)
+ lgr %r15,%r1
+
+#
+# One of the work bits is on. Find out which one.
+#
+.Lio_work_tif:
+ tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
+ jo .Lio_mcck_pending
+ tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
+ jo .Lio_reschedule
+ tm __TI_flags+7(%r12),_TIF_SIGPENDING
+ jo .Lio_sigpending
+ tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME
+ jo .Lio_notify_resume
+ tm __LC_CPU_FLAGS+7,_CIF_ASCE
+ jo .Lio_uaccess
+ j .Lio_return # beware of critical section cleanup
+
+#
+# _CIF_MCCK_PENDING is set, call handler
+#
+.Lio_mcck_pending:
+ # TRACE_IRQS_ON already done at .Lio_return
+ brasl %r14,s390_handle_mcck # TIF bit will be cleared by handler
+ TRACE_IRQS_OFF
+ j .Lio_return
+
+#
+# _CIF_ASCE is set, load user space asce
+#
+.Lio_uaccess:
+ ni __LC_CPU_FLAGS+7,255-_CIF_ASCE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+ j .Lio_return
+
+#
+# _TIF_NEED_RESCHED is set, call schedule
+#
+.Lio_reschedule:
+ # TRACE_IRQS_ON already done at .Lio_return
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ brasl %r14,schedule # call scheduler
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ TRACE_IRQS_OFF
+ j .Lio_return
+
+#
+# _TIF_SIGPENDING or is set, call do_signal
+#
+.Lio_sigpending:
+ # TRACE_IRQS_ON already done at .Lio_return
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,do_signal
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ TRACE_IRQS_OFF
+ j .Lio_return
+
+#
+# _TIF_NOTIFY_RESUME or is set, call do_notify_resume
+#
+.Lio_notify_resume:
+ # TRACE_IRQS_ON already done at .Lio_return
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,do_notify_resume
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ TRACE_IRQS_OFF
+ j .Lio_return
+
+/*
+ * External interrupt handler routine
+ */
+ENTRY(ext_int_handler)
+ STCK __LC_INT_CLOCK
+ stpt __LC_ASYNC_ENTER_TIMER
+ stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_THREAD_INFO
+ larl %r13,system_call
+ lmg %r8,%r9,__LC_EXT_OLD_PSW
+ HANDLE_SIE_INTERCEPT %r14,3
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT
+ tmhh %r8,0x0001 # interrupting from user ?
+ jz .Lext_skip
+ UPDATE_VTIME %r14,__LC_ASYNC_ENTER_TIMER
+ LAST_BREAK %r14
+.Lext_skip:
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+ stmg %r8,%r9,__PT_PSW(%r11)
+ lghi %r1,__LC_EXT_PARAMS2
+ mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
+ mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
+ mvc __PT_INT_PARM_LONG(8,%r11),0(%r1)
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ TRACE_IRQS_OFF
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lgr %r2,%r11 # pass pointer to pt_regs
+ lghi %r3,EXT_INTERRUPT
+ brasl %r14,do_IRQ
+ j .Lio_return
+
+/*
+ * Load idle PSW. The second "half" of this function is in .Lcleanup_idle.
+ */
+ENTRY(psw_idle)
+ stg %r3,__SF_EMPTY(%r15)
+ larl %r1,.Lpsw_idle_lpsw+4
+ stg %r1,__SF_EMPTY+8(%r15)
+ STCK __CLOCK_IDLE_ENTER(%r2)
+ stpt __TIMER_IDLE_ENTER(%r2)
+.Lpsw_idle_lpsw:
+ lpswe __SF_EMPTY(%r15)
+ br %r14
+.Lpsw_idle_end:
+
+.L__critical_end:
+
+/*
+ * Machine check handler routines
+ */
+ENTRY(mcck_int_handler)
+ STCK __LC_MCCK_CLOCK
+ la %r1,4095 # revalidate r1
+ spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer
+ lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
+ lg %r10,__LC_LAST_BREAK
+ lg %r12,__LC_THREAD_INFO
+ larl %r13,system_call
+ lmg %r8,%r9,__LC_MCK_OLD_PSW
+ HANDLE_SIE_INTERCEPT %r14,4
+ tm __LC_MCCK_CODE,0x80 # system damage?
+ jo .Lmcck_panic # yes -> rest of mcck code invalid
+ lghi %r14,__LC_CPU_TIMER_SAVE_AREA
+ mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
+ tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid?
+ jo 3f
+ la %r14,__LC_SYNC_ENTER_TIMER
+ clc 0(8,%r14),__LC_ASYNC_ENTER_TIMER
+ jl 0f
+ la %r14,__LC_ASYNC_ENTER_TIMER
+0: clc 0(8,%r14),__LC_EXIT_TIMER
+ jl 1f
+ la %r14,__LC_EXIT_TIMER
+1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER
+ jl 2f
+ la %r14,__LC_LAST_UPDATE_TIMER
+2: spt 0(%r14)
+ mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
+3: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid?
+ jno .Lmcck_panic # no -> skip cleanup critical
+ SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_PANIC_STACK,PAGE_SHIFT
+ tm %r8,0x0001 # interrupting from user ?
+ jz .Lmcck_skip
+ UPDATE_VTIME %r14,__LC_MCCK_ENTER_TIMER
+ LAST_BREAK %r14
+.Lmcck_skip:
+ lghi %r14,__LC_GPREGS_SAVE_AREA+64
+ stmg %r0,%r7,__PT_R0(%r11)
+ mvc __PT_R8(64,%r11),0(%r14)
+ stmg %r8,%r9,__PT_PSW(%r11)
+ xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,s390_do_machine_check
+ tm __PT_PSW+1(%r11),0x01 # returning to user ?
+ jno .Lmcck_return
+ lg %r1,__LC_KERNEL_STACK # switch to kernel stack
+ mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
+ xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
+ la %r11,STACK_FRAME_OVERHEAD(%r1)
+ lgr %r15,%r1
+ ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
+ tm __LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
+ jno .Lmcck_return
+ TRACE_IRQS_OFF
+ brasl %r14,s390_handle_mcck
+ TRACE_IRQS_ON
+.Lmcck_return:
+ lg %r14,__LC_VDSO_PER_CPU
+ lmg %r0,%r10,__PT_R0(%r11)
+ mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
+ tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
+ jno 0f
+ stpt __LC_EXIT_TIMER
+ mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+0: lmg %r11,%r15,__PT_R11(%r11)
+ lpswe __LC_RETURN_MCCK_PSW
+
+.Lmcck_panic:
+ lg %r14,__LC_PANIC_STACK
+ slgr %r14,%r15
+ srag %r14,%r14,PAGE_SHIFT
+ jz 0f
+ lg %r15,__LC_PANIC_STACK
+0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ j .Lmcck_skip
+
+#
+# PSW restart interrupt handler
+#
+ENTRY(restart_int_handler)
+ stg %r15,__LC_SAVE_AREA_RESTART
+ lg %r15,__LC_RESTART_STACK
+ aghi %r15,-__PT_SIZE # create pt_regs on stack
+ xc 0(__PT_SIZE,%r15),0(%r15)
+ stmg %r0,%r14,__PT_R0(%r15)
+ mvc __PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
+ mvc __PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw
+ aghi %r15,-STACK_FRAME_OVERHEAD # create stack frame on stack
+ xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
+ lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
+ lg %r2,__LC_RESTART_DATA
+ lg %r3,__LC_RESTART_SOURCE
+ ltgr %r3,%r3 # test source cpu address
+ jm 1f # negative -> skip source stop
+0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu
+ brc 10,0b # wait for status stored
+1: basr %r14,%r1 # call function
+ stap __SF_EMPTY(%r15) # store cpu address
+ llgh %r3,__SF_EMPTY(%r15)
+2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu
+ brc 2,2b
+3: j 3b
+
+ .section .kprobes.text, "ax"
+
+#ifdef CONFIG_CHECK_STACK
+/*
+ * The synchronous or the asynchronous stack overflowed. We are dead.
+ * No need to properly save the registers, we are going to panic anyway.
+ * Setup a pt_regs so that show_trace can provide a good call trace.
+ */
+stack_overflow:
+ lg %r15,__LC_PANIC_STACK # change to panic stack
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
+ stmg %r0,%r7,__PT_R0(%r11)
+ stmg %r8,%r9,__PT_PSW(%r11)
+ mvc __PT_R8(64,%r11),0(%r14)
+ stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lgr %r2,%r11 # pass pointer to pt_regs
+ jg kernel_stack_overflow
+#endif
+
+ .align 8
+.Lcleanup_table:
+ .quad system_call
+ .quad .Lsysc_do_svc
+ .quad .Lsysc_tif
+ .quad .Lsysc_restore
+ .quad .Lsysc_done
+ .quad .Lio_tif
+ .quad .Lio_restore
+ .quad .Lio_done
+ .quad psw_idle
+ .quad .Lpsw_idle_end
+
+cleanup_critical:
+ clg %r9,BASED(.Lcleanup_table) # system_call
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+8) # .Lsysc_do_svc
+ jl .Lcleanup_system_call
+ clg %r9,BASED(.Lcleanup_table+16) # .Lsysc_tif
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+24) # .Lsysc_restore
+ jl .Lcleanup_sysc_tif
+ clg %r9,BASED(.Lcleanup_table+32) # .Lsysc_done
+ jl .Lcleanup_sysc_restore
+ clg %r9,BASED(.Lcleanup_table+40) # .Lio_tif
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+48) # .Lio_restore
+ jl .Lcleanup_io_tif
+ clg %r9,BASED(.Lcleanup_table+56) # .Lio_done
+ jl .Lcleanup_io_restore
+ clg %r9,BASED(.Lcleanup_table+64) # psw_idle
+ jl 0f
+ clg %r9,BASED(.Lcleanup_table+72) # .Lpsw_idle_end
+ jl .Lcleanup_idle
+0: br %r14
+
+
+.Lcleanup_system_call:
+ # check if stpt has been executed
+ clg %r9,BASED(.Lcleanup_system_call_insn)
+ jh 0f
+ mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
+ cghi %r11,__LC_SAVE_AREA_ASYNC
+ je 0f
+ mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
+0: # check if stmg has been executed
+ clg %r9,BASED(.Lcleanup_system_call_insn+8)
+ jh 0f
+ mvc __LC_SAVE_AREA_SYNC(64),0(%r11)
+0: # check if base register setup + TIF bit load has been done
+ clg %r9,BASED(.Lcleanup_system_call_insn+16)
+ jhe 0f
+ # set up saved registers r10 and r12
+ stg %r10,16(%r11) # r10 last break
+ stg %r12,32(%r11) # r12 thread-info pointer
+0: # check if the user time update has been done
+ clg %r9,BASED(.Lcleanup_system_call_insn+24)
+ jh 0f
+ lg %r15,__LC_EXIT_TIMER
+ slg %r15,__LC_SYNC_ENTER_TIMER
+ alg %r15,__LC_USER_TIMER
+ stg %r15,__LC_USER_TIMER
+0: # check if the system time update has been done
+ clg %r9,BASED(.Lcleanup_system_call_insn+32)
+ jh 0f
+ lg %r15,__LC_LAST_UPDATE_TIMER
+ slg %r15,__LC_EXIT_TIMER
+ alg %r15,__LC_SYSTEM_TIMER
+ stg %r15,__LC_SYSTEM_TIMER
+0: # update accounting time stamp
+ mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
+ # do LAST_BREAK
+ lg %r9,16(%r11)
+ srag %r9,%r9,23
+ jz 0f
+ mvc __TI_last_break(8,%r12),16(%r11)
+0: # set up saved register r11
+ lg %r15,__LC_KERNEL_STACK
+ la %r9,STACK_FRAME_OVERHEAD(%r15)
+ stg %r9,24(%r11) # r11 pt_regs pointer
+ # fill pt_regs
+ mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC
+ stmg %r0,%r7,__PT_R0(%r9)
+ mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW
+ mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC
+ xc __PT_FLAGS(8,%r9),__PT_FLAGS(%r9)
+ mvi __PT_FLAGS+7(%r9),_PIF_SYSCALL
+ # setup saved register r15
+ stg %r15,56(%r11) # r15 stack pointer
+ # set new psw address and exit
+ larl %r9,.Lsysc_do_svc
+ br %r14
+.Lcleanup_system_call_insn:
+ .quad system_call
+ .quad .Lsysc_stmg
+ .quad .Lsysc_per
+ .quad .Lsysc_vtime+18
+ .quad .Lsysc_vtime+42
+
+.Lcleanup_sysc_tif:
+ larl %r9,.Lsysc_tif
+ br %r14
+
+.Lcleanup_sysc_restore:
+ clg %r9,BASED(.Lcleanup_sysc_restore_insn)
+ je 0f
+ lg %r9,24(%r11) # get saved pointer to pt_regs
+ mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
+ mvc 0(64,%r11),__PT_R8(%r9)
+ lmg %r0,%r7,__PT_R0(%r9)
+0: lmg %r8,%r9,__LC_RETURN_PSW
+ br %r14
+.Lcleanup_sysc_restore_insn:
+ .quad .Lsysc_done - 4
+
+.Lcleanup_io_tif:
+ larl %r9,.Lio_tif
+ br %r14
+
+.Lcleanup_io_restore:
+ clg %r9,BASED(.Lcleanup_io_restore_insn)
+ je 0f
+ lg %r9,24(%r11) # get saved r11 pointer to pt_regs
+ mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
+ mvc 0(64,%r11),__PT_R8(%r9)
+ lmg %r0,%r7,__PT_R0(%r9)
+0: lmg %r8,%r9,__LC_RETURN_PSW
+ br %r14
+.Lcleanup_io_restore_insn:
+ .quad .Lio_done - 4
+
+.Lcleanup_idle:
+ # copy interrupt clock & cpu timer
+ mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
+ mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
+ cghi %r11,__LC_SAVE_AREA_ASYNC
+ je 0f
+ mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
+ mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
+0: # check if stck & stpt have been executed
+ clg %r9,BASED(.Lcleanup_idle_insn)
+ jhe 1f
+ mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
+ mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
+1: # account system time going idle
+ lg %r9,__LC_STEAL_TIMER
+ alg %r9,__CLOCK_IDLE_ENTER(%r2)
+ slg %r9,__LC_LAST_UPDATE_CLOCK
+ stg %r9,__LC_STEAL_TIMER
+ mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
+ lg %r9,__LC_SYSTEM_TIMER
+ alg %r9,__LC_LAST_UPDATE_TIMER
+ slg %r9,__TIMER_IDLE_ENTER(%r2)
+ stg %r9,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
+ # prepare return psw
+ nihh %r8,0xfcfd # clear irq & wait state bits
+ lg %r9,48(%r11) # return from psw_idle
+ br %r14
+.Lcleanup_idle_insn:
+ .quad .Lpsw_idle_lpsw
+
+/*
+ * Integer constants
+ */
+ .align 8
+.Lcritical_start:
+ .quad .L__critical_start
+.Lcritical_length:
+ .quad .L__critical_end - .L__critical_start
+
+
+#if IS_ENABLED(CONFIG_KVM)
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+ENTRY(sie64a)
+ stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
+ stg %r2,__SF_EMPTY(%r15) # save control block pointer
+ stg %r3,__SF_EMPTY+8(%r15) # save guest register save area
+ xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason
+ lmg %r0,%r13,0(%r3) # load guest gprs 0-13
+ lg %r14,__LC_GMAP # get gmap pointer
+ ltgr %r14,%r14
+ jz .Lsie_gmap
+ lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
+.Lsie_gmap:
+ lg %r14,__SF_EMPTY(%r15) # get control block pointer
+ oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
+ tm __SIE_PROG20+3(%r14),1 # last exit...
+ jnz .Lsie_done
+ LPP __SF_EMPTY(%r15) # set guest id
+ sie 0(%r14)
+.Lsie_done:
+ LPP __SF_EMPTY+16(%r15) # set host id
+ ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
+# some program checks are suppressing. C code (e.g. do_protection_exception)
+# will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other
+# instructions between sie64a and .Lsie_done should not cause program
+# interrupts. So lets use a nop (47 00 00 00) as a landing pad.
+# See also HANDLE_SIE_INTERCEPT
+.Lrewind_pad:
+ nop 0
+ .globl sie_exit
+sie_exit:
+ lg %r14,__SF_EMPTY+8(%r15) # load guest register save area
+ stmg %r0,%r13,0(%r14) # save guest gprs 0-13
+ lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
+ lg %r2,__SF_EMPTY+24(%r15) # return exit reason code
+ br %r14
+.Lsie_fault:
+ lghi %r14,-EFAULT
+ stg %r14,__SF_EMPTY+24(%r15) # set exit reason code
+ j sie_exit
+
+ .align 8
+.Lsie_critical:
+ .quad .Lsie_gmap
+.Lsie_critical_length:
+ .quad .Lsie_done - .Lsie_gmap
+
+ EX_TABLE(.Lrewind_pad,.Lsie_fault)
+ EX_TABLE(sie_exit,.Lsie_fault)
+#endif
+
+ .section .rodata, "a"
+#define SYSCALL(esame,emu) .long esame
+ .globl sys_call_table
+sys_call_table:
+#include "syscalls.S"
+#undef SYSCALL
+
+#ifdef CONFIG_COMPAT
+
+#define SYSCALL(esame,emu) .long emu
+ .globl sys_call_table_emu
+sys_call_table_emu:
+#include "syscalls.S"
+#undef SYSCALL
+#endif
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
new file mode 100644
index 000000000..834df047d
--- /dev/null
+++ b/arch/s390/kernel/entry.h
@@ -0,0 +1,81 @@
+#ifndef _ENTRY_H
+#define _ENTRY_H
+
+#include <linux/types.h>
+#include <linux/signal.h>
+#include <asm/ptrace.h>
+#include <asm/idle.h>
+
+extern void *restart_stack;
+extern unsigned long suspend_zero_pages;
+
+void system_call(void);
+void pgm_check_handler(void);
+void ext_int_handler(void);
+void io_int_handler(void);
+void mcck_int_handler(void);
+void restart_int_handler(void);
+void restart_call_handler(void);
+void psw_idle(struct s390_idle_data *, unsigned long);
+
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
+asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
+
+int alloc_vector_registers(struct task_struct *tsk);
+
+void do_protection_exception(struct pt_regs *regs);
+void do_dat_exception(struct pt_regs *regs);
+
+void addressing_exception(struct pt_regs *regs);
+void data_exception(struct pt_regs *regs);
+void default_trap_handler(struct pt_regs *regs);
+void divide_exception(struct pt_regs *regs);
+void execute_exception(struct pt_regs *regs);
+void hfp_divide_exception(struct pt_regs *regs);
+void hfp_overflow_exception(struct pt_regs *regs);
+void hfp_significance_exception(struct pt_regs *regs);
+void hfp_sqrt_exception(struct pt_regs *regs);
+void hfp_underflow_exception(struct pt_regs *regs);
+void illegal_op(struct pt_regs *regs);
+void operand_exception(struct pt_regs *regs);
+void overflow_exception(struct pt_regs *regs);
+void privileged_op(struct pt_regs *regs);
+void space_switch_exception(struct pt_regs *regs);
+void special_op_exception(struct pt_regs *regs);
+void specification_exception(struct pt_regs *regs);
+void transaction_exception(struct pt_regs *regs);
+void translation_exception(struct pt_regs *regs);
+void vector_exception(struct pt_regs *regs);
+
+void do_per_trap(struct pt_regs *regs);
+void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
+void syscall_trace(struct pt_regs *regs, int entryexit);
+void kernel_stack_overflow(struct pt_regs * regs);
+void do_signal(struct pt_regs *regs);
+void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+ struct pt_regs *regs);
+void do_notify_resume(struct pt_regs *regs);
+
+void __init init_IRQ(void);
+void do_IRQ(struct pt_regs *regs, int irq);
+void do_restart(void);
+void __init startup_init(void);
+void die(struct pt_regs *regs, const char *str);
+int setup_profiling_timer(unsigned int multiplier);
+void __init time_init(void);
+int pfn_is_nosave(unsigned long);
+void s390_early_resume(void);
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
+
+struct s390_mmap_arg_struct;
+struct fadvise64_64_args;
+struct old_sigaction;
+
+long sys_rt_sigreturn(void);
+long sys_sigreturn(void);
+
+long sys_s390_personality(unsigned int personality);
+long sys_s390_runtime_instr(int command, int signum);
+long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
+long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
+#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
new file mode 100644
index 000000000..e0eaf1113
--- /dev/null
+++ b/arch/s390/kernel/ftrace.c
@@ -0,0 +1,244 @@
+/*
+ * Dynamic function tracer architecture backend.
+ *
+ * Copyright IBM Corp. 2009,2014
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/moduleloader.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/kprobes.h>
+#include <trace/syscall.h>
+#include <asm/asm-offsets.h>
+#include <asm/cacheflush.h>
+#include "entry.h"
+
+/*
+ * The mcount code looks like this:
+ * stg %r14,8(%r15) # offset 0
+ * larl %r1,<&counter> # offset 6
+ * brasl %r14,_mcount # offset 12
+ * lg %r14,8(%r15) # offset 18
+ * Total length is 24 bytes. Only the first instruction will be patched
+ * by ftrace_make_call / ftrace_make_nop.
+ * The enabled ftrace code block looks like this:
+ * > brasl %r0,ftrace_caller # offset 0
+ * larl %r1,<&counter> # offset 6
+ * brasl %r14,_mcount # offset 12
+ * lg %r14,8(%r15) # offset 18
+ * The ftrace function gets called with a non-standard C function call ABI
+ * where r0 contains the return address. It is also expected that the called
+ * function only clobbers r0 and r1, but restores r2-r15.
+ * For module code we can't directly jump to ftrace caller, but need a
+ * trampoline (ftrace_plt), which clobbers also r1.
+ * The return point of the ftrace function has offset 24, so execution
+ * continues behind the mcount block.
+ * The disabled ftrace code block looks like this:
+ * > jg .+24 # offset 0
+ * larl %r1,<&counter> # offset 6
+ * brasl %r14,_mcount # offset 12
+ * lg %r14,8(%r15) # offset 18
+ * The jg instruction branches to offset 24 to skip as many instructions
+ * as possible.
+ * In case we use gcc's hotpatch feature the original and also the disabled
+ * function prologue contains only a single six byte instruction and looks
+ * like this:
+ * > brcl 0,0 # offset 0
+ * To enable ftrace the code gets patched like above and afterwards looks
+ * like this:
+ * > brasl %r0,ftrace_caller # offset 0
+ */
+
+unsigned long ftrace_plt;
+
+static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn)
+{
+#ifdef CC_USING_HOTPATCH
+ /* brcl 0,0 */
+ insn->opc = 0xc004;
+ insn->disp = 0;
+#else
+ /* stg r14,8(r15) */
+ insn->opc = 0xe3e0;
+ insn->disp = 0xf0080024;
+#endif
+}
+
+static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+ if (insn->opc == BREAKPOINT_INSTRUCTION)
+ return 1;
+#endif
+ return 0;
+}
+
+static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+ insn->opc = BREAKPOINT_INSTRUCTION;
+ insn->disp = KPROBE_ON_FTRACE_NOP;
+#endif
+}
+
+static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+ insn->opc = BREAKPOINT_INSTRUCTION;
+ insn->disp = KPROBE_ON_FTRACE_CALL;
+#endif
+}
+
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ return 0;
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+{
+ struct ftrace_insn orig, new, old;
+
+ if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
+ return -EFAULT;
+ if (addr == MCOUNT_ADDR) {
+ /* Initial code replacement */
+ ftrace_generate_orig_insn(&orig);
+ ftrace_generate_nop_insn(&new);
+ } else if (is_kprobe_on_ftrace(&old)) {
+ /*
+ * If we find a breakpoint instruction, a kprobe has been
+ * placed at the beginning of the function. We write the
+ * constant KPROBE_ON_FTRACE_NOP into the remaining four
+ * bytes of the original instruction so that the kprobes
+ * handler can execute a nop, if it reaches this breakpoint.
+ */
+ ftrace_generate_kprobe_call_insn(&orig);
+ ftrace_generate_kprobe_nop_insn(&new);
+ } else {
+ /* Replace ftrace call with a nop. */
+ ftrace_generate_call_insn(&orig, rec->ip);
+ ftrace_generate_nop_insn(&new);
+ }
+ /* Verify that the to be replaced code matches what we expect. */
+ if (memcmp(&orig, &old, sizeof(old)))
+ return -EINVAL;
+ s390_kernel_write((void *) rec->ip, &new, sizeof(new));
+ return 0;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ struct ftrace_insn orig, new, old;
+
+ if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
+ return -EFAULT;
+ if (is_kprobe_on_ftrace(&old)) {
+ /*
+ * If we find a breakpoint instruction, a kprobe has been
+ * placed at the beginning of the function. We write the
+ * constant KPROBE_ON_FTRACE_CALL into the remaining four
+ * bytes of the original instruction so that the kprobes
+ * handler can execute a brasl if it reaches this breakpoint.
+ */
+ ftrace_generate_kprobe_nop_insn(&orig);
+ ftrace_generate_kprobe_call_insn(&new);
+ } else {
+ /* Replace nop with an ftrace call. */
+ ftrace_generate_nop_insn(&orig);
+ ftrace_generate_call_insn(&new, rec->ip);
+ }
+ /* Verify that the to be replaced code matches what we expect. */
+ if (memcmp(&orig, &old, sizeof(old)))
+ return -EINVAL;
+ s390_kernel_write((void *) rec->ip, &new, sizeof(new));
+ return 0;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ return 0;
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+ return 0;
+}
+
+static int __init ftrace_plt_init(void)
+{
+ unsigned int *ip;
+
+ ftrace_plt = (unsigned long) module_alloc(PAGE_SIZE);
+ if (!ftrace_plt)
+ panic("cannot allocate ftrace plt\n");
+ ip = (unsigned int *) ftrace_plt;
+ ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
+ ip[1] = 0x100a0004;
+ ip[2] = 0x07f10000;
+ ip[3] = FTRACE_ADDR >> 32;
+ ip[4] = FTRACE_ADDR & 0xffffffff;
+ set_memory_ro(ftrace_plt, 1);
+ return 0;
+}
+device_initcall(ftrace_plt_init);
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * Hook the return address and push it in the stack of return addresses
+ * in current thread info.
+ */
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
+{
+ struct ftrace_graph_ent trace;
+
+ if (unlikely(ftrace_graph_is_dead()))
+ goto out;
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ goto out;
+ ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
+ trace.func = ip;
+ trace.depth = current->curr_ret_stack + 1;
+ /* Only trace if the calling function expects to. */
+ if (!ftrace_graph_entry(&trace))
+ goto out;
+ if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
+ goto out;
+ parent = (unsigned long) return_to_handler;
+out:
+ return parent;
+}
+NOKPROBE_SYMBOL(prepare_ftrace_return);
+
+/*
+ * Patch the kernel code at ftrace_graph_caller location. The instruction
+ * there is branch relative on condition. To enable the ftrace graph code
+ * block, we simply patch the mask field of the instruction to zero and
+ * turn the instruction into a nop.
+ * To disable the ftrace graph code the mask field will be patched to
+ * all ones, which turns the instruction into an unconditional branch.
+ */
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ u8 op = 0x04; /* set mask field to zero */
+
+ s390_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op));
+ return 0;
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ u8 op = 0xf4; /* set mask field to all ones */
+
+ s390_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op));
+ return 0;
+}
+
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
new file mode 100644
index 000000000..59b7c6470
--- /dev/null
+++ b/arch/s390/kernel/head.S
@@ -0,0 +1,454 @@
+/*
+ * Copyright IBM Corp. 1999, 2010
+ *
+ * Author(s): Hartmut Penner <hp@de.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Rob van der Heij <rvdhei@iae.nl>
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ *
+ * There are 5 different IPL methods
+ * 1) load the image directly into ram at address 0 and do an PSW restart
+ * 2) linload will load the image from address 0x10000 to memory 0x10000
+ * and start the code thru LPSW 0x0008000080010000 (VM only, deprecated)
+ * 3) generate the tape ipl header, store the generated image on a tape
+ * and ipl from it
+ * In case of SL tape you need to IPL 5 times to get past VOL1 etc
+ * 4) generate the vm reader ipl header, move the generated image to the
+ * VM reader (use option NOH!) and do a ipl from reader (VM only)
+ * 5) direct call of start by the SALIPL loader
+ * We use the cpuid to distinguish between VM and native ipl
+ * params for kernel are pushed to 0x10400 (see setup.h)
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+
+#define ARCH_OFFSET 4
+
+__HEAD
+
+#define IPL_BS 0x730
+ .org 0
+ .long 0x00080000,0x80000000+iplstart # The first 24 bytes are loaded
+ .long 0x02000018,0x60000050 # by ipl to addresses 0-23.
+ .long 0x02000068,0x60000050 # (a PSW and two CCWs).
+ .fill 80-24,1,0x40 # bytes 24-79 are discarded !!
+ .long 0x020000f0,0x60000050 # The next 160 byte are loaded
+ .long 0x02000140,0x60000050 # to addresses 0x18-0xb7
+ .long 0x02000190,0x60000050 # They form the continuation
+ .long 0x020001e0,0x60000050 # of the CCW program started
+ .long 0x02000230,0x60000050 # by ipl and load the range
+ .long 0x02000280,0x60000050 # 0x0f0-0x730 from the image
+ .long 0x020002d0,0x60000050 # to the range 0x0f0-0x730
+ .long 0x02000320,0x60000050 # in memory. At the end of
+ .long 0x02000370,0x60000050 # the channel program the PSW
+ .long 0x020003c0,0x60000050 # at location 0 is loaded.
+ .long 0x02000410,0x60000050 # Initial processing starts
+ .long 0x02000460,0x60000050 # at 0x200 = iplstart.
+ .long 0x020004b0,0x60000050
+ .long 0x02000500,0x60000050
+ .long 0x02000550,0x60000050
+ .long 0x020005a0,0x60000050
+ .long 0x020005f0,0x60000050
+ .long 0x02000640,0x60000050
+ .long 0x02000690,0x60000050
+ .long 0x020006e0,0x20000050
+
+ .org 0x200
+#
+# subroutine to set architecture mode
+#
+.Lsetmode:
+ mvi __LC_AR_MODE_ID,1 # set esame flag
+ slr %r0,%r0 # set cpuid to zero
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,0x12 # switch to esame mode
+ bras %r13,0f
+ .fill 16,4,0x0
+0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
+ sam31 # switch to 31 bit addressing mode
+ br %r14
+
+#
+# subroutine to wait for end I/O
+#
+.Lirqwait:
+ mvc 0x1f0(16),.Lnewpsw # set up IO interrupt psw
+ lpsw .Lwaitpsw
+.Lioint:
+ br %r14
+ .align 8
+.Lnewpsw:
+ .quad 0x0000000080000000,.Lioint
+.Lwaitpsw:
+ .long 0x020a0000,0x80000000+.Lioint
+
+#
+# subroutine for loading cards from the reader
+#
+.Lloader:
+ la %r4,0(%r14)
+ la %r3,.Lorb # r2 = address of orb into r2
+ la %r5,.Lirb # r4 = address of irb
+ la %r6,.Lccws
+ la %r7,20
+.Linit:
+ st %r2,4(%r6) # initialize CCW data addresses
+ la %r2,0x50(%r2)
+ la %r6,8(%r6)
+ bct 7,.Linit
+
+ lctl %c6,%c6,.Lcr6 # set IO subclass mask
+ slr %r2,%r2
+.Lldlp:
+ ssch 0(%r3) # load chunk of 1600 bytes
+ bnz .Llderr
+.Lwait4irq:
+ bas %r14,.Lirqwait
+ c %r1,0xb8 # compare subchannel number
+ bne .Lwait4irq
+ tsch 0(%r5)
+
+ slr %r0,%r0
+ ic %r0,8(%r5) # get device status
+ chi %r0,8 # channel end ?
+ be .Lcont
+ chi %r0,12 # channel end + device end ?
+ be .Lcont
+
+ l %r0,4(%r5)
+ s %r0,8(%r3) # r0/8 = number of ccws executed
+ mhi %r0,10 # *10 = number of bytes in ccws
+ lh %r3,10(%r5) # get residual count
+ sr %r0,%r3 # #ccws*80-residual=#bytes read
+ ar %r2,%r0
+
+ br %r4 # r2 contains the total size
+
+.Lcont:
+ ahi %r2,0x640 # add 0x640 to total size
+ la %r6,.Lccws
+ la %r7,20
+.Lincr:
+ l %r0,4(%r6) # update CCW data addresses
+ ahi %r0,0x640
+ st %r0,4(%r6)
+ ahi %r6,8
+ bct 7,.Lincr
+
+ b .Lldlp
+.Llderr:
+ lpsw .Lcrash
+
+ .align 8
+.Lorb: .long 0x00000000,0x0080ff00,.Lccws
+.Lirb: .long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+.Lcr6: .long 0xff000000
+.Lloadp:.long 0,0
+ .align 8
+.Lcrash:.long 0x000a0000,0x00000000
+
+ .align 8
+.Lccws: .rept 19
+ .long 0x02600050,0x00000000
+ .endr
+ .long 0x02200050,0x00000000
+
+iplstart:
+ bas %r14,.Lsetmode # Immediately switch to 64 bit mode
+ lh %r1,0xb8 # test if subchannel number
+ bct %r1,.Lnoload # is valid
+ l %r1,0xb8 # load ipl subchannel number
+ la %r2,IPL_BS # load start address
+ bas %r14,.Lloader # load rest of ipl image
+ l %r12,.Lparm # pointer to parameter area
+ st %r1,IPL_DEVICE+ARCH_OFFSET-PARMAREA(%r12) # save ipl device number
+
+#
+# load parameter file from ipl device
+#
+.Lagain1:
+ l %r2,.Linitrd # ramdisk loc. is temp
+ bas %r14,.Lloader # load parameter file
+ ltr %r2,%r2 # got anything ?
+ bz .Lnopf
+ chi %r2,895
+ bnh .Lnotrunc
+ la %r2,895
+.Lnotrunc:
+ l %r4,.Linitrd
+ clc 0(3,%r4),.L_hdr # if it is HDRx
+ bz .Lagain1 # skip dataset header
+ clc 0(3,%r4),.L_eof # if it is EOFx
+ bz .Lagain1 # skip dateset trailer
+ la %r5,0(%r4,%r2)
+ lr %r3,%r2
+ la %r3,COMMAND_LINE-PARMAREA(%r12) # load adr. of command line
+ mvc 0(256,%r3),0(%r4)
+ mvc 256(256,%r3),256(%r4)
+ mvc 512(256,%r3),512(%r4)
+ mvc 768(122,%r3),768(%r4)
+ slr %r0,%r0
+ b .Lcntlp
+.Ldelspc:
+ ic %r0,0(%r2,%r3)
+ chi %r0,0x20 # is it a space ?
+ be .Lcntlp
+ ahi %r2,1
+ b .Leolp
+.Lcntlp:
+ brct %r2,.Ldelspc
+.Leolp:
+ slr %r0,%r0
+ stc %r0,0(%r2,%r3) # terminate buffer
+.Lnopf:
+
+#
+# load ramdisk from ipl device
+#
+.Lagain2:
+ l %r2,.Linitrd # addr of ramdisk
+ st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12)
+ bas %r14,.Lloader # load ramdisk
+ st %r2,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r12) # store size of rd
+ ltr %r2,%r2
+ bnz .Lrdcont
+ st %r2,INITRD_START+ARCH_OFFSET-PARMAREA(%r12) # no ramdisk found
+.Lrdcont:
+ l %r2,.Linitrd
+
+ clc 0(3,%r2),.L_hdr # skip HDRx and EOFx
+ bz .Lagain2
+ clc 0(3,%r2),.L_eof
+ bz .Lagain2
+
+#
+# reset files in VM reader
+#
+ stidp .Lcpuid # store cpuid
+ tm .Lcpuid,0xff # running VM ?
+ bno .Lnoreset
+ la %r2,.Lreset
+ lhi %r3,26
+ diag %r2,%r3,8
+ la %r5,.Lirb
+ stsch 0(%r5) # check if irq is pending
+ tm 30(%r5),0x0f # by verifying if any of the
+ bnz .Lwaitforirq # activity or status control
+ tm 31(%r5),0xff # bits is set in the schib
+ bz .Lnoreset
+.Lwaitforirq:
+ bas %r14,.Lirqwait # wait for IO interrupt
+ c %r1,0xb8 # compare subchannel number
+ bne .Lwaitforirq
+ la %r5,.Lirb
+ tsch 0(%r5)
+.Lnoreset:
+ b .Lnoload
+
+#
+# everything loaded, go for it
+#
+.Lnoload:
+ l %r1,.Lstartup
+ br %r1
+
+.Linitrd:.long _end # default address of initrd
+.Lparm: .long PARMAREA
+.Lstartup: .long startup
+.Lreset:.byte 0xc3,0xc8,0xc1,0xd5,0xc7,0xc5,0x40,0xd9,0xc4,0xd9,0x40
+ .byte 0xc1,0xd3,0xd3,0x40,0xd2,0xc5,0xc5,0xd7,0x40,0xd5,0xd6
+ .byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold"
+.L_eof: .long 0xc5d6c600 /* C'EOF' */
+.L_hdr: .long 0xc8c4d900 /* C'HDR' */
+ .align 8
+.Lcpuid:.fill 8,1,0
+
+#
+# SALIPL loader support. Based on a patch by Rob van der Heij.
+# This entry point is called directly from the SALIPL loader and
+# doesn't need a builtin ipl record.
+#
+ .org 0x800
+ENTRY(start)
+ stm %r0,%r15,0x07b0 # store registers
+ bas %r14,.Lsetmode # Immediately switch to 64 bit mode
+ basr %r12,%r0
+.base:
+ l %r11,.parm
+ l %r8,.cmd # pointer to command buffer
+
+ ltr %r9,%r9 # do we have SALIPL parameters?
+ bp .sk8x8
+
+ mvc 0(64,%r8),0x00b0 # copy saved registers
+ xc 64(240-64,%r8),0(%r8) # remainder of buffer
+ tr 0(64,%r8),.lowcase
+ b .gotr
+.sk8x8:
+ mvc 0(240,%r8),0(%r9) # copy iplparms into buffer
+.gotr:
+ slr %r0,%r0
+ st %r0,INITRD_SIZE+ARCH_OFFSET-PARMAREA(%r11)
+ st %r0,INITRD_START+ARCH_OFFSET-PARMAREA(%r11)
+ j startup # continue with startup
+.cmd: .long COMMAND_LINE # address of command line buffer
+.parm: .long PARMAREA
+.lowcase:
+ .byte 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07
+ .byte 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f
+ .byte 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17
+ .byte 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f
+ .byte 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27
+ .byte 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f
+ .byte 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37
+ .byte 0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f
+ .byte 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47
+ .byte 0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f
+ .byte 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57
+ .byte 0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f
+ .byte 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67
+ .byte 0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f
+ .byte 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77
+ .byte 0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f
+
+ .byte 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87
+ .byte 0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f
+ .byte 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97
+ .byte 0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f
+ .byte 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7
+ .byte 0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf
+ .byte 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7
+ .byte 0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf
+ .byte 0xc0,0x81,0x82,0x83,0x84,0x85,0x86,0x87 # .abcdefg
+ .byte 0x88,0x89,0xca,0xcb,0xcc,0xcd,0xce,0xcf # hi
+ .byte 0xd0,0x91,0x92,0x93,0x94,0x95,0x96,0x97 # .jklmnop
+ .byte 0x98,0x99,0xda,0xdb,0xdc,0xdd,0xde,0xdf # qr
+ .byte 0xe0,0xe1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7 # ..stuvwx
+ .byte 0xa8,0xa9,0xea,0xeb,0xec,0xed,0xee,0xef # yz
+ .byte 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7
+ .byte 0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+
+#
+# startup-code at 0x10000, running in absolute addressing mode
+# this is called either by the ipl loader or directly by PSW restart
+# or linload or SALIPL
+#
+ .org 0x10000
+ENTRY(startup)
+ j .Lep_startup_normal
+ .org 0x10008
+#
+# This is a list of s390 kernel entry points. At address 0x1000f the number of
+# valid entry points is stored.
+#
+# IMPORTANT: Do not change this table, it is s390 kernel ABI!
+#
+ .ascii "S390EP"
+ .byte 0x00,0x01
+#
+# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode
+#
+ .org 0x10010
+ENTRY(startup_kdump)
+ j .Lep_startup_kdump
+.Lep_startup_normal:
+ mvi __LC_AR_MODE_ID,1 # set esame flag
+ slr %r0,%r0 # set cpuid to zero
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,0x12 # switch to esame mode
+ bras %r13,0f
+ .fill 16,4,0x0
+0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
+ sam31 # switch to 31 bit addressing mode
+ basr %r13,0 # get base
+.LPG0:
+ xc 0x200(256),0x200 # partially clear lowcore
+ xc 0x300(256),0x300
+ xc 0xe00(256),0xe00
+ stck __LC_LAST_UPDATE_CLOCK
+ spt 6f-.LPG0(%r13)
+ mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
+ xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST
+ # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10}
+ .insn s,0xb2b10000,0 # store facilities @ __LC_STFL_FAC_LIST
+ tm __LC_STFL_FAC_LIST,0x01 # stfle available ?
+ jz 0f
+ la %r0,1
+ .insn s,0xb2b00000,__LC_STFL_FAC_LIST # store facility list extended
+ # verify if all required facilities are supported by the machine
+0: la %r1,__LC_STFL_FAC_LIST
+ la %r2,3f+8-.LPG0(%r13)
+ l %r3,0(%r2)
+1: l %r0,0(%r1)
+ n %r0,4(%r2)
+ cl %r0,4(%r2)
+ jne 2f
+ la %r1,4(%r1)
+ la %r2,4(%r2)
+ ahi %r3,-1
+ jnz 1b
+ j 4f
+2: l %r15,.Lstack-.LPG0(%r13)
+ ahi %r15,-96
+ la %r2,.Lals_string-.LPG0(%r13)
+ l %r3,.Lsclp_print-.LPG0(%r13)
+ basr %r14,%r3
+ lpsw 3f-.LPG0(%r13) # machine type not good enough, crash
+.Lals_string:
+ .asciz "The Linux kernel requires more recent processor hardware"
+.Lsclp_print:
+ .long _sclp_print_early
+.Lstack:
+ .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER))
+ .align 16
+3: .long 0x000a0000,0x8badcccc
+
+# List of facilities that are required. If not all facilities are present
+# the kernel will crash. Format is number of facility words with bits set,
+# followed by the facility words.
+
+#if defined(CONFIG_MARCH_Z13)
+ .long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+#elif defined(CONFIG_MARCH_ZEC12)
+ .long 3, 0xc100eff2, 0xf46ce800, 0x00400000
+#elif defined(CONFIG_MARCH_Z196)
+ .long 2, 0xc100eff2, 0xf46c0000
+#elif defined(CONFIG_MARCH_Z10)
+ .long 2, 0xc100eff2, 0xf0680000
+#elif defined(CONFIG_MARCH_Z9_109)
+ .long 1, 0xc100efc2
+#elif defined(CONFIG_MARCH_Z990)
+ .long 1, 0xc0002000
+#elif defined(CONFIG_MARCH_Z900)
+ .long 1, 0xc0000000
+#endif
+4:
+ /* Continue with 64bit startup code in head64.S */
+ sam64 # switch to 64 bit mode
+ jg startup_continue
+
+ .align 8
+6: .long 0x7fffffff,0xffffffff
+
+#include "head_kdump.S"
+
+#
+# params at 10400 (setup.h)
+#
+ .org PARMAREA
+ .long 0,0 # IPL_DEVICE
+ .long 0,0 # INITRD_START
+ .long 0,0 # INITRD_SIZE
+ .long 0,0 # OLDMEM_BASE
+ .long 0,0 # OLDMEM_SIZE
+
+ .org COMMAND_LINE
+ .byte "root=/dev/ram0 ro"
+ .byte 0
+
+ .org 0x11000
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
new file mode 100644
index 000000000..d7c005075
--- /dev/null
+++ b/arch/s390/kernel/head64.S
@@ -0,0 +1,105 @@
+/*
+ * Copyright IBM Corp. 1999, 2010
+ *
+ * Author(s): Hartmut Penner <hp@de.ibm.com>
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * Rob van der Heij <rvdhei@iae.nl>
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+
+__HEAD
+ENTRY(startup_continue)
+ larl %r1,sched_clock_base_cc
+ mvc 0(8,%r1),__LC_LAST_UPDATE_CLOCK
+ larl %r13,.LPG1 # get base
+ lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
+ lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area
+ # move IPL device to lowcore
+ lghi %r0,__LC_PASTE
+ stg %r0,__LC_VDSO_PER_CPU
+#
+# Setup stack
+#
+ larl %r15,init_thread_union
+ stg %r15,__LC_THREAD_INFO # cache thread info in lowcore
+ lg %r14,__TI_task(%r15) # cache current in lowcore
+ stg %r14,__LC_CURRENT
+ aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE
+ stg %r15,__LC_KERNEL_STACK # set end of kernel stack
+ aghi %r15,-160
+#
+# Save ipl parameters, clear bss memory, initialize storage key for kernel pages,
+# and create a kernel NSS if the SAVESYS= parm is defined
+#
+ brasl %r14,startup_init
+ lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space,
+ # virtual and never return ...
+ .align 16
+.LPG1:
+.Lentry:.quad 0x0000000180000000,_stext
+.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space
+ .quad 0 # cr1: primary space segment table
+ .quad .Lduct # cr2: dispatchable unit control table
+ .quad 0 # cr3: instruction authorization
+ .quad 0 # cr4: instruction authorization
+ .quad .Lduct # cr5: primary-aste origin
+ .quad 0 # cr6: I/O interrupts
+ .quad 0 # cr7: secondary space segment table
+ .quad 0 # cr8: access registers translation
+ .quad 0 # cr9: tracing off
+ .quad 0 # cr10: tracing off
+ .quad 0 # cr11: tracing off
+ .quad 0 # cr12: tracing off
+ .quad 0 # cr13: home space segment table
+ .quad 0xc0000000 # cr14: machine check handling off
+ .quad .Llinkage_stack # cr15: linkage stack operations
+.Lpcmsk:.quad 0x0000000180000000
+.L4malign:.quad 0xffffffffffc00000
+.Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8
+.Lnop: .long 0x07000700
+.Lparmaddr:
+ .quad PARMAREA
+ .align 64
+.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0
+ .long 0,0,0,0,0,0,0,0
+.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0
+ .align 128
+.Lduald:.rept 8
+ .long 0x80000000,0,0,0 # invalid access-list entries
+ .endr
+.Llinkage_stack:
+ .long 0,0,0x89000000,0,0,0,0x8a000000,0
+
+ENTRY(_ehead)
+
+ .org 0x100000 - 0x11000 # head.o ends at 0x11000
+#
+# startup-code, running in absolute addressing mode
+#
+ENTRY(_stext)
+ basr %r13,0 # get base
+.LPG3:
+# check control registers
+ stctg %c0,%c15,0(%r15)
+ oi 6(%r15),0x60 # enable sigp emergency & external call
+ oi 4(%r15),0x10 # switch on low address proctection
+ lctlg %c0,%c15,0(%r15)
+
+ lam 0,15,.Laregs-.LPG3(%r13) # load acrs needed by uaccess
+ brasl %r14,start_kernel # go to C code
+#
+# We returned from start_kernel ?!? PANIK
+#
+ basr %r13,0
+ lpswe .Ldw-.(%r13) # load disabled wait psw
+
+ .align 8
+.Ldw: .quad 0x0002000180000000,0x0000000000000000
+.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S
new file mode 100644
index 000000000..d05950f02
--- /dev/null
+++ b/arch/s390/kernel/head_kdump.S
@@ -0,0 +1,100 @@
+/*
+ * S390 kdump lowlevel functions (new kernel)
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <asm/sigp.h>
+
+#define DATAMOVER_ADDR 0x4000
+#define COPY_PAGE_ADDR 0x6000
+
+#ifdef CONFIG_CRASH_DUMP
+
+#
+# kdump entry (new kernel - not yet relocated)
+#
+# Note: This code has to be position independent
+#
+
+.align 2
+.Lep_startup_kdump:
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to esame mode
+ sam64 # Switch to 64 bit addressing
+ basr %r13,0
+.Lbase:
+ larl %r2,.Lbase_addr # Check, if we have been
+ lg %r2,0(%r2) # already relocated:
+ clgr %r2,%r13 #
+ jne .Lrelocate # No : Start data mover
+ lghi %r2,0 # Yes: Start kdump kernel
+ brasl %r14,startup_kdump_relocated
+
+.Lrelocate:
+ larl %r4,startup
+ lg %r2,0x418(%r4) # Get kdump base
+ lg %r3,0x420(%r4) # Get kdump size
+
+ larl %r10,.Lcopy_start # Source of data mover
+ lghi %r8,DATAMOVER_ADDR # Target of data mover
+ mvc 0(256,%r8),0(%r10) # Copy data mover code
+
+ agr %r8,%r2 # Copy data mover to
+ mvc 0(256,%r8),0(%r10) # reserved mem
+
+ lghi %r14,DATAMOVER_ADDR # Jump to copied data mover
+ basr %r14,%r14
+.Lbase_addr:
+ .quad .Lbase
+
+#
+# kdump data mover code (runs at address DATAMOVER_ADDR)
+#
+# r2: kdump base address
+# r3: kdump size
+#
+.Lcopy_start:
+ basr %r13,0 # Base
+0:
+ lgr %r11,%r2 # Save kdump base address
+ lgr %r12,%r2
+ agr %r12,%r3 # Compute kdump end address
+
+ lghi %r5,0
+ lghi %r10,COPY_PAGE_ADDR # Load copy page address
+1:
+ mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp
+ mvc 0(256,%r5),0(%r11) # Copy new kernel to old
+ mvc 0(256,%r11),0(%r10) # Copy tmp to new
+ aghi %r11,256
+ aghi %r5,256
+ clgr %r11,%r12
+ jl 1b
+
+ lg %r14,.Lstartup_kdump-0b(%r13)
+ basr %r14,%r14 # Start relocated kernel
+.Lstartup_kdump:
+ .long 0x00000000,0x00000000 + startup_kdump_relocated
+.Lcopy_end:
+
+#
+# Startup of kdump (relocated new kernel)
+#
+.align 2
+startup_kdump_relocated:
+ basr %r13,0
+0: lpswe .Lrestart_psw-0b(%r13) # Start new kernel...
+.align 8
+.Lrestart_psw:
+ .quad 0x0000000080000000,0x0000000000000000 + startup
+#else
+.align 2
+.Lep_startup_kdump:
+ larl %r13,startup_kdump_crash
+ lpswe 0(%r13)
+.align 8
+startup_kdump_crash:
+ .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash
+#endif /* CONFIG_CRASH_DUMP */
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
new file mode 100644
index 000000000..7a55c29b0
--- /dev/null
+++ b/arch/s390/kernel/idle.c
@@ -0,0 +1,125 @@
+/*
+ * Idle functions for s390.
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/kprobes.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <asm/cputime.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include "entry.h"
+
+static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
+
+void enabled_wait(void)
+{
+ struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ unsigned long long idle_time;
+ unsigned long psw_mask;
+
+ trace_hardirqs_on();
+
+ /* Wait for external, I/O or machine check interrupt. */
+ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ clear_cpu_flag(CIF_NOHZ_DELAY);
+
+ /* Call the assembler magic in entry.S */
+ psw_idle(idle, psw_mask);
+
+ trace_hardirqs_off();
+
+ /* Account time spent with enabled wait psw loaded as idle time. */
+ write_seqcount_begin(&idle->seqcount);
+ idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
+ idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
+ idle->idle_time += idle_time;
+ idle->idle_count++;
+ account_idle_time(idle_time);
+ write_seqcount_end(&idle->seqcount);
+}
+NOKPROBE_SYMBOL(enabled_wait);
+
+static ssize_t show_idle_count(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
+ unsigned long long idle_count;
+ unsigned int seq;
+
+ do {
+ seq = read_seqcount_begin(&idle->seqcount);
+ idle_count = ACCESS_ONCE(idle->idle_count);
+ if (ACCESS_ONCE(idle->clock_idle_enter))
+ idle_count++;
+ } while (read_seqcount_retry(&idle->seqcount, seq));
+ return sprintf(buf, "%llu\n", idle_count);
+}
+DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
+
+static ssize_t show_idle_time(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
+ unsigned long long now, idle_time, idle_enter, idle_exit;
+ unsigned int seq;
+
+ do {
+ now = get_tod_clock();
+ seq = read_seqcount_begin(&idle->seqcount);
+ idle_time = ACCESS_ONCE(idle->idle_time);
+ idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
+ idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
+ } while (read_seqcount_retry(&idle->seqcount, seq));
+ idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
+ return sprintf(buf, "%llu\n", idle_time >> 12);
+}
+DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
+
+cputime64_t arch_cpu_idle_time(int cpu)
+{
+ struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
+ unsigned long long now, idle_enter, idle_exit;
+ unsigned int seq;
+
+ do {
+ now = get_tod_clock();
+ seq = read_seqcount_begin(&idle->seqcount);
+ idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
+ idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
+ } while (read_seqcount_retry(&idle->seqcount, seq));
+ return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0;
+}
+
+void arch_cpu_idle_enter(void)
+{
+ local_mcck_disable();
+}
+
+void arch_cpu_idle(void)
+{
+ if (!test_cpu_flag(CIF_MCCK_PENDING))
+ /* Halt the cpu and keep track of cpu time accounting. */
+ enabled_wait();
+ local_irq_enable();
+}
+
+void arch_cpu_idle_exit(void)
+{
+ local_mcck_enable();
+ if (test_cpu_flag(CIF_MCCK_PENDING))
+ s390_handle_mcck();
+}
+
+void arch_cpu_idle_dead(void)
+{
+ cpu_die();
+}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
new file mode 100644
index 000000000..52fbef91d
--- /dev/null
+++ b/arch/s390/kernel/ipl.c
@@ -0,0 +1,2064 @@
+/*
+ * ipl/reipl/dump support for Linux on s390.
+ *
+ * Copyright IBM Corp. 2005, 2012
+ * Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ * Volker Sameske <sameske@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/crash_dump.h>
+#include <linux/debug_locks.h>
+#include <asm/ipl.h>
+#include <asm/smp.h>
+#include <asm/setup.h>
+#include <asm/cpcmd.h>
+#include <asm/cio.h>
+#include <asm/ebcdic.h>
+#include <asm/reset.h>
+#include <asm/sclp.h>
+#include <asm/checksum.h>
+#include <asm/debug.h>
+#include <asm/os_info.h>
+#include "entry.h"
+
+#define IPL_PARM_BLOCK_VERSION 0
+
+#define IPL_UNKNOWN_STR "unknown"
+#define IPL_CCW_STR "ccw"
+#define IPL_FCP_STR "fcp"
+#define IPL_FCP_DUMP_STR "fcp_dump"
+#define IPL_NSS_STR "nss"
+
+#define DUMP_CCW_STR "ccw"
+#define DUMP_FCP_STR "fcp"
+#define DUMP_NONE_STR "none"
+
+/*
+ * Four shutdown trigger types are supported:
+ * - panic
+ * - halt
+ * - power off
+ * - reipl
+ * - restart
+ */
+#define ON_PANIC_STR "on_panic"
+#define ON_HALT_STR "on_halt"
+#define ON_POFF_STR "on_poff"
+#define ON_REIPL_STR "on_reboot"
+#define ON_RESTART_STR "on_restart"
+
+struct shutdown_action;
+struct shutdown_trigger {
+ char *name;
+ struct shutdown_action *action;
+};
+
+/*
+ * The following shutdown action types are supported:
+ */
+#define SHUTDOWN_ACTION_IPL_STR "ipl"
+#define SHUTDOWN_ACTION_REIPL_STR "reipl"
+#define SHUTDOWN_ACTION_DUMP_STR "dump"
+#define SHUTDOWN_ACTION_VMCMD_STR "vmcmd"
+#define SHUTDOWN_ACTION_STOP_STR "stop"
+#define SHUTDOWN_ACTION_DUMP_REIPL_STR "dump_reipl"
+
+struct shutdown_action {
+ char *name;
+ void (*fn) (struct shutdown_trigger *trigger);
+ int (*init) (void);
+ int init_rc;
+};
+
+static char *ipl_type_str(enum ipl_type type)
+{
+ switch (type) {
+ case IPL_TYPE_CCW:
+ return IPL_CCW_STR;
+ case IPL_TYPE_FCP:
+ return IPL_FCP_STR;
+ case IPL_TYPE_FCP_DUMP:
+ return IPL_FCP_DUMP_STR;
+ case IPL_TYPE_NSS:
+ return IPL_NSS_STR;
+ case IPL_TYPE_UNKNOWN:
+ default:
+ return IPL_UNKNOWN_STR;
+ }
+}
+
+enum dump_type {
+ DUMP_TYPE_NONE = 1,
+ DUMP_TYPE_CCW = 2,
+ DUMP_TYPE_FCP = 4,
+};
+
+static char *dump_type_str(enum dump_type type)
+{
+ switch (type) {
+ case DUMP_TYPE_NONE:
+ return DUMP_NONE_STR;
+ case DUMP_TYPE_CCW:
+ return DUMP_CCW_STR;
+ case DUMP_TYPE_FCP:
+ return DUMP_FCP_STR;
+ default:
+ return NULL;
+ }
+}
+
+/*
+ * Must be in data section since the bss section
+ * is not cleared when these are accessed.
+ */
+static u16 ipl_devno __attribute__((__section__(".data"))) = 0;
+u32 ipl_flags __attribute__((__section__(".data"))) = 0;
+
+enum ipl_method {
+ REIPL_METHOD_CCW_CIO,
+ REIPL_METHOD_CCW_DIAG,
+ REIPL_METHOD_CCW_VM,
+ REIPL_METHOD_FCP_RO_DIAG,
+ REIPL_METHOD_FCP_RW_DIAG,
+ REIPL_METHOD_FCP_RO_VM,
+ REIPL_METHOD_FCP_DUMP,
+ REIPL_METHOD_NSS,
+ REIPL_METHOD_NSS_DIAG,
+ REIPL_METHOD_DEFAULT,
+};
+
+enum dump_method {
+ DUMP_METHOD_NONE,
+ DUMP_METHOD_CCW_CIO,
+ DUMP_METHOD_CCW_DIAG,
+ DUMP_METHOD_CCW_VM,
+ DUMP_METHOD_FCP_DIAG,
+};
+
+static int diag308_set_works = 0;
+
+static struct ipl_parameter_block ipl_block;
+
+static int reipl_capabilities = IPL_TYPE_UNKNOWN;
+
+static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
+static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT;
+static struct ipl_parameter_block *reipl_block_fcp;
+static struct ipl_parameter_block *reipl_block_ccw;
+static struct ipl_parameter_block *reipl_block_nss;
+static struct ipl_parameter_block *reipl_block_actual;
+
+static int dump_capabilities = DUMP_TYPE_NONE;
+static enum dump_type dump_type = DUMP_TYPE_NONE;
+static enum dump_method dump_method = DUMP_METHOD_NONE;
+static struct ipl_parameter_block *dump_block_fcp;
+static struct ipl_parameter_block *dump_block_ccw;
+
+static struct sclp_ipl_info sclp_ipl_info;
+
+int diag308(unsigned long subcode, void *addr)
+{
+ register unsigned long _addr asm("0") = (unsigned long) addr;
+ register unsigned long _rc asm("1") = 0;
+
+ asm volatile(
+ " diag %0,%2,0x308\n"
+ "0:\n"
+ EX_TABLE(0b,0b)
+ : "+d" (_addr), "+d" (_rc)
+ : "d" (subcode) : "cc", "memory");
+ return _rc;
+}
+EXPORT_SYMBOL_GPL(diag308);
+
+/* SYSFS */
+
+#define IPL_ATTR_SHOW_FN(_prefix, _name, _format, args...) \
+static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *page) \
+{ \
+ return snprintf(page, PAGE_SIZE, _format, ##args); \
+}
+
+#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \
+IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value) \
+static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
+ __ATTR(_name, S_IRUGO, sys_##_prefix##_##_name##_show, NULL)
+
+#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value) \
+IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value) \
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ unsigned long long value; \
+ if (sscanf(buf, _fmt_in, &value) != 1) \
+ return -EINVAL; \
+ _value = value; \
+ return len; \
+} \
+static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
+ __ATTR(_name,(S_IRUGO | S_IWUSR), \
+ sys_##_prefix##_##_name##_show, \
+ sys_##_prefix##_##_name##_store)
+
+#define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\
+IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value) \
+static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ strncpy(_value, buf, sizeof(_value) - 1); \
+ strim(_value); \
+ return len; \
+} \
+static struct kobj_attribute sys_##_prefix##_##_name##_attr = \
+ __ATTR(_name,(S_IRUGO | S_IWUSR), \
+ sys_##_prefix##_##_name##_show, \
+ sys_##_prefix##_##_name##_store)
+
+static void make_attrs_ro(struct attribute **attrs)
+{
+ while (*attrs) {
+ (*attrs)->mode = S_IRUGO;
+ attrs++;
+ }
+}
+
+/*
+ * ipl section
+ */
+
+static __init enum ipl_type get_ipl_type(void)
+{
+ struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
+
+ if (ipl_flags & IPL_NSS_VALID)
+ return IPL_TYPE_NSS;
+ if (!(ipl_flags & IPL_DEVNO_VALID))
+ return IPL_TYPE_UNKNOWN;
+ if (!(ipl_flags & IPL_PARMBLOCK_VALID))
+ return IPL_TYPE_CCW;
+ if (ipl->hdr.version > IPL_MAX_SUPPORTED_VERSION)
+ return IPL_TYPE_UNKNOWN;
+ if (ipl->hdr.pbt != DIAG308_IPL_TYPE_FCP)
+ return IPL_TYPE_UNKNOWN;
+ if (ipl->ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP)
+ return IPL_TYPE_FCP_DUMP;
+ return IPL_TYPE_FCP;
+}
+
+struct ipl_info ipl_info;
+EXPORT_SYMBOL_GPL(ipl_info);
+
+static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *page)
+{
+ return sprintf(page, "%s\n", ipl_type_str(ipl_info.type));
+}
+
+static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
+
+/* VM IPL PARM routines */
+static size_t reipl_get_ascii_vmparm(char *dest, size_t size,
+ const struct ipl_parameter_block *ipb)
+{
+ int i;
+ size_t len;
+ char has_lowercase = 0;
+
+ len = 0;
+ if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) &&
+ (ipb->ipl_info.ccw.vm_parm_len > 0)) {
+
+ len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len);
+ memcpy(dest, ipb->ipl_info.ccw.vm_parm, len);
+ /* If at least one character is lowercase, we assume mixed
+ * case; otherwise we convert everything to lowercase.
+ */
+ for (i = 0; i < len; i++)
+ if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */
+ (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */
+ (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */
+ has_lowercase = 1;
+ break;
+ }
+ if (!has_lowercase)
+ EBC_TOLOWER(dest, len);
+ EBCASC(dest, len);
+ }
+ dest[len] = 0;
+
+ return len;
+}
+
+size_t append_ipl_vmparm(char *dest, size_t size)
+{
+ size_t rc;
+
+ rc = 0;
+ if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW))
+ rc = reipl_get_ascii_vmparm(dest, size, &ipl_block);
+ else
+ dest[0] = 0;
+ return rc;
+}
+
+static ssize_t ipl_vm_parm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ char parm[DIAG308_VMPARM_SIZE + 1] = {};
+
+ append_ipl_vmparm(parm, sizeof(parm));
+ return sprintf(page, "%s\n", parm);
+}
+
+static size_t scpdata_length(const char* buf, size_t count)
+{
+ while (count) {
+ if (buf[count - 1] != '\0' && buf[count - 1] != ' ')
+ break;
+ count--;
+ }
+ return count;
+}
+
+static size_t reipl_append_ascii_scpdata(char *dest, size_t size,
+ const struct ipl_parameter_block *ipb)
+{
+ size_t count;
+ size_t i;
+ int has_lowercase;
+
+ count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data,
+ ipb->ipl_info.fcp.scp_data_len));
+ if (!count)
+ goto out;
+
+ has_lowercase = 0;
+ for (i = 0; i < count; i++) {
+ if (!isascii(ipb->ipl_info.fcp.scp_data[i])) {
+ count = 0;
+ goto out;
+ }
+ if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i]))
+ has_lowercase = 1;
+ }
+
+ if (has_lowercase)
+ memcpy(dest, ipb->ipl_info.fcp.scp_data, count);
+ else
+ for (i = 0; i < count; i++)
+ dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]);
+out:
+ dest[count] = '\0';
+ return count;
+}
+
+size_t append_ipl_scpdata(char *dest, size_t len)
+{
+ size_t rc;
+
+ rc = 0;
+ if (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP)
+ rc = reipl_append_ascii_scpdata(dest, len, &ipl_block);
+ else
+ dest[0] = 0;
+ return rc;
+}
+
+
+static struct kobj_attribute sys_ipl_vm_parm_attr =
+ __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL);
+
+static ssize_t sys_ipl_device_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
+
+ switch (ipl_info.type) {
+ case IPL_TYPE_CCW:
+ return sprintf(page, "0.0.%04x\n", ipl_devno);
+ case IPL_TYPE_FCP:
+ case IPL_TYPE_FCP_DUMP:
+ return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno);
+ default:
+ return 0;
+ }
+}
+
+static struct kobj_attribute sys_ipl_device_attr =
+ __ATTR(device, S_IRUGO, sys_ipl_device_show, NULL);
+
+static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START,
+ IPL_PARMBLOCK_SIZE);
+}
+static struct bin_attribute ipl_parameter_attr =
+ __BIN_ATTR(binary_parameter, S_IRUGO, ipl_parameter_read, NULL,
+ PAGE_SIZE);
+
+static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len;
+ void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data;
+
+ return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+static struct bin_attribute ipl_scp_data_attr =
+ __BIN_ATTR(scp_data, S_IRUGO, ipl_scp_data_read, NULL, PAGE_SIZE);
+
+static struct bin_attribute *ipl_fcp_bin_attrs[] = {
+ &ipl_parameter_attr,
+ &ipl_scp_data_attr,
+ NULL,
+};
+
+/* FCP ipl device attributes */
+
+DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", (unsigned long long)
+ IPL_PARMBLOCK_START->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n", (unsigned long long)
+ IPL_PARMBLOCK_START->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", (unsigned long long)
+ IPL_PARMBLOCK_START->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long)
+ IPL_PARMBLOCK_START->ipl_info.fcp.br_lba);
+
+static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ char loadparm[LOADPARM_LEN + 1] = {};
+
+ if (!sclp_ipl_info.is_valid)
+ return sprintf(page, "#unknown#\n");
+ memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
+ EBCASC(loadparm, LOADPARM_LEN);
+ strim(loadparm);
+ return sprintf(page, "%s\n", loadparm);
+}
+
+static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
+ __ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL);
+
+static struct attribute *ipl_fcp_attrs[] = {
+ &sys_ipl_type_attr.attr,
+ &sys_ipl_device_attr.attr,
+ &sys_ipl_fcp_wwpn_attr.attr,
+ &sys_ipl_fcp_lun_attr.attr,
+ &sys_ipl_fcp_bootprog_attr.attr,
+ &sys_ipl_fcp_br_lba_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group ipl_fcp_attr_group = {
+ .attrs = ipl_fcp_attrs,
+ .bin_attrs = ipl_fcp_bin_attrs,
+};
+
+/* CCW ipl device attributes */
+
+static struct attribute *ipl_ccw_attrs_vm[] = {
+ &sys_ipl_type_attr.attr,
+ &sys_ipl_device_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ &sys_ipl_vm_parm_attr.attr,
+ NULL,
+};
+
+static struct attribute *ipl_ccw_attrs_lpar[] = {
+ &sys_ipl_type_attr.attr,
+ &sys_ipl_device_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group ipl_ccw_attr_group_vm = {
+ .attrs = ipl_ccw_attrs_vm,
+};
+
+static struct attribute_group ipl_ccw_attr_group_lpar = {
+ .attrs = ipl_ccw_attrs_lpar
+};
+
+/* NSS ipl device attributes */
+
+DEFINE_IPL_ATTR_RO(ipl_nss, name, "%s\n", kernel_nss_name);
+
+static struct attribute *ipl_nss_attrs[] = {
+ &sys_ipl_type_attr.attr,
+ &sys_ipl_nss_name_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ &sys_ipl_vm_parm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group ipl_nss_attr_group = {
+ .attrs = ipl_nss_attrs,
+};
+
+/* UNKNOWN ipl device attributes */
+
+static struct attribute *ipl_unknown_attrs[] = {
+ &sys_ipl_type_attr.attr,
+ NULL,
+};
+
+static struct attribute_group ipl_unknown_attr_group = {
+ .attrs = ipl_unknown_attrs,
+};
+
+static struct kset *ipl_kset;
+
+static void __ipl_run(void *unused)
+{
+ diag308(DIAG308_IPL, NULL);
+ if (MACHINE_IS_VM)
+ __cpcmd("IPL", NULL, 0, NULL);
+ else if (ipl_info.type == IPL_TYPE_CCW)
+ reipl_ccw_dev(&ipl_info.data.ccw.dev_id);
+}
+
+static void ipl_run(struct shutdown_trigger *trigger)
+{
+ smp_call_ipl_cpu(__ipl_run, NULL);
+}
+
+static int __init ipl_init(void)
+{
+ int rc;
+
+ ipl_kset = kset_create_and_add("ipl", NULL, firmware_kobj);
+ if (!ipl_kset) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ switch (ipl_info.type) {
+ case IPL_TYPE_CCW:
+ if (MACHINE_IS_VM)
+ rc = sysfs_create_group(&ipl_kset->kobj,
+ &ipl_ccw_attr_group_vm);
+ else
+ rc = sysfs_create_group(&ipl_kset->kobj,
+ &ipl_ccw_attr_group_lpar);
+ break;
+ case IPL_TYPE_FCP:
+ case IPL_TYPE_FCP_DUMP:
+ rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
+ break;
+ case IPL_TYPE_NSS:
+ rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nss_attr_group);
+ break;
+ default:
+ rc = sysfs_create_group(&ipl_kset->kobj,
+ &ipl_unknown_attr_group);
+ break;
+ }
+out:
+ if (rc)
+ panic("ipl_init failed: rc = %i\n", rc);
+
+ return 0;
+}
+
+static struct shutdown_action __refdata ipl_action = {
+ .name = SHUTDOWN_ACTION_IPL_STR,
+ .fn = ipl_run,
+ .init = ipl_init,
+};
+
+/*
+ * reipl shutdown action: Reboot Linux on shutdown.
+ */
+
+/* VM IPL PARM attributes */
+static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb,
+ char *page)
+{
+ char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
+
+ reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
+ return sprintf(page, "%s\n", vmparm);
+}
+
+static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
+ size_t vmparm_max,
+ const char *buf, size_t len)
+{
+ int i, ip_len;
+
+ /* ignore trailing newline */
+ ip_len = len;
+ if ((len > 0) && (buf[len - 1] == '\n'))
+ ip_len--;
+
+ if (ip_len > vmparm_max)
+ return -EINVAL;
+
+ /* parm is used to store kernel options, check for common chars */
+ for (i = 0; i < ip_len; i++)
+ if (!(isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i])))
+ return -EINVAL;
+
+ memset(ipb->ipl_info.ccw.vm_parm, 0, DIAG308_VMPARM_SIZE);
+ ipb->ipl_info.ccw.vm_parm_len = ip_len;
+ if (ip_len > 0) {
+ ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
+ memcpy(ipb->ipl_info.ccw.vm_parm, buf, ip_len);
+ ASCEBC(ipb->ipl_info.ccw.vm_parm, ip_len);
+ } else {
+ ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID;
+ }
+
+ return len;
+}
+
+/* NSS wrapper */
+static ssize_t reipl_nss_vmparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_vmparm_show(reipl_block_nss, page);
+}
+
+static ssize_t reipl_nss_vmparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_vmparm_store(reipl_block_nss, 56, buf, len);
+}
+
+/* CCW wrapper */
+static ssize_t reipl_ccw_vmparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_vmparm_show(reipl_block_ccw, page);
+}
+
+static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_vmparm_store(reipl_block_ccw, 64, buf, len);
+}
+
+static struct kobj_attribute sys_reipl_nss_vmparm_attr =
+ __ATTR(parm, S_IRUGO | S_IWUSR, reipl_nss_vmparm_show,
+ reipl_nss_vmparm_store);
+static struct kobj_attribute sys_reipl_ccw_vmparm_attr =
+ __ATTR(parm, S_IRUGO | S_IWUSR, reipl_ccw_vmparm_show,
+ reipl_ccw_vmparm_store);
+
+/* FCP reipl device attributes */
+
+static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len;
+ void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data;
+
+ return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
+static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ size_t padding;
+ size_t scpdata_len;
+
+ if (off < 0)
+ return -EINVAL;
+
+ if (off >= DIAG308_SCPDATA_SIZE)
+ return -ENOSPC;
+
+ if (count > DIAG308_SCPDATA_SIZE - off)
+ count = DIAG308_SCPDATA_SIZE - off;
+
+ memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count);
+ scpdata_len = off + count;
+
+ if (scpdata_len % 8) {
+ padding = 8 - (scpdata_len % 8);
+ memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len,
+ 0, padding);
+ scpdata_len += padding;
+ }
+
+ reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len;
+ reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len;
+ reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len;
+
+ return count;
+}
+static struct bin_attribute sys_reipl_fcp_scp_data_attr =
+ __BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_fcp_scpdata_read,
+ reipl_fcp_scpdata_write, PAGE_SIZE);
+
+static struct bin_attribute *reipl_fcp_bin_attrs[] = {
+ &sys_reipl_fcp_scp_data_attr,
+ NULL,
+};
+
+DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
+ reipl_block_fcp->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
+ reipl_block_fcp->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
+ reipl_block_fcp->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
+ reipl_block_fcp->ipl_info.fcp.br_lba);
+DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
+ reipl_block_fcp->ipl_info.fcp.devno);
+
+static void reipl_get_ascii_loadparm(char *loadparm,
+ struct ipl_parameter_block *ibp)
+{
+ memcpy(loadparm, ibp->hdr.loadparm, LOADPARM_LEN);
+ EBCASC(loadparm, LOADPARM_LEN);
+ loadparm[LOADPARM_LEN] = 0;
+ strim(loadparm);
+}
+
+static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb,
+ char *page)
+{
+ char buf[LOADPARM_LEN + 1];
+
+ reipl_get_ascii_loadparm(buf, ipb);
+ return sprintf(page, "%s\n", buf);
+}
+
+static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
+ const char *buf, size_t len)
+{
+ int i, lp_len;
+
+ /* ignore trailing newline */
+ lp_len = len;
+ if ((len > 0) && (buf[len - 1] == '\n'))
+ lp_len--;
+ /* loadparm can have max 8 characters and must not start with a blank */
+ if ((lp_len > LOADPARM_LEN) || ((lp_len > 0) && (buf[0] == ' ')))
+ return -EINVAL;
+ /* loadparm can only contain "a-z,A-Z,0-9,SP,." */
+ for (i = 0; i < lp_len; i++) {
+ if (isalpha(buf[i]) || isdigit(buf[i]) || (buf[i] == ' ') ||
+ (buf[i] == '.'))
+ continue;
+ return -EINVAL;
+ }
+ /* initialize loadparm with blanks */
+ memset(ipb->hdr.loadparm, ' ', LOADPARM_LEN);
+ /* copy and convert to ebcdic */
+ memcpy(ipb->hdr.loadparm, buf, lp_len);
+ ASCEBC(ipb->hdr.loadparm, LOADPARM_LEN);
+ return len;
+}
+
+/* FCP wrapper */
+static ssize_t reipl_fcp_loadparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_loadparm_show(reipl_block_fcp, page);
+}
+
+static ssize_t reipl_fcp_loadparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_loadparm_store(reipl_block_fcp, buf, len);
+}
+
+static struct kobj_attribute sys_reipl_fcp_loadparm_attr =
+ __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_fcp_loadparm_show,
+ reipl_fcp_loadparm_store);
+
+static struct attribute *reipl_fcp_attrs[] = {
+ &sys_reipl_fcp_device_attr.attr,
+ &sys_reipl_fcp_wwpn_attr.attr,
+ &sys_reipl_fcp_lun_attr.attr,
+ &sys_reipl_fcp_bootprog_attr.attr,
+ &sys_reipl_fcp_br_lba_attr.attr,
+ &sys_reipl_fcp_loadparm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group reipl_fcp_attr_group = {
+ .attrs = reipl_fcp_attrs,
+ .bin_attrs = reipl_fcp_bin_attrs,
+};
+
+/* CCW reipl device attributes */
+
+DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
+ reipl_block_ccw->ipl_info.ccw.devno);
+
+/* NSS wrapper */
+static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_loadparm_show(reipl_block_nss, page);
+}
+
+static ssize_t reipl_nss_loadparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_loadparm_store(reipl_block_nss, buf, len);
+}
+
+/* CCW wrapper */
+static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_loadparm_show(reipl_block_ccw, page);
+}
+
+static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_loadparm_store(reipl_block_ccw, buf, len);
+}
+
+static struct kobj_attribute sys_reipl_ccw_loadparm_attr =
+ __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show,
+ reipl_ccw_loadparm_store);
+
+static struct attribute *reipl_ccw_attrs_vm[] = {
+ &sys_reipl_ccw_device_attr.attr,
+ &sys_reipl_ccw_loadparm_attr.attr,
+ &sys_reipl_ccw_vmparm_attr.attr,
+ NULL,
+};
+
+static struct attribute *reipl_ccw_attrs_lpar[] = {
+ &sys_reipl_ccw_device_attr.attr,
+ &sys_reipl_ccw_loadparm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group reipl_ccw_attr_group_vm = {
+ .name = IPL_CCW_STR,
+ .attrs = reipl_ccw_attrs_vm,
+};
+
+static struct attribute_group reipl_ccw_attr_group_lpar = {
+ .name = IPL_CCW_STR,
+ .attrs = reipl_ccw_attrs_lpar,
+};
+
+
+/* NSS reipl device attributes */
+static void reipl_get_ascii_nss_name(char *dst,
+ struct ipl_parameter_block *ipb)
+{
+ memcpy(dst, ipb->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
+ EBCASC(dst, NSS_NAME_SIZE);
+ dst[NSS_NAME_SIZE] = 0;
+}
+
+static ssize_t reipl_nss_name_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ char nss_name[NSS_NAME_SIZE + 1] = {};
+
+ reipl_get_ascii_nss_name(nss_name, reipl_block_nss);
+ return sprintf(page, "%s\n", nss_name);
+}
+
+static ssize_t reipl_nss_name_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ int nss_len;
+
+ /* ignore trailing newline */
+ nss_len = len;
+ if ((len > 0) && (buf[len - 1] == '\n'))
+ nss_len--;
+
+ if (nss_len > NSS_NAME_SIZE)
+ return -EINVAL;
+
+ memset(reipl_block_nss->ipl_info.ccw.nss_name, 0x40, NSS_NAME_SIZE);
+ if (nss_len > 0) {
+ reipl_block_nss->ipl_info.ccw.vm_flags |=
+ DIAG308_VM_FLAGS_NSS_VALID;
+ memcpy(reipl_block_nss->ipl_info.ccw.nss_name, buf, nss_len);
+ ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
+ EBC_TOUPPER(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
+ } else {
+ reipl_block_nss->ipl_info.ccw.vm_flags &=
+ ~DIAG308_VM_FLAGS_NSS_VALID;
+ }
+
+ return len;
+}
+
+static struct kobj_attribute sys_reipl_nss_name_attr =
+ __ATTR(name, S_IRUGO | S_IWUSR, reipl_nss_name_show,
+ reipl_nss_name_store);
+
+static struct kobj_attribute sys_reipl_nss_loadparm_attr =
+ __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nss_loadparm_show,
+ reipl_nss_loadparm_store);
+
+static struct attribute *reipl_nss_attrs[] = {
+ &sys_reipl_nss_name_attr.attr,
+ &sys_reipl_nss_loadparm_attr.attr,
+ &sys_reipl_nss_vmparm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group reipl_nss_attr_group = {
+ .name = IPL_NSS_STR,
+ .attrs = reipl_nss_attrs,
+};
+
+static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block)
+{
+ reipl_block_actual = reipl_block;
+ os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual,
+ reipl_block->hdr.len);
+}
+
+/* reipl type */
+
+static int reipl_set_type(enum ipl_type type)
+{
+ if (!(reipl_capabilities & type))
+ return -EINVAL;
+
+ switch(type) {
+ case IPL_TYPE_CCW:
+ if (diag308_set_works)
+ reipl_method = REIPL_METHOD_CCW_DIAG;
+ else if (MACHINE_IS_VM)
+ reipl_method = REIPL_METHOD_CCW_VM;
+ else
+ reipl_method = REIPL_METHOD_CCW_CIO;
+ set_reipl_block_actual(reipl_block_ccw);
+ break;
+ case IPL_TYPE_FCP:
+ if (diag308_set_works)
+ reipl_method = REIPL_METHOD_FCP_RW_DIAG;
+ else if (MACHINE_IS_VM)
+ reipl_method = REIPL_METHOD_FCP_RO_VM;
+ else
+ reipl_method = REIPL_METHOD_FCP_RO_DIAG;
+ set_reipl_block_actual(reipl_block_fcp);
+ break;
+ case IPL_TYPE_FCP_DUMP:
+ reipl_method = REIPL_METHOD_FCP_DUMP;
+ break;
+ case IPL_TYPE_NSS:
+ if (diag308_set_works)
+ reipl_method = REIPL_METHOD_NSS_DIAG;
+ else
+ reipl_method = REIPL_METHOD_NSS;
+ set_reipl_block_actual(reipl_block_nss);
+ break;
+ case IPL_TYPE_UNKNOWN:
+ reipl_method = REIPL_METHOD_DEFAULT;
+ break;
+ default:
+ BUG();
+ }
+ reipl_type = type;
+ return 0;
+}
+
+static ssize_t reipl_type_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", ipl_type_str(reipl_type));
+}
+
+static ssize_t reipl_type_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ int rc = -EINVAL;
+
+ if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0)
+ rc = reipl_set_type(IPL_TYPE_CCW);
+ else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0)
+ rc = reipl_set_type(IPL_TYPE_FCP);
+ else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0)
+ rc = reipl_set_type(IPL_TYPE_NSS);
+ return (rc != 0) ? rc : len;
+}
+
+static struct kobj_attribute reipl_type_attr =
+ __ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store);
+
+static struct kset *reipl_kset;
+static struct kset *reipl_fcp_kset;
+
+static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb,
+ const enum ipl_method m)
+{
+ char loadparm[LOADPARM_LEN + 1] = {};
+ char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
+ char nss_name[NSS_NAME_SIZE + 1] = {};
+ size_t pos = 0;
+
+ reipl_get_ascii_loadparm(loadparm, ipb);
+ reipl_get_ascii_nss_name(nss_name, ipb);
+ reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
+
+ switch (m) {
+ case REIPL_METHOD_CCW_VM:
+ pos = sprintf(dst, "IPL %X CLEAR", ipb->ipl_info.ccw.devno);
+ break;
+ case REIPL_METHOD_NSS:
+ pos = sprintf(dst, "IPL %s", nss_name);
+ break;
+ default:
+ break;
+ }
+ if (strlen(loadparm) > 0)
+ pos += sprintf(dst + pos, " LOADPARM '%s'", loadparm);
+ if (strlen(vmparm) > 0)
+ sprintf(dst + pos, " PARM %s", vmparm);
+}
+
+static void __reipl_run(void *unused)
+{
+ struct ccw_dev_id devid;
+ static char buf[128];
+
+ switch (reipl_method) {
+ case REIPL_METHOD_CCW_CIO:
+ devid.devno = reipl_block_ccw->ipl_info.ccw.devno;
+ devid.ssid = 0;
+ reipl_ccw_dev(&devid);
+ break;
+ case REIPL_METHOD_CCW_VM:
+ get_ipl_string(buf, reipl_block_ccw, REIPL_METHOD_CCW_VM);
+ __cpcmd(buf, NULL, 0, NULL);
+ break;
+ case REIPL_METHOD_CCW_DIAG:
+ diag308(DIAG308_SET, reipl_block_ccw);
+ diag308(DIAG308_IPL, NULL);
+ break;
+ case REIPL_METHOD_FCP_RW_DIAG:
+ diag308(DIAG308_SET, reipl_block_fcp);
+ diag308(DIAG308_IPL, NULL);
+ break;
+ case REIPL_METHOD_FCP_RO_DIAG:
+ diag308(DIAG308_IPL, NULL);
+ break;
+ case REIPL_METHOD_FCP_RO_VM:
+ __cpcmd("IPL", NULL, 0, NULL);
+ break;
+ case REIPL_METHOD_NSS_DIAG:
+ diag308(DIAG308_SET, reipl_block_nss);
+ diag308(DIAG308_IPL, NULL);
+ break;
+ case REIPL_METHOD_NSS:
+ get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS);
+ __cpcmd(buf, NULL, 0, NULL);
+ break;
+ case REIPL_METHOD_DEFAULT:
+ if (MACHINE_IS_VM)
+ __cpcmd("IPL", NULL, 0, NULL);
+ diag308(DIAG308_IPL, NULL);
+ break;
+ case REIPL_METHOD_FCP_DUMP:
+ break;
+ }
+ disabled_wait((unsigned long) __builtin_return_address(0));
+}
+
+static void reipl_run(struct shutdown_trigger *trigger)
+{
+ smp_call_ipl_cpu(__reipl_run, NULL);
+}
+
+static void reipl_block_ccw_init(struct ipl_parameter_block *ipb)
+{
+ ipb->hdr.len = IPL_PARM_BLK_CCW_LEN;
+ ipb->hdr.version = IPL_PARM_BLOCK_VERSION;
+ ipb->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
+ ipb->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+}
+
+static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
+{
+ /* LOADPARM */
+ /* check if read scp info worked and set loadparm */
+ if (sclp_ipl_info.is_valid)
+ memcpy(ipb->hdr.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
+ else
+ /* read scp info failed: set empty loadparm (EBCDIC blanks) */
+ memset(ipb->hdr.loadparm, 0x40, LOADPARM_LEN);
+ ipb->hdr.flags = DIAG308_FLAGS_LP_VALID;
+
+ /* VM PARM */
+ if (MACHINE_IS_VM && diag308_set_works &&
+ (ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) {
+
+ ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
+ ipb->ipl_info.ccw.vm_parm_len =
+ ipl_block.ipl_info.ccw.vm_parm_len;
+ memcpy(ipb->ipl_info.ccw.vm_parm,
+ ipl_block.ipl_info.ccw.vm_parm, DIAG308_VMPARM_SIZE);
+ }
+}
+
+static int __init reipl_nss_init(void)
+{
+ int rc;
+
+ if (!MACHINE_IS_VM)
+ return 0;
+
+ reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!reipl_block_nss)
+ return -ENOMEM;
+
+ if (!diag308_set_works)
+ sys_reipl_nss_vmparm_attr.attr.mode = S_IRUGO;
+
+ rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group);
+ if (rc)
+ return rc;
+
+ reipl_block_ccw_init(reipl_block_nss);
+ if (ipl_info.type == IPL_TYPE_NSS) {
+ memset(reipl_block_nss->ipl_info.ccw.nss_name,
+ ' ', NSS_NAME_SIZE);
+ memcpy(reipl_block_nss->ipl_info.ccw.nss_name,
+ kernel_nss_name, strlen(kernel_nss_name));
+ ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
+ reipl_block_nss->ipl_info.ccw.vm_flags |=
+ DIAG308_VM_FLAGS_NSS_VALID;
+
+ reipl_block_ccw_fill_parms(reipl_block_nss);
+ }
+
+ reipl_capabilities |= IPL_TYPE_NSS;
+ return 0;
+}
+
+static int __init reipl_ccw_init(void)
+{
+ int rc;
+
+ reipl_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!reipl_block_ccw)
+ return -ENOMEM;
+
+ if (MACHINE_IS_VM) {
+ if (!diag308_set_works)
+ sys_reipl_ccw_vmparm_attr.attr.mode = S_IRUGO;
+ rc = sysfs_create_group(&reipl_kset->kobj,
+ &reipl_ccw_attr_group_vm);
+ } else {
+ if(!diag308_set_works)
+ sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO;
+ rc = sysfs_create_group(&reipl_kset->kobj,
+ &reipl_ccw_attr_group_lpar);
+ }
+ if (rc)
+ return rc;
+
+ reipl_block_ccw_init(reipl_block_ccw);
+ if (ipl_info.type == IPL_TYPE_CCW) {
+ reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
+ reipl_block_ccw_fill_parms(reipl_block_ccw);
+ }
+
+ reipl_capabilities |= IPL_TYPE_CCW;
+ return 0;
+}
+
+static int __init reipl_fcp_init(void)
+{
+ int rc;
+
+ if (!diag308_set_works) {
+ if (ipl_info.type == IPL_TYPE_FCP) {
+ make_attrs_ro(reipl_fcp_attrs);
+ sys_reipl_fcp_scp_data_attr.attr.mode = S_IRUGO;
+ } else
+ return 0;
+ }
+
+ reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!reipl_block_fcp)
+ return -ENOMEM;
+
+ /* sysfs: create fcp kset for mixing attr group and bin attrs */
+ reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL,
+ &reipl_kset->kobj);
+ if (!reipl_fcp_kset) {
+ free_page((unsigned long) reipl_block_fcp);
+ return -ENOMEM;
+ }
+
+ rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
+ if (rc) {
+ kset_unregister(reipl_fcp_kset);
+ free_page((unsigned long) reipl_block_fcp);
+ return rc;
+ }
+
+ if (ipl_info.type == IPL_TYPE_FCP) {
+ memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE);
+ /*
+ * Fix loadparm: There are systems where the (SCSI) LOADPARM
+ * is invalid in the SCSI IPL parameter block, so take it
+ * always from sclp_ipl_info.
+ */
+ memcpy(reipl_block_fcp->hdr.loadparm, sclp_ipl_info.loadparm,
+ LOADPARM_LEN);
+ } else {
+ reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+ reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
+ reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
+ reipl_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
+ reipl_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_IPL;
+ }
+ reipl_capabilities |= IPL_TYPE_FCP;
+ return 0;
+}
+
+static int __init reipl_type_init(void)
+{
+ enum ipl_type reipl_type = ipl_info.type;
+ struct ipl_parameter_block *reipl_block;
+ unsigned long size;
+
+ reipl_block = os_info_old_entry(OS_INFO_REIPL_BLOCK, &size);
+ if (!reipl_block)
+ goto out;
+ /*
+ * If we have an OS info reipl block, this will be used
+ */
+ if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) {
+ memcpy(reipl_block_fcp, reipl_block, size);
+ reipl_type = IPL_TYPE_FCP;
+ } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) {
+ memcpy(reipl_block_ccw, reipl_block, size);
+ reipl_type = IPL_TYPE_CCW;
+ }
+out:
+ return reipl_set_type(reipl_type);
+}
+
+static int __init reipl_init(void)
+{
+ int rc;
+
+ reipl_kset = kset_create_and_add("reipl", NULL, firmware_kobj);
+ if (!reipl_kset)
+ return -ENOMEM;
+ rc = sysfs_create_file(&reipl_kset->kobj, &reipl_type_attr.attr);
+ if (rc) {
+ kset_unregister(reipl_kset);
+ return rc;
+ }
+ rc = reipl_ccw_init();
+ if (rc)
+ return rc;
+ rc = reipl_fcp_init();
+ if (rc)
+ return rc;
+ rc = reipl_nss_init();
+ if (rc)
+ return rc;
+ return reipl_type_init();
+}
+
+static struct shutdown_action __refdata reipl_action = {
+ .name = SHUTDOWN_ACTION_REIPL_STR,
+ .fn = reipl_run,
+ .init = reipl_init,
+};
+
+/*
+ * dump shutdown action: Dump Linux on shutdown.
+ */
+
+/* FCP dump device attributes */
+
+DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
+ dump_block_fcp->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
+ dump_block_fcp->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
+ dump_block_fcp->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
+ dump_block_fcp->ipl_info.fcp.br_lba);
+DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
+ dump_block_fcp->ipl_info.fcp.devno);
+
+static struct attribute *dump_fcp_attrs[] = {
+ &sys_dump_fcp_device_attr.attr,
+ &sys_dump_fcp_wwpn_attr.attr,
+ &sys_dump_fcp_lun_attr.attr,
+ &sys_dump_fcp_bootprog_attr.attr,
+ &sys_dump_fcp_br_lba_attr.attr,
+ NULL,
+};
+
+static struct attribute_group dump_fcp_attr_group = {
+ .name = IPL_FCP_STR,
+ .attrs = dump_fcp_attrs,
+};
+
+/* CCW dump device attributes */
+
+DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
+ dump_block_ccw->ipl_info.ccw.devno);
+
+static struct attribute *dump_ccw_attrs[] = {
+ &sys_dump_ccw_device_attr.attr,
+ NULL,
+};
+
+static struct attribute_group dump_ccw_attr_group = {
+ .name = IPL_CCW_STR,
+ .attrs = dump_ccw_attrs,
+};
+
+/* dump type */
+
+static int dump_set_type(enum dump_type type)
+{
+ if (!(dump_capabilities & type))
+ return -EINVAL;
+ switch (type) {
+ case DUMP_TYPE_CCW:
+ if (diag308_set_works)
+ dump_method = DUMP_METHOD_CCW_DIAG;
+ else if (MACHINE_IS_VM)
+ dump_method = DUMP_METHOD_CCW_VM;
+ else
+ dump_method = DUMP_METHOD_CCW_CIO;
+ break;
+ case DUMP_TYPE_FCP:
+ dump_method = DUMP_METHOD_FCP_DIAG;
+ break;
+ default:
+ dump_method = DUMP_METHOD_NONE;
+ }
+ dump_type = type;
+ return 0;
+}
+
+static ssize_t dump_type_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", dump_type_str(dump_type));
+}
+
+static ssize_t dump_type_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ int rc = -EINVAL;
+
+ if (strncmp(buf, DUMP_NONE_STR, strlen(DUMP_NONE_STR)) == 0)
+ rc = dump_set_type(DUMP_TYPE_NONE);
+ else if (strncmp(buf, DUMP_CCW_STR, strlen(DUMP_CCW_STR)) == 0)
+ rc = dump_set_type(DUMP_TYPE_CCW);
+ else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0)
+ rc = dump_set_type(DUMP_TYPE_FCP);
+ return (rc != 0) ? rc : len;
+}
+
+static struct kobj_attribute dump_type_attr =
+ __ATTR(dump_type, 0644, dump_type_show, dump_type_store);
+
+static struct kset *dump_kset;
+
+static void diag308_dump(void *dump_block)
+{
+ diag308(DIAG308_SET, dump_block);
+ while (1) {
+ if (diag308(DIAG308_DUMP, NULL) != 0x302)
+ break;
+ udelay_simple(USEC_PER_SEC);
+ }
+}
+
+static void __dump_run(void *unused)
+{
+ struct ccw_dev_id devid;
+ static char buf[100];
+
+ switch (dump_method) {
+ case DUMP_METHOD_CCW_CIO:
+ devid.devno = dump_block_ccw->ipl_info.ccw.devno;
+ devid.ssid = 0;
+ reipl_ccw_dev(&devid);
+ break;
+ case DUMP_METHOD_CCW_VM:
+ sprintf(buf, "STORE STATUS");
+ __cpcmd(buf, NULL, 0, NULL);
+ sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno);
+ __cpcmd(buf, NULL, 0, NULL);
+ break;
+ case DUMP_METHOD_CCW_DIAG:
+ diag308_dump(dump_block_ccw);
+ break;
+ case DUMP_METHOD_FCP_DIAG:
+ diag308_dump(dump_block_fcp);
+ break;
+ default:
+ break;
+ }
+}
+
+static void dump_run(struct shutdown_trigger *trigger)
+{
+ if (dump_method == DUMP_METHOD_NONE)
+ return;
+ smp_send_stop();
+ smp_call_ipl_cpu(__dump_run, NULL);
+}
+
+static int __init dump_ccw_init(void)
+{
+ int rc;
+
+ dump_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!dump_block_ccw)
+ return -ENOMEM;
+ rc = sysfs_create_group(&dump_kset->kobj, &dump_ccw_attr_group);
+ if (rc) {
+ free_page((unsigned long)dump_block_ccw);
+ return rc;
+ }
+ dump_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN;
+ dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
+ dump_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
+ dump_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+ dump_capabilities |= DUMP_TYPE_CCW;
+ return 0;
+}
+
+static int __init dump_fcp_init(void)
+{
+ int rc;
+
+ if (!sclp_ipl_info.has_dump)
+ return 0; /* LDIPL DUMP is not installed */
+ if (!diag308_set_works)
+ return 0;
+ dump_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!dump_block_fcp)
+ return -ENOMEM;
+ rc = sysfs_create_group(&dump_kset->kobj, &dump_fcp_attr_group);
+ if (rc) {
+ free_page((unsigned long)dump_block_fcp);
+ return rc;
+ }
+ dump_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+ dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
+ dump_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
+ dump_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
+ dump_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_DUMP;
+ dump_capabilities |= DUMP_TYPE_FCP;
+ return 0;
+}
+
+static int __init dump_init(void)
+{
+ int rc;
+
+ dump_kset = kset_create_and_add("dump", NULL, firmware_kobj);
+ if (!dump_kset)
+ return -ENOMEM;
+ rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr);
+ if (rc) {
+ kset_unregister(dump_kset);
+ return rc;
+ }
+ rc = dump_ccw_init();
+ if (rc)
+ return rc;
+ rc = dump_fcp_init();
+ if (rc)
+ return rc;
+ dump_set_type(DUMP_TYPE_NONE);
+ return 0;
+}
+
+static struct shutdown_action __refdata dump_action = {
+ .name = SHUTDOWN_ACTION_DUMP_STR,
+ .fn = dump_run,
+ .init = dump_init,
+};
+
+static void dump_reipl_run(struct shutdown_trigger *trigger)
+{
+ unsigned long ipib = (unsigned long) reipl_block_actual;
+ unsigned int csum;
+
+ csum = csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
+ mem_assign_absolute(S390_lowcore.ipib, ipib);
+ mem_assign_absolute(S390_lowcore.ipib_checksum, csum);
+ dump_run(trigger);
+}
+
+static int __init dump_reipl_init(void)
+{
+ if (!diag308_set_works)
+ return -EOPNOTSUPP;
+ else
+ return 0;
+}
+
+static struct shutdown_action __refdata dump_reipl_action = {
+ .name = SHUTDOWN_ACTION_DUMP_REIPL_STR,
+ .fn = dump_reipl_run,
+ .init = dump_reipl_init,
+};
+
+/*
+ * vmcmd shutdown action: Trigger vm command on shutdown.
+ */
+
+static char vmcmd_on_reboot[128];
+static char vmcmd_on_panic[128];
+static char vmcmd_on_halt[128];
+static char vmcmd_on_poff[128];
+static char vmcmd_on_restart[128];
+
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_restart, "%s\n", "%s\n", vmcmd_on_restart);
+
+static struct attribute *vmcmd_attrs[] = {
+ &sys_vmcmd_on_reboot_attr.attr,
+ &sys_vmcmd_on_panic_attr.attr,
+ &sys_vmcmd_on_halt_attr.attr,
+ &sys_vmcmd_on_poff_attr.attr,
+ &sys_vmcmd_on_restart_attr.attr,
+ NULL,
+};
+
+static struct attribute_group vmcmd_attr_group = {
+ .attrs = vmcmd_attrs,
+};
+
+static struct kset *vmcmd_kset;
+
+static void vmcmd_run(struct shutdown_trigger *trigger)
+{
+ char *cmd;
+
+ if (strcmp(trigger->name, ON_REIPL_STR) == 0)
+ cmd = vmcmd_on_reboot;
+ else if (strcmp(trigger->name, ON_PANIC_STR) == 0)
+ cmd = vmcmd_on_panic;
+ else if (strcmp(trigger->name, ON_HALT_STR) == 0)
+ cmd = vmcmd_on_halt;
+ else if (strcmp(trigger->name, ON_POFF_STR) == 0)
+ cmd = vmcmd_on_poff;
+ else if (strcmp(trigger->name, ON_RESTART_STR) == 0)
+ cmd = vmcmd_on_restart;
+ else
+ return;
+
+ if (strlen(cmd) == 0)
+ return;
+ __cpcmd(cmd, NULL, 0, NULL);
+}
+
+static int vmcmd_init(void)
+{
+ if (!MACHINE_IS_VM)
+ return -EOPNOTSUPP;
+ vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj);
+ if (!vmcmd_kset)
+ return -ENOMEM;
+ return sysfs_create_group(&vmcmd_kset->kobj, &vmcmd_attr_group);
+}
+
+static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR,
+ vmcmd_run, vmcmd_init};
+
+/*
+ * stop shutdown action: Stop Linux on shutdown.
+ */
+
+static void stop_run(struct shutdown_trigger *trigger)
+{
+ if (strcmp(trigger->name, ON_PANIC_STR) == 0 ||
+ strcmp(trigger->name, ON_RESTART_STR) == 0)
+ disabled_wait((unsigned long) __builtin_return_address(0));
+ smp_stop_cpu();
+}
+
+static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR,
+ stop_run, NULL};
+
+/* action list */
+
+static struct shutdown_action *shutdown_actions_list[] = {
+ &ipl_action, &reipl_action, &dump_reipl_action, &dump_action,
+ &vmcmd_action, &stop_action};
+#define SHUTDOWN_ACTIONS_COUNT (sizeof(shutdown_actions_list) / sizeof(void *))
+
+/*
+ * Trigger section
+ */
+
+static struct kset *shutdown_actions_kset;
+
+static int set_trigger(const char *buf, struct shutdown_trigger *trigger,
+ size_t len)
+{
+ int i;
+
+ for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) {
+ if (sysfs_streq(buf, shutdown_actions_list[i]->name)) {
+ if (shutdown_actions_list[i]->init_rc) {
+ return shutdown_actions_list[i]->init_rc;
+ } else {
+ trigger->action = shutdown_actions_list[i];
+ return len;
+ }
+ }
+ }
+ return -EINVAL;
+}
+
+/* on reipl */
+
+static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR,
+ &reipl_action};
+
+static ssize_t on_reboot_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", on_reboot_trigger.action->name);
+}
+
+static ssize_t on_reboot_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return set_trigger(buf, &on_reboot_trigger, len);
+}
+static struct kobj_attribute on_reboot_attr = __ATTR_RW(on_reboot);
+
+static void do_machine_restart(char *__unused)
+{
+ smp_send_stop();
+ on_reboot_trigger.action->fn(&on_reboot_trigger);
+ reipl_run(NULL);
+}
+void (*_machine_restart)(char *command) = do_machine_restart;
+
+/* on panic */
+
+static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action};
+
+static ssize_t on_panic_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", on_panic_trigger.action->name);
+}
+
+static ssize_t on_panic_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return set_trigger(buf, &on_panic_trigger, len);
+}
+static struct kobj_attribute on_panic_attr = __ATTR_RW(on_panic);
+
+static void do_panic(void)
+{
+ lgr_info_log();
+ on_panic_trigger.action->fn(&on_panic_trigger);
+ stop_run(&on_panic_trigger);
+}
+
+/* on restart */
+
+static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR,
+ &stop_action};
+
+static ssize_t on_restart_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", on_restart_trigger.action->name);
+}
+
+static ssize_t on_restart_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return set_trigger(buf, &on_restart_trigger, len);
+}
+static struct kobj_attribute on_restart_attr = __ATTR_RW(on_restart);
+
+static void __do_restart(void *ignore)
+{
+ __arch_local_irq_stosm(0x04); /* enable DAT */
+ smp_send_stop();
+#ifdef CONFIG_CRASH_DUMP
+ crash_kexec(NULL);
+#endif
+ on_restart_trigger.action->fn(&on_restart_trigger);
+ stop_run(&on_restart_trigger);
+}
+
+void do_restart(void)
+{
+ tracing_off();
+ debug_locks_off();
+ lgr_info_log();
+ smp_call_online_cpu(__do_restart, NULL);
+}
+
+/* on halt */
+
+static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action};
+
+static ssize_t on_halt_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", on_halt_trigger.action->name);
+}
+
+static ssize_t on_halt_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return set_trigger(buf, &on_halt_trigger, len);
+}
+static struct kobj_attribute on_halt_attr = __ATTR_RW(on_halt);
+
+static void do_machine_halt(void)
+{
+ smp_send_stop();
+ on_halt_trigger.action->fn(&on_halt_trigger);
+ stop_run(&on_halt_trigger);
+}
+void (*_machine_halt)(void) = do_machine_halt;
+
+/* on power off */
+
+static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action};
+
+static ssize_t on_poff_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%s\n", on_poff_trigger.action->name);
+}
+
+static ssize_t on_poff_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return set_trigger(buf, &on_poff_trigger, len);
+}
+static struct kobj_attribute on_poff_attr = __ATTR_RW(on_poff);
+
+static void do_machine_power_off(void)
+{
+ smp_send_stop();
+ on_poff_trigger.action->fn(&on_poff_trigger);
+ stop_run(&on_poff_trigger);
+}
+void (*_machine_power_off)(void) = do_machine_power_off;
+
+static struct attribute *shutdown_action_attrs[] = {
+ &on_restart_attr.attr,
+ &on_reboot_attr.attr,
+ &on_panic_attr.attr,
+ &on_halt_attr.attr,
+ &on_poff_attr.attr,
+ NULL,
+};
+
+static struct attribute_group shutdown_action_attr_group = {
+ .attrs = shutdown_action_attrs,
+};
+
+static void __init shutdown_triggers_init(void)
+{
+ shutdown_actions_kset = kset_create_and_add("shutdown_actions", NULL,
+ firmware_kobj);
+ if (!shutdown_actions_kset)
+ goto fail;
+ if (sysfs_create_group(&shutdown_actions_kset->kobj,
+ &shutdown_action_attr_group))
+ goto fail;
+ return;
+fail:
+ panic("shutdown_triggers_init failed\n");
+}
+
+static void __init shutdown_actions_init(void)
+{
+ int i;
+
+ for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) {
+ if (!shutdown_actions_list[i]->init)
+ continue;
+ shutdown_actions_list[i]->init_rc =
+ shutdown_actions_list[i]->init();
+ }
+}
+
+static int __init s390_ipl_init(void)
+{
+ char str[8] = {0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40};
+
+ sclp_get_ipl_info(&sclp_ipl_info);
+ /*
+ * Fix loadparm: There are systems where the (SCSI) LOADPARM
+ * returned by read SCP info is invalid (contains EBCDIC blanks)
+ * when the system has been booted via diag308. In that case we use
+ * the value from diag308, if available.
+ *
+ * There are also systems where diag308 store does not work in
+ * case the system is booted from HMC. Fortunately in this case
+ * READ SCP info provides the correct value.
+ */
+ if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 &&
+ diag308_set_works)
+ memcpy(sclp_ipl_info.loadparm, ipl_block.hdr.loadparm,
+ LOADPARM_LEN);
+ shutdown_actions_init();
+ shutdown_triggers_init();
+ return 0;
+}
+
+__initcall(s390_ipl_init);
+
+static void __init strncpy_skip_quote(char *dst, char *src, int n)
+{
+ int sx, dx;
+
+ dx = 0;
+ for (sx = 0; src[sx] != 0; sx++) {
+ if (src[sx] == '"')
+ continue;
+ dst[dx++] = src[sx];
+ if (dx >= n)
+ break;
+ }
+}
+
+static int __init vmcmd_on_reboot_setup(char *str)
+{
+ if (!MACHINE_IS_VM)
+ return 1;
+ strncpy_skip_quote(vmcmd_on_reboot, str, 127);
+ vmcmd_on_reboot[127] = 0;
+ on_reboot_trigger.action = &vmcmd_action;
+ return 1;
+}
+__setup("vmreboot=", vmcmd_on_reboot_setup);
+
+static int __init vmcmd_on_panic_setup(char *str)
+{
+ if (!MACHINE_IS_VM)
+ return 1;
+ strncpy_skip_quote(vmcmd_on_panic, str, 127);
+ vmcmd_on_panic[127] = 0;
+ on_panic_trigger.action = &vmcmd_action;
+ return 1;
+}
+__setup("vmpanic=", vmcmd_on_panic_setup);
+
+static int __init vmcmd_on_halt_setup(char *str)
+{
+ if (!MACHINE_IS_VM)
+ return 1;
+ strncpy_skip_quote(vmcmd_on_halt, str, 127);
+ vmcmd_on_halt[127] = 0;
+ on_halt_trigger.action = &vmcmd_action;
+ return 1;
+}
+__setup("vmhalt=", vmcmd_on_halt_setup);
+
+static int __init vmcmd_on_poff_setup(char *str)
+{
+ if (!MACHINE_IS_VM)
+ return 1;
+ strncpy_skip_quote(vmcmd_on_poff, str, 127);
+ vmcmd_on_poff[127] = 0;
+ on_poff_trigger.action = &vmcmd_action;
+ return 1;
+}
+__setup("vmpoff=", vmcmd_on_poff_setup);
+
+static int on_panic_notify(struct notifier_block *self,
+ unsigned long event, void *data)
+{
+ do_panic();
+ return NOTIFY_OK;
+}
+
+static struct notifier_block on_panic_nb = {
+ .notifier_call = on_panic_notify,
+ .priority = INT_MIN,
+};
+
+void __init setup_ipl(void)
+{
+ ipl_info.type = get_ipl_type();
+ switch (ipl_info.type) {
+ case IPL_TYPE_CCW:
+ ipl_info.data.ccw.dev_id.devno = ipl_devno;
+ ipl_info.data.ccw.dev_id.ssid = 0;
+ break;
+ case IPL_TYPE_FCP:
+ case IPL_TYPE_FCP_DUMP:
+ ipl_info.data.fcp.dev_id.devno =
+ IPL_PARMBLOCK_START->ipl_info.fcp.devno;
+ ipl_info.data.fcp.dev_id.ssid = 0;
+ ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn;
+ ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun;
+ break;
+ case IPL_TYPE_NSS:
+ strncpy(ipl_info.data.nss.name, kernel_nss_name,
+ sizeof(ipl_info.data.nss.name));
+ break;
+ case IPL_TYPE_UNKNOWN:
+ /* We have no info to copy */
+ break;
+ }
+ atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
+}
+
+void __init ipl_update_parameters(void)
+{
+ int rc;
+
+ rc = diag308(DIAG308_STORE, &ipl_block);
+ if ((rc == DIAG308_RC_OK) || (rc == DIAG308_RC_NOCONFIG))
+ diag308_set_works = 1;
+}
+
+void __init ipl_save_parameters(void)
+{
+ struct cio_iplinfo iplinfo;
+ void *src, *dst;
+
+ if (cio_get_iplinfo(&iplinfo))
+ return;
+
+ ipl_devno = iplinfo.devno;
+ ipl_flags |= IPL_DEVNO_VALID;
+ if (!iplinfo.is_qdio)
+ return;
+ ipl_flags |= IPL_PARMBLOCK_VALID;
+ src = (void *)(unsigned long)S390_lowcore.ipl_parmblock_ptr;
+ dst = (void *)IPL_PARMBLOCK_ORIGIN;
+ memmove(dst, src, PAGE_SIZE);
+ S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN;
+}
+
+static LIST_HEAD(rcall);
+static DEFINE_MUTEX(rcall_mutex);
+
+void register_reset_call(struct reset_call *reset)
+{
+ mutex_lock(&rcall_mutex);
+ list_add(&reset->list, &rcall);
+ mutex_unlock(&rcall_mutex);
+}
+EXPORT_SYMBOL_GPL(register_reset_call);
+
+void unregister_reset_call(struct reset_call *reset)
+{
+ mutex_lock(&rcall_mutex);
+ list_del(&reset->list);
+ mutex_unlock(&rcall_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_reset_call);
+
+static void do_reset_calls(void)
+{
+ struct reset_call *reset;
+
+ if (diag308_set_works) {
+ diag308_reset();
+ return;
+ }
+ list_for_each_entry(reset, &rcall, list)
+ reset->fn();
+}
+
+u32 dump_prefix_page;
+
+void s390_reset_system(void (*fn_pre)(void),
+ void (*fn_post)(void *), void *data)
+{
+ struct _lowcore *lc;
+
+ lc = (struct _lowcore *)(unsigned long) store_prefix();
+
+ /* Stack for interrupt/machine check handler */
+ lc->panic_stack = S390_lowcore.panic_stack;
+
+ /* Save prefix page address for dump case */
+ dump_prefix_page = (u32)(unsigned long) lc;
+
+ /* Disable prefixing */
+ set_prefix(0);
+
+ /* Disable lowcore protection */
+ __ctl_clear_bit(0,28);
+
+ /* Set new machine check handler */
+ S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
+ S390_lowcore.mcck_new_psw.addr =
+ PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler;
+
+ /* Set new program check handler */
+ S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
+ S390_lowcore.program_new_psw.addr =
+ PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+
+ /*
+ * Clear subchannel ID and number to signal new kernel that no CCW or
+ * SCSI IPL has been done (for kexec and kdump)
+ */
+ S390_lowcore.subchannel_id = 0;
+ S390_lowcore.subchannel_nr = 0;
+
+ /* Store status at absolute zero */
+ store_status();
+
+ /* Call function before reset */
+ if (fn_pre)
+ fn_pre();
+ do_reset_calls();
+ /* Call function after reset */
+ if (fn_post)
+ fn_post(data);
+}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
new file mode 100644
index 000000000..e9d9addfa
--- /dev/null
+++ b/arch/s390/kernel/irq.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright IBM Corp. 2004, 2011
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Holger Smolinski <Holger.Smolinski@de.ibm.com>,
+ * Thomas Spatzier <tspat@de.ibm.com>,
+ *
+ * This file contains interrupt related functions.
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/profile.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ftrace.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/cputime.h>
+#include <asm/lowcore.h>
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+#include "entry.h"
+
+DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
+EXPORT_PER_CPU_SYMBOL_GPL(irq_stat);
+
+struct irq_class {
+ int irq;
+ char *name;
+ char *desc;
+};
+
+/*
+ * The list of "main" irq classes on s390. This is the list of interrupts
+ * that appear both in /proc/stat ("intr" line) and /proc/interrupts.
+ * Historically only external and I/O interrupts have been part of /proc/stat.
+ * We can't add the split external and I/O sub classes since the first field
+ * in the "intr" line in /proc/stat is supposed to be the sum of all other
+ * fields.
+ * Since the external and I/O interrupt fields are already sums we would end
+ * up with having a sum which accounts each interrupt twice.
+ */
+static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
+ {.irq = EXT_INTERRUPT, .name = "EXT"},
+ {.irq = IO_INTERRUPT, .name = "I/O"},
+ {.irq = THIN_INTERRUPT, .name = "AIO"},
+};
+
+/*
+ * The list of split external and I/O interrupts that appear only in
+ * /proc/interrupts.
+ * In addition this list contains non external / I/O events like NMIs.
+ */
+static const struct irq_class irqclass_sub_desc[] = {
+ {.irq = IRQEXT_CLK, .name = "CLK", .desc = "[EXT] Clock Comparator"},
+ {.irq = IRQEXT_EXC, .name = "EXC", .desc = "[EXT] External Call"},
+ {.irq = IRQEXT_EMS, .name = "EMS", .desc = "[EXT] Emergency Signal"},
+ {.irq = IRQEXT_TMR, .name = "TMR", .desc = "[EXT] CPU Timer"},
+ {.irq = IRQEXT_TLA, .name = "TAL", .desc = "[EXT] Timing Alert"},
+ {.irq = IRQEXT_PFL, .name = "PFL", .desc = "[EXT] Pseudo Page Fault"},
+ {.irq = IRQEXT_DSD, .name = "DSD", .desc = "[EXT] DASD Diag"},
+ {.irq = IRQEXT_VRT, .name = "VRT", .desc = "[EXT] Virtio"},
+ {.irq = IRQEXT_SCP, .name = "SCP", .desc = "[EXT] Service Call"},
+ {.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"},
+ {.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
+ {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
+ {.irq = IRQEXT_CMR, .name = "CMR", .desc = "[EXT] CPU-Measurement: RI"},
+ {.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
+ {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
+ {.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
+ {.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"},
+ {.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"},
+ {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"},
+ {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"},
+ {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"},
+ {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"},
+ {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"},
+ {.irq = IRQIO_APB, .name = "APB", .desc = "[I/O] AP Bus"},
+ {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"},
+ {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
+ {.irq = IRQIO_PCI, .name = "PCI", .desc = "[I/O] PCI Interrupt" },
+ {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI Interrupt" },
+ {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
+ {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
+ {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"},
+ {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"},
+};
+
+void __init init_IRQ(void)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS);
+ init_cio_interrupts();
+ init_airq_interrupts();
+ init_ext_interrupts();
+}
+
+void do_IRQ(struct pt_regs *regs, int irq)
+{
+ struct pt_regs *old_regs;
+
+ old_regs = set_irq_regs(regs);
+ irq_enter();
+ if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+ /* Serve timer interrupts first. */
+ clock_comparator_work();
+ generic_handle_irq(irq);
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+
+/*
+ * show_interrupts is needed by /proc/interrupts.
+ */
+int show_interrupts(struct seq_file *p, void *v)
+{
+ int index = *(loff_t *) v;
+ int cpu, irq;
+
+ get_online_cpus();
+ if (index == 0) {
+ seq_puts(p, " ");
+ for_each_online_cpu(cpu)
+ seq_printf(p, "CPU%d ", cpu);
+ seq_putc(p, '\n');
+ }
+ if (index < NR_IRQS) {
+ if (index >= NR_IRQS_BASE)
+ goto out;
+ seq_printf(p, "%s: ", irqclass_main_desc[index].name);
+ irq = irqclass_main_desc[index].irq;
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
+ seq_putc(p, '\n');
+ goto out;
+ }
+ for (index = 0; index < NR_ARCH_IRQS; index++) {
+ seq_printf(p, "%s: ", irqclass_sub_desc[index].name);
+ irq = irqclass_sub_desc[index].irq;
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ",
+ per_cpu(irq_stat, cpu).irqs[irq]);
+ if (irqclass_sub_desc[index].desc)
+ seq_printf(p, " %s", irqclass_sub_desc[index].desc);
+ seq_putc(p, '\n');
+ }
+out:
+ put_online_cpus();
+ return 0;
+}
+
+unsigned int arch_dynirq_lower_bound(unsigned int from)
+{
+ return from < NR_IRQS_BASE ? NR_IRQS_BASE : from;
+}
+
+/*
+ * Switch to the asynchronous interrupt stack for softirq execution.
+ */
+void do_softirq_own_stack(void)
+{
+ unsigned long old, new;
+
+ /* Get current stack pointer. */
+ asm volatile("la %0,0(15)" : "=a" (old));
+ /* Check against async. stack address range. */
+ new = S390_lowcore.async_stack;
+ if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) {
+ /* Need to switch to the async. stack. */
+ new -= STACK_FRAME_OVERHEAD;
+ ((struct stack_frame *) new)->back_chain = old;
+ asm volatile(" la 15,0(%0)\n"
+ " basr 14,%2\n"
+ " la 15,0(%1)\n"
+ : : "a" (new), "a" (old),
+ "a" (__do_softirq)
+ : "0", "1", "2", "3", "4", "5", "14",
+ "cc", "memory" );
+ } else {
+ /* We are already on the async stack. */
+ __do_softirq();
+ }
+}
+
+/*
+ * ext_int_hash[index] is the list head for all external interrupts that hash
+ * to this index.
+ */
+static struct hlist_head ext_int_hash[32] ____cacheline_aligned;
+
+struct ext_int_info {
+ ext_int_handler_t handler;
+ struct hlist_node entry;
+ struct rcu_head rcu;
+ u16 code;
+};
+
+/* ext_int_hash_lock protects the handler lists for external interrupts */
+static DEFINE_SPINLOCK(ext_int_hash_lock);
+
+static inline int ext_hash(u16 code)
+{
+ BUILD_BUG_ON(!is_power_of_2(ARRAY_SIZE(ext_int_hash)));
+
+ return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1);
+}
+
+int register_external_irq(u16 code, ext_int_handler_t handler)
+{
+ struct ext_int_info *p;
+ unsigned long flags;
+ int index;
+
+ p = kmalloc(sizeof(*p), GFP_ATOMIC);
+ if (!p)
+ return -ENOMEM;
+ p->code = code;
+ p->handler = handler;
+ index = ext_hash(code);
+
+ spin_lock_irqsave(&ext_int_hash_lock, flags);
+ hlist_add_head_rcu(&p->entry, &ext_int_hash[index]);
+ spin_unlock_irqrestore(&ext_int_hash_lock, flags);
+ return 0;
+}
+EXPORT_SYMBOL(register_external_irq);
+
+int unregister_external_irq(u16 code, ext_int_handler_t handler)
+{
+ struct ext_int_info *p;
+ unsigned long flags;
+ int index = ext_hash(code);
+
+ spin_lock_irqsave(&ext_int_hash_lock, flags);
+ hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
+ if (p->code == code && p->handler == handler) {
+ hlist_del_rcu(&p->entry);
+ kfree_rcu(p, rcu);
+ }
+ }
+ spin_unlock_irqrestore(&ext_int_hash_lock, flags);
+ return 0;
+}
+EXPORT_SYMBOL(unregister_external_irq);
+
+static irqreturn_t do_ext_interrupt(int irq, void *dummy)
+{
+ struct pt_regs *regs = get_irq_regs();
+ struct ext_code ext_code;
+ struct ext_int_info *p;
+ int index;
+
+ ext_code = *(struct ext_code *) &regs->int_code;
+ if (ext_code.code != EXT_IRQ_CLK_COMP)
+ set_cpu_flag(CIF_NOHZ_DELAY);
+
+ index = ext_hash(ext_code.code);
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) {
+ if (unlikely(p->code != ext_code.code))
+ continue;
+ p->handler(ext_code, regs->int_parm, regs->int_parm_long);
+ }
+ rcu_read_unlock();
+ return IRQ_HANDLED;
+}
+
+static struct irqaction external_interrupt = {
+ .name = "EXT",
+ .handler = do_ext_interrupt,
+};
+
+void __init init_ext_interrupts(void)
+{
+ int idx;
+
+ for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++)
+ INIT_HLIST_HEAD(&ext_int_hash[idx]);
+
+ irq_set_chip_and_handler(EXT_INTERRUPT,
+ &dummy_irq_chip, handle_percpu_irq);
+ setup_irq(EXT_INTERRUPT, &external_interrupt);
+}
+
+static DEFINE_SPINLOCK(irq_subclass_lock);
+static unsigned char irq_subclass_refcount[64];
+
+void irq_subclass_register(enum irq_subclass subclass)
+{
+ spin_lock(&irq_subclass_lock);
+ if (!irq_subclass_refcount[subclass])
+ ctl_set_bit(0, subclass);
+ irq_subclass_refcount[subclass]++;
+ spin_unlock(&irq_subclass_lock);
+}
+EXPORT_SYMBOL(irq_subclass_register);
+
+void irq_subclass_unregister(enum irq_subclass subclass)
+{
+ spin_lock(&irq_subclass_lock);
+ irq_subclass_refcount[subclass]--;
+ if (!irq_subclass_refcount[subclass])
+ ctl_clear_bit(0, subclass);
+ spin_unlock(&irq_subclass_lock);
+}
+EXPORT_SYMBOL(irq_subclass_unregister);
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
new file mode 100644
index 000000000..a90299600
--- /dev/null
+++ b/arch/s390/kernel/jump_label.c
@@ -0,0 +1,109 @@
+/*
+ * Jump label s390 support
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/stop_machine.h>
+#include <linux/jump_label.h>
+#include <asm/ipl.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+struct insn {
+ u16 opcode;
+ s32 offset;
+} __packed;
+
+struct insn_args {
+ struct jump_entry *entry;
+ enum jump_label_type type;
+};
+
+static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn)
+{
+ /* brcl 0,0 */
+ insn->opcode = 0xc004;
+ insn->offset = 0;
+}
+
+static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn)
+{
+ /* brcl 15,offset */
+ insn->opcode = 0xc0f4;
+ insn->offset = (entry->target - entry->code) >> 1;
+}
+
+static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
+ struct insn *new)
+{
+ unsigned char *ipc = (unsigned char *)entry->code;
+ unsigned char *ipe = (unsigned char *)expected;
+ unsigned char *ipn = (unsigned char *)new;
+
+ pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc);
+ pr_emerg("Found: %02x %02x %02x %02x %02x %02x\n",
+ ipc[0], ipc[1], ipc[2], ipc[3], ipc[4], ipc[5]);
+ pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n",
+ ipe[0], ipe[1], ipe[2], ipe[3], ipe[4], ipe[5]);
+ pr_emerg("New: %02x %02x %02x %02x %02x %02x\n",
+ ipn[0], ipn[1], ipn[2], ipn[3], ipn[4], ipn[5]);
+ panic("Corrupted kernel text");
+}
+
+static struct insn orignop = {
+ .opcode = 0xc004,
+ .offset = JUMP_LABEL_NOP_OFFSET >> 1,
+};
+
+static void __jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type,
+ int init)
+{
+ struct insn old, new;
+
+ if (type == JUMP_LABEL_ENABLE) {
+ jump_label_make_nop(entry, &old);
+ jump_label_make_branch(entry, &new);
+ } else {
+ jump_label_make_branch(entry, &old);
+ jump_label_make_nop(entry, &new);
+ }
+ if (init) {
+ if (memcmp((void *)entry->code, &orignop, sizeof(orignop)))
+ jump_label_bug(entry, &orignop, &new);
+ } else {
+ if (memcmp((void *)entry->code, &old, sizeof(old)))
+ jump_label_bug(entry, &old, &new);
+ }
+ s390_kernel_write((void *)entry->code, &new, sizeof(new));
+}
+
+static int __sm_arch_jump_label_transform(void *data)
+{
+ struct insn_args *args = data;
+
+ __jump_label_transform(args->entry, args->type, 0);
+ return 0;
+}
+
+void arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ struct insn_args args;
+
+ args.entry = entry;
+ args.type = type;
+
+ stop_machine(__sm_arch_jump_label_transform, &args, NULL);
+}
+
+void arch_jump_label_transform_static(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ __jump_label_transform(entry, type, 1);
+}
+
+#endif
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
new file mode 100644
index 000000000..389db56a2
--- /dev/null
+++ b/arch/s390/kernel/kprobes.c
@@ -0,0 +1,733 @@
+/*
+ * Kernel Probes (KProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corp. 2002, 2006
+ *
+ * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
+ */
+
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/stop_machine.h>
+#include <linux/kdebug.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <asm/cacheflush.h>
+#include <asm/sections.h>
+#include <asm/dis.h>
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe);
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+struct kretprobe_blackpoint kretprobe_blacklist[] = { };
+
+DEFINE_INSN_CACHE_OPS(dmainsn);
+
+static void *alloc_dmainsn_page(void)
+{
+ return (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
+}
+
+static void free_dmainsn_page(void *page)
+{
+ free_page((unsigned long)page);
+}
+
+struct kprobe_insn_cache kprobe_dmainsn_slots = {
+ .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex),
+ .alloc = alloc_dmainsn_page,
+ .free = free_dmainsn_page,
+ .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages),
+ .insn_size = MAX_INSN_SIZE,
+};
+
+static void copy_instruction(struct kprobe *p)
+{
+ unsigned long ip = (unsigned long) p->addr;
+ s64 disp, new_disp;
+ u64 addr, new_addr;
+
+ if (ftrace_location(ip) == ip) {
+ /*
+ * If kprobes patches the instruction that is morphed by
+ * ftrace make sure that kprobes always sees the branch
+ * "jg .+24" that skips the mcount block or the "brcl 0,0"
+ * in case of hotpatch.
+ */
+ ftrace_generate_nop_insn((struct ftrace_insn *)p->ainsn.insn);
+ p->ainsn.is_ftrace_insn = 1;
+ } else
+ memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8));
+ p->opcode = p->ainsn.insn[0];
+ if (!probe_is_insn_relative_long(p->ainsn.insn))
+ return;
+ /*
+ * For pc-relative instructions in RIL-b or RIL-c format patch the
+ * RI2 displacement field. We have already made sure that the insn
+ * slot for the patched instruction is within the same 2GB area
+ * as the original instruction (either kernel image or module area).
+ * Therefore the new displacement will always fit.
+ */
+ disp = *(s32 *)&p->ainsn.insn[1];
+ addr = (u64)(unsigned long)p->addr;
+ new_addr = (u64)(unsigned long)p->ainsn.insn;
+ new_disp = ((addr + (disp * 2)) - new_addr) / 2;
+ *(s32 *)&p->ainsn.insn[1] = new_disp;
+}
+NOKPROBE_SYMBOL(copy_instruction);
+
+static inline int is_kernel_addr(void *addr)
+{
+ return addr < (void *)_end;
+}
+
+static int s390_get_insn_slot(struct kprobe *p)
+{
+ /*
+ * Get an insn slot that is within the same 2GB area like the original
+ * instruction. That way instructions with a 32bit signed displacement
+ * field can be patched and executed within the insn slot.
+ */
+ p->ainsn.insn = NULL;
+ if (is_kernel_addr(p->addr))
+ p->ainsn.insn = get_dmainsn_slot();
+ else if (is_module_addr(p->addr))
+ p->ainsn.insn = get_insn_slot();
+ return p->ainsn.insn ? 0 : -ENOMEM;
+}
+NOKPROBE_SYMBOL(s390_get_insn_slot);
+
+static void s390_free_insn_slot(struct kprobe *p)
+{
+ if (!p->ainsn.insn)
+ return;
+ if (is_kernel_addr(p->addr))
+ free_dmainsn_slot(p->ainsn.insn, 0);
+ else
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
+}
+NOKPROBE_SYMBOL(s390_free_insn_slot);
+
+int arch_prepare_kprobe(struct kprobe *p)
+{
+ if ((unsigned long) p->addr & 0x01)
+ return -EINVAL;
+ /* Make sure the probe isn't going on a difficult instruction */
+ if (probe_is_prohibited_opcode(p->addr))
+ return -EINVAL;
+ if (s390_get_insn_slot(p))
+ return -ENOMEM;
+ copy_instruction(p);
+ return 0;
+}
+NOKPROBE_SYMBOL(arch_prepare_kprobe);
+
+int arch_check_ftrace_location(struct kprobe *p)
+{
+ return 0;
+}
+
+struct swap_insn_args {
+ struct kprobe *p;
+ unsigned int arm_kprobe : 1;
+};
+
+static int swap_instruction(void *data)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ unsigned long status = kcb->kprobe_status;
+ struct swap_insn_args *args = data;
+ struct ftrace_insn new_insn, *insn;
+ struct kprobe *p = args->p;
+ size_t len;
+
+ new_insn.opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode;
+ len = sizeof(new_insn.opc);
+ if (!p->ainsn.is_ftrace_insn)
+ goto skip_ftrace;
+ len = sizeof(new_insn);
+ insn = (struct ftrace_insn *) p->addr;
+ if (args->arm_kprobe) {
+ if (is_ftrace_nop(insn))
+ new_insn.disp = KPROBE_ON_FTRACE_NOP;
+ else
+ new_insn.disp = KPROBE_ON_FTRACE_CALL;
+ } else {
+ ftrace_generate_call_insn(&new_insn, (unsigned long)p->addr);
+ if (insn->disp == KPROBE_ON_FTRACE_NOP)
+ ftrace_generate_nop_insn(&new_insn);
+ }
+skip_ftrace:
+ kcb->kprobe_status = KPROBE_SWAP_INST;
+ s390_kernel_write(p->addr, &new_insn, len);
+ kcb->kprobe_status = status;
+ return 0;
+}
+NOKPROBE_SYMBOL(swap_instruction);
+
+void arch_arm_kprobe(struct kprobe *p)
+{
+ struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
+
+ stop_machine(swap_instruction, &args, NULL);
+}
+NOKPROBE_SYMBOL(arch_arm_kprobe);
+
+void arch_disarm_kprobe(struct kprobe *p)
+{
+ struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
+
+ stop_machine(swap_instruction, &args, NULL);
+}
+NOKPROBE_SYMBOL(arch_disarm_kprobe);
+
+void arch_remove_kprobe(struct kprobe *p)
+{
+ s390_free_insn_slot(p);
+}
+NOKPROBE_SYMBOL(arch_remove_kprobe);
+
+static void enable_singlestep(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs,
+ unsigned long ip)
+{
+ struct per_regs per_kprobe;
+
+ /* Set up the PER control registers %cr9-%cr11 */
+ per_kprobe.control = PER_EVENT_IFETCH;
+ per_kprobe.start = ip;
+ per_kprobe.end = ip;
+
+ /* Save control regs and psw mask */
+ __ctl_store(kcb->kprobe_saved_ctl, 9, 11);
+ kcb->kprobe_saved_imask = regs->psw.mask &
+ (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT);
+
+ /* Set PER control regs, turns on single step for the given address */
+ __ctl_load(per_kprobe, 9, 11);
+ regs->psw.mask |= PSW_MASK_PER;
+ regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
+ regs->psw.addr = ip | PSW_ADDR_AMODE;
+}
+NOKPROBE_SYMBOL(enable_singlestep);
+
+static void disable_singlestep(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs,
+ unsigned long ip)
+{
+ /* Restore control regs and psw mask, set new psw address */
+ __ctl_load(kcb->kprobe_saved_ctl, 9, 11);
+ regs->psw.mask &= ~PSW_MASK_PER;
+ regs->psw.mask |= kcb->kprobe_saved_imask;
+ regs->psw.addr = ip | PSW_ADDR_AMODE;
+}
+NOKPROBE_SYMBOL(disable_singlestep);
+
+/*
+ * Activate a kprobe by storing its pointer to current_kprobe. The
+ * previous kprobe is stored in kcb->prev_kprobe. A stack of up to
+ * two kprobes can be active, see KPROBE_REENTER.
+ */
+static void push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p)
+{
+ kcb->prev_kprobe.kp = __this_cpu_read(current_kprobe);
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+ __this_cpu_write(current_kprobe, p);
+}
+NOKPROBE_SYMBOL(push_kprobe);
+
+/*
+ * Deactivate a kprobe by backing up to the previous state. If the
+ * current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL,
+ * for any other state prev_kprobe.kp will be NULL.
+ */
+static void pop_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+NOKPROBE_SYMBOL(pop_kprobe);
+
+void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+ ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14];
+
+ /* Replace the return addr with trampoline addr */
+ regs->gprs[14] = (unsigned long) &kretprobe_trampoline;
+}
+NOKPROBE_SYMBOL(arch_prepare_kretprobe);
+
+static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p)
+{
+ switch (kcb->kprobe_status) {
+ case KPROBE_HIT_SSDONE:
+ case KPROBE_HIT_ACTIVE:
+ kprobes_inc_nmissed_count(p);
+ break;
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ default:
+ /*
+ * A kprobe on the code path to single step an instruction
+ * is a BUG. The code path resides in the .kprobes.text
+ * section and is executed with interrupts disabled.
+ */
+ printk(KERN_EMERG "Invalid kprobe detected at %p.\n", p->addr);
+ dump_kprobe(p);
+ BUG();
+ }
+}
+NOKPROBE_SYMBOL(kprobe_reenter_check);
+
+static int kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb;
+ struct kprobe *p;
+
+ /*
+ * We want to disable preemption for the entire duration of kprobe
+ * processing. That includes the calls to the pre/post handlers
+ * and single stepping the kprobe instruction.
+ */
+ preempt_disable();
+ kcb = get_kprobe_ctlblk();
+ p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2));
+
+ if (p) {
+ if (kprobe_running()) {
+ /*
+ * We have hit a kprobe while another is still
+ * active. This can happen in the pre and post
+ * handler. Single step the instruction of the
+ * new probe but do not call any handler function
+ * of this secondary kprobe.
+ * push_kprobe and pop_kprobe saves and restores
+ * the currently active kprobe.
+ */
+ kprobe_reenter_check(kcb, p);
+ push_kprobe(kcb, p);
+ kcb->kprobe_status = KPROBE_REENTER;
+ } else {
+ /*
+ * If we have no pre-handler or it returned 0, we
+ * continue with single stepping. If we have a
+ * pre-handler and it returned non-zero, it prepped
+ * for calling the break_handler below on re-entry
+ * for jprobe processing, so get out doing nothing
+ * more here.
+ */
+ push_kprobe(kcb, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (p->pre_handler && p->pre_handler(p, regs))
+ return 1;
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ }
+ enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn);
+ return 1;
+ } else if (kprobe_running()) {
+ p = __this_cpu_read(current_kprobe);
+ if (p->break_handler && p->break_handler(p, regs)) {
+ /*
+ * Continuation after the jprobe completed and
+ * caused the jprobe_return trap. The jprobe
+ * break_handler "returns" to the original
+ * function that still has the kprobe breakpoint
+ * installed. We continue with single stepping.
+ */
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ enable_singlestep(kcb, regs,
+ (unsigned long) p->ainsn.insn);
+ return 1;
+ } /* else:
+ * No kprobe at this address and the current kprobe
+ * has no break handler (no jprobe!). The kernel just
+ * exploded, let the standard trap handler pick up the
+ * pieces.
+ */
+ } /* else:
+ * No kprobe at this address and no active kprobe. The trap has
+ * not been caused by a kprobe breakpoint. The race of breakpoint
+ * vs. kprobe remove does not exist because on s390 as we use
+ * stop_machine to arm/disarm the breakpoints.
+ */
+ preempt_enable_no_resched();
+ return 0;
+}
+NOKPROBE_SYMBOL(kprobe_handler);
+
+/*
+ * Function return probe trampoline:
+ * - init_kprobes() establishes a probepoint here
+ * - When the probed function returns, this probe
+ * causes the handlers to fire
+ */
+static void __used kretprobe_trampoline_holder(void)
+{
+ asm volatile(".global kretprobe_trampoline\n"
+ "kretprobe_trampoline: bcr 0,0\n");
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *tmp;
+ unsigned long flags, orig_ret_address;
+ unsigned long trampoline_address;
+ kprobe_opcode_t *correct_ret_addr;
+
+ INIT_HLIST_HEAD(&empty_rp);
+ kretprobe_hash_lock(current, &head, &flags);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because an multiple functions in the call path
+ * have a return probe installed on them, and/or more than one return
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ ri = NULL;
+ orig_ret_address = 0;
+ correct_ret_addr = NULL;
+ trampoline_address = (unsigned long) &kretprobe_trampoline;
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long) ri->ret_addr;
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+ correct_ret_addr = ri->ret_addr;
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long) ri->ret_addr;
+
+ if (ri->rp && ri->rp->handler) {
+ ri->ret_addr = correct_ret_addr;
+ ri->rp->handler(ri, regs);
+ }
+
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
+
+ pop_kprobe(get_kprobe_ctlblk());
+ kretprobe_hash_unlock(current, &flags);
+ preempt_enable_no_resched();
+
+ hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
+ return 1;
+}
+NOKPROBE_SYMBOL(trampoline_probe_handler);
+
+/*
+ * Called after single-stepping. p->addr is the address of the
+ * instruction whose first byte has been replaced by the "breakpoint"
+ * instruction. To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction. The address of this
+ * copy is p->ainsn.insn.
+ */
+static void resume_execution(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
+ int fixup = probe_get_fixup_type(p->ainsn.insn);
+
+ /* Check if the kprobes location is an enabled ftrace caller */
+ if (p->ainsn.is_ftrace_insn) {
+ struct ftrace_insn *insn = (struct ftrace_insn *) p->addr;
+ struct ftrace_insn call_insn;
+
+ ftrace_generate_call_insn(&call_insn, (unsigned long) p->addr);
+ /*
+ * A kprobe on an enabled ftrace call site actually single
+ * stepped an unconditional branch (ftrace nop equivalent).
+ * Now we need to fixup things and pretend that a brasl r0,...
+ * was executed instead.
+ */
+ if (insn->disp == KPROBE_ON_FTRACE_CALL) {
+ ip += call_insn.disp * 2 - MCOUNT_INSN_SIZE;
+ regs->gprs[0] = (unsigned long)p->addr + sizeof(*insn);
+ }
+ }
+
+ if (fixup & FIXUP_PSW_NORMAL)
+ ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn;
+
+ if (fixup & FIXUP_BRANCH_NOT_TAKEN) {
+ int ilen = insn_length(p->ainsn.insn[0] >> 8);
+ if (ip - (unsigned long) p->ainsn.insn == ilen)
+ ip = (unsigned long) p->addr + ilen;
+ }
+
+ if (fixup & FIXUP_RETURN_REGISTER) {
+ int reg = (p->ainsn.insn[0] & 0xf0) >> 4;
+ regs->gprs[reg] += (unsigned long) p->addr -
+ (unsigned long) p->ainsn.insn;
+ }
+
+ disable_singlestep(kcb, regs, ip);
+}
+NOKPROBE_SYMBOL(resume_execution);
+
+static int post_kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ struct kprobe *p = kprobe_running();
+
+ if (!p)
+ return 0;
+
+ if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+
+ resume_execution(p, regs);
+ pop_kprobe(kcb);
+ preempt_enable_no_resched();
+
+ /*
+ * if somebody else is singlestepping across a probe point, psw mask
+ * will have PER set, in which case, continue the remaining processing
+ * of do_single_step, as if this is not a probe hit.
+ */
+ if (regs->psw.mask & PSW_MASK_PER)
+ return 0;
+
+ return 1;
+}
+NOKPROBE_SYMBOL(post_kprobe_handler);
+
+static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ struct kprobe *p = kprobe_running();
+ const struct exception_table_entry *entry;
+
+ switch(kcb->kprobe_status) {
+ case KPROBE_SWAP_INST:
+ /* We are here because the instruction replacement failed */
+ return 0;
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ /*
+ * We are here because the instruction being single
+ * stepped caused a page fault. We reset the current
+ * kprobe and the nip points back to the probe address
+ * and allow the page fault handler to continue as a
+ * normal page fault.
+ */
+ disable_singlestep(kcb, regs, (unsigned long) p->addr);
+ pop_kprobe(kcb);
+ preempt_enable_no_resched();
+ break;
+ case KPROBE_HIT_ACTIVE:
+ case KPROBE_HIT_SSDONE:
+ /*
+ * We increment the nmissed count for accounting,
+ * we can also use npre/npostfault count for accounting
+ * these specific fault cases.
+ */
+ kprobes_inc_nmissed_count(p);
+
+ /*
+ * We come here because instructions in the pre/post
+ * handler caused the page_fault, this could happen
+ * if handler tries to access user space by
+ * copy_from_user(), get_user() etc. Let the
+ * user-specified handler try to fix it first.
+ */
+ if (p->fault_handler && p->fault_handler(p, regs, trapnr))
+ return 1;
+
+ /*
+ * In case the user-specified fault handler returned
+ * zero, try to fix up.
+ */
+ entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+ if (entry) {
+ regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
+ return 1;
+ }
+
+ /*
+ * fixup_exception() could not handle it,
+ * Let do_page_fault() fix it.
+ */
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+NOKPROBE_SYMBOL(kprobe_trap_handler);
+
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ int ret;
+
+ if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+ local_irq_disable();
+ ret = kprobe_trap_handler(regs, trapnr);
+ if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+ local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
+ return ret;
+}
+NOKPROBE_SYMBOL(kprobe_fault_handler);
+
+/*
+ * Wrapper routine to for handling exceptions.
+ */
+int kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct die_args *args = (struct die_args *) data;
+ struct pt_regs *regs = args->regs;
+ int ret = NOTIFY_DONE;
+
+ if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+ local_irq_disable();
+
+ switch (val) {
+ case DIE_BPT:
+ if (kprobe_handler(regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_SSTEP:
+ if (post_kprobe_handler(regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_TRAP:
+ if (!preemptible() && kprobe_running() &&
+ kprobe_trap_handler(regs, args->trapnr))
+ ret = NOTIFY_STOP;
+ break;
+ default:
+ break;
+ }
+
+ if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
+ local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
+
+ return ret;
+}
+NOKPROBE_SYMBOL(kprobe_exceptions_notify);
+
+int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ unsigned long stack;
+
+ memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
+
+ /* setup return addr to the jprobe handler routine */
+ regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE;
+ regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
+
+ /* r15 is the stack pointer */
+ stack = (unsigned long) regs->gprs[15];
+
+ memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack));
+ return 1;
+}
+NOKPROBE_SYMBOL(setjmp_pre_handler);
+
+void jprobe_return(void)
+{
+ asm volatile(".word 0x0002");
+}
+NOKPROBE_SYMBOL(jprobe_return);
+
+int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ unsigned long stack;
+
+ stack = (unsigned long) kcb->jprobe_saved_regs.gprs[15];
+
+ /* Put the regs back */
+ memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+ /* put the stack back */
+ memcpy((void *) stack, kcb->jprobes_stack, MIN_STACK_SIZE(stack));
+ preempt_enable_no_resched();
+ return 1;
+}
+NOKPROBE_SYMBOL(longjmp_break_handler);
+
+static struct kprobe trampoline = {
+ .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+ return register_kprobe(&trampoline);
+}
+
+int arch_trampoline_kprobe(struct kprobe *p)
+{
+ return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline;
+}
+NOKPROBE_SYMBOL(arch_trampoline_kprobe);
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
new file mode 100644
index 000000000..6ea6d6933
--- /dev/null
+++ b/arch/s390/kernel/lgr.c
@@ -0,0 +1,186 @@
+/*
+ * Linux Guest Relocation (LGR) detection
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include <asm/facility.h>
+#include <asm/sysinfo.h>
+#include <asm/ebcdic.h>
+#include <asm/debug.h>
+#include <asm/ipl.h>
+
+#define LGR_TIMER_INTERVAL_SECS (30 * 60)
+#define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */
+
+/*
+ * LGR info: Contains stfle and stsi data
+ */
+struct lgr_info {
+ /* Bit field with facility information: 4 DWORDs are stored */
+ u64 stfle_fac_list[4];
+ /* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM */
+ u32 level;
+ /* Level 1: CEC info (stsi 1.1.1) */
+ char manufacturer[16];
+ char type[4];
+ char sequence[16];
+ char plant[4];
+ char model[16];
+ /* Level 2: LPAR info (stsi 2.2.2) */
+ u16 lpar_number;
+ char name[8];
+ /* Level 3: VM info (stsi 3.2.2) */
+ u8 vm_count;
+ struct {
+ char name[8];
+ char cpi[16];
+ } vm[VM_LEVEL_MAX];
+} __packed __aligned(8);
+
+/*
+ * LGR globals
+ */
+static char lgr_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+static struct lgr_info lgr_info_last;
+static struct lgr_info lgr_info_cur;
+static struct debug_info *lgr_dbf;
+
+/*
+ * Copy buffer and then convert it to ASCII
+ */
+static void cpascii(char *dst, char *src, int size)
+{
+ memcpy(dst, src, size);
+ EBCASC(dst, size);
+}
+
+/*
+ * Fill LGR info with 1.1.1 stsi data
+ */
+static void lgr_stsi_1_1_1(struct lgr_info *lgr_info)
+{
+ struct sysinfo_1_1_1 *si = (void *) lgr_page;
+
+ if (stsi(si, 1, 1, 1))
+ return;
+ cpascii(lgr_info->manufacturer, si->manufacturer,
+ sizeof(si->manufacturer));
+ cpascii(lgr_info->type, si->type, sizeof(si->type));
+ cpascii(lgr_info->model, si->model, sizeof(si->model));
+ cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence));
+ cpascii(lgr_info->plant, si->plant, sizeof(si->plant));
+}
+
+/*
+ * Fill LGR info with 2.2.2 stsi data
+ */
+static void lgr_stsi_2_2_2(struct lgr_info *lgr_info)
+{
+ struct sysinfo_2_2_2 *si = (void *) lgr_page;
+
+ if (stsi(si, 2, 2, 2))
+ return;
+ cpascii(lgr_info->name, si->name, sizeof(si->name));
+ memcpy(&lgr_info->lpar_number, &si->lpar_number,
+ sizeof(lgr_info->lpar_number));
+}
+
+/*
+ * Fill LGR info with 3.2.2 stsi data
+ */
+static void lgr_stsi_3_2_2(struct lgr_info *lgr_info)
+{
+ struct sysinfo_3_2_2 *si = (void *) lgr_page;
+ int i;
+
+ if (stsi(si, 3, 2, 2))
+ return;
+ for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) {
+ cpascii(lgr_info->vm[i].name, si->vm[i].name,
+ sizeof(si->vm[i].name));
+ cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi,
+ sizeof(si->vm[i].cpi));
+ }
+ lgr_info->vm_count = si->count;
+}
+
+/*
+ * Fill LGR info with current data
+ */
+static void lgr_info_get(struct lgr_info *lgr_info)
+{
+ int level;
+
+ memset(lgr_info, 0, sizeof(*lgr_info));
+ stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list));
+ level = stsi(NULL, 0, 0, 0);
+ lgr_info->level = level;
+ if (level >= 1)
+ lgr_stsi_1_1_1(lgr_info);
+ if (level >= 2)
+ lgr_stsi_2_2_2(lgr_info);
+ if (level >= 3)
+ lgr_stsi_3_2_2(lgr_info);
+}
+
+/*
+ * Check if LGR info has changed and if yes log new LGR info to s390dbf
+ */
+void lgr_info_log(void)
+{
+ static DEFINE_SPINLOCK(lgr_info_lock);
+ unsigned long flags;
+
+ if (!spin_trylock_irqsave(&lgr_info_lock, flags))
+ return;
+ lgr_info_get(&lgr_info_cur);
+ if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) {
+ debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur));
+ lgr_info_last = lgr_info_cur;
+ }
+ spin_unlock_irqrestore(&lgr_info_lock, flags);
+}
+EXPORT_SYMBOL_GPL(lgr_info_log);
+
+static void lgr_timer_set(void);
+
+/*
+ * LGR timer callback
+ */
+static void lgr_timer_fn(unsigned long ignored)
+{
+ lgr_info_log();
+ lgr_timer_set();
+}
+
+static struct timer_list lgr_timer =
+ TIMER_DEFERRED_INITIALIZER(lgr_timer_fn, 0, 0);
+
+/*
+ * Setup next LGR timer
+ */
+static void lgr_timer_set(void)
+{
+ mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ);
+}
+
+/*
+ * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer
+ */
+static int __init lgr_init(void)
+{
+ lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info));
+ if (!lgr_dbf)
+ return -ENOMEM;
+ debug_register_view(lgr_dbf, &debug_hex_ascii_view);
+ lgr_info_get(&lgr_info_last);
+ debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last));
+ lgr_timer_set();
+ return 0;
+}
+module_init(lgr_init);
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
new file mode 100644
index 000000000..fb0901ec4
--- /dev/null
+++ b/arch/s390/kernel/machine_kexec.c
@@ -0,0 +1,275 @@
+/*
+ * Copyright IBM Corp. 2005, 2011
+ *
+ * Author(s): Rolf Adelsberger,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ * Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/device.h>
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/ftrace.h>
+#include <linux/debug_locks.h>
+#include <linux/suspend.h>
+#include <asm/cio.h>
+#include <asm/setup.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/smp.h>
+#include <asm/reset.h>
+#include <asm/ipl.h>
+#include <asm/diag.h>
+#include <asm/elf.h>
+#include <asm/asm-offsets.h>
+#include <asm/os_info.h>
+#include <asm/switch_to.h>
+
+typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
+
+extern const unsigned char relocate_kernel[];
+extern const unsigned long long relocate_kernel_len;
+
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * Create ELF notes for one CPU
+ */
+static void add_elf_notes(int cpu)
+{
+ struct save_area *sa = (void *) 4608 + store_prefix();
+ void *ptr;
+
+ memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa));
+ ptr = (u64 *) per_cpu_ptr(crash_notes, cpu);
+ ptr = fill_cpu_elf_notes(ptr, sa, NULL);
+ memset(ptr, 0, sizeof(struct elf_note));
+}
+
+/*
+ * Initialize CPU ELF notes
+ */
+static void setup_regs(void)
+{
+ unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE;
+ struct _lowcore *lc;
+ int cpu, this_cpu;
+
+ /* Get lowcore pointer from store status of this CPU (absolute zero) */
+ lc = (struct _lowcore *)(unsigned long)S390_lowcore.prefixreg_save_area;
+ this_cpu = smp_find_processor_id(stap());
+ add_elf_notes(this_cpu);
+ for_each_online_cpu(cpu) {
+ if (cpu == this_cpu)
+ continue;
+ if (smp_store_status(cpu))
+ continue;
+ add_elf_notes(cpu);
+ }
+ if (MACHINE_HAS_VX)
+ save_vx_regs_safe((void *) lc->vector_save_area_addr);
+ /* Copy dump CPU store status info to absolute zero */
+ memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area));
+}
+
+/*
+ * PM notifier callback for kdump
+ */
+static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
+ void *ptr)
+{
+ switch (action) {
+ case PM_SUSPEND_PREPARE:
+ case PM_HIBERNATION_PREPARE:
+ if (crashk_res.start)
+ crash_map_reserved_pages();
+ break;
+ case PM_POST_SUSPEND:
+ case PM_POST_HIBERNATION:
+ if (crashk_res.start)
+ crash_unmap_reserved_pages();
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+ return NOTIFY_OK;
+}
+
+static int __init machine_kdump_pm_init(void)
+{
+ pm_notifier(machine_kdump_pm_cb, 0);
+ return 0;
+}
+arch_initcall(machine_kdump_pm_init);
+
+/*
+ * Start kdump: We expect here that a store status has been done on our CPU
+ */
+static void __do_machine_kdump(void *image)
+{
+ int (*start_kdump)(int) = (void *)((struct kimage *) image)->start;
+
+ __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA);
+ start_kdump(1);
+}
+#endif
+
+/*
+ * Check if kdump checksums are valid: We call purgatory with parameter "0"
+ */
+static int kdump_csum_valid(struct kimage *image)
+{
+#ifdef CONFIG_CRASH_DUMP
+ int (*start_kdump)(int) = (void *)image->start;
+ int rc;
+
+ __arch_local_irq_stnsm(0xfb); /* disable DAT */
+ rc = start_kdump(0);
+ __arch_local_irq_stosm(0x04); /* enable DAT */
+ return rc ? 0 : -EINVAL;
+#else
+ return -EINVAL;
+#endif
+}
+
+/*
+ * Map or unmap crashkernel memory
+ */
+static void crash_map_pages(int enable)
+{
+ unsigned long size = resource_size(&crashk_res);
+
+ BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN ||
+ size % KEXEC_CRASH_MEM_ALIGN);
+ if (enable)
+ vmem_add_mapping(crashk_res.start, size);
+ else {
+ vmem_remove_mapping(crashk_res.start, size);
+ if (size)
+ os_info_crashkernel_add(crashk_res.start, size);
+ else
+ os_info_crashkernel_add(0, 0);
+ }
+}
+
+/*
+ * Map crashkernel memory
+ */
+void crash_map_reserved_pages(void)
+{
+ crash_map_pages(1);
+}
+
+/*
+ * Unmap crashkernel memory
+ */
+void crash_unmap_reserved_pages(void)
+{
+ crash_map_pages(0);
+}
+
+/*
+ * Give back memory to hypervisor before new kdump is loaded
+ */
+static int machine_kexec_prepare_kdump(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (MACHINE_IS_VM)
+ diag10_range(PFN_DOWN(crashk_res.start),
+ PFN_DOWN(crashk_res.end - crashk_res.start + 1));
+ return 0;
+#else
+ return -EINVAL;
+#endif
+}
+
+int machine_kexec_prepare(struct kimage *image)
+{
+ void *reboot_code_buffer;
+
+ /* Can't replace kernel image since it is read-only. */
+ if (ipl_flags & IPL_NSS_VALID)
+ return -EOPNOTSUPP;
+
+ if (image->type == KEXEC_TYPE_CRASH)
+ return machine_kexec_prepare_kdump();
+
+ /* We don't support anything but the default image type for now. */
+ if (image->type != KEXEC_TYPE_DEFAULT)
+ return -EINVAL;
+
+ /* Get the destination where the assembler code should be copied to.*/
+ reboot_code_buffer = (void *) page_to_phys(image->control_code_page);
+
+ /* Then copy it */
+ memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len);
+ return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+ VMCOREINFO_SYMBOL(lowcore_ptr);
+ VMCOREINFO_SYMBOL(high_memory);
+ VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+}
+
+void machine_shutdown(void)
+{
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+}
+
+/*
+ * Do normal kexec
+ */
+static void __do_machine_kexec(void *data)
+{
+ relocate_kernel_t data_mover;
+ struct kimage *image = data;
+
+ data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page);
+
+ /* Call the moving routine */
+ (*data_mover)(&image->head, image->start);
+}
+
+/*
+ * Reset system and call either kdump or normal kexec
+ */
+static void __machine_kexec(void *data)
+{
+ __arch_local_irq_stosm(0x04); /* enable DAT */
+ pfault_fini();
+ tracing_off();
+ debug_locks_off();
+#ifdef CONFIG_CRASH_DUMP
+ if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH) {
+
+ lgr_info_log();
+ s390_reset_system(setup_regs, __do_machine_kdump, data);
+ } else
+#endif
+ s390_reset_system(NULL, __do_machine_kexec, data);
+ disabled_wait((unsigned long) __builtin_return_address(0));
+}
+
+/*
+ * Do either kdump or normal kexec. In case of kdump we first ask
+ * purgatory, if kdump checksums are valid.
+ */
+void machine_kexec(struct kimage *image)
+{
+ if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image))
+ return;
+ tracer_disable();
+ smp_send_stop();
+ smp_call_ipl_cpu(__machine_kexec, image);
+}
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
new file mode 100644
index 000000000..e499370fb
--- /dev/null
+++ b/arch/s390/kernel/mcount.S
@@ -0,0 +1,82 @@
+/*
+ * Copyright IBM Corp. 2008, 2009
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ptrace.h>
+
+ .section .kprobes.text, "ax"
+
+ENTRY(ftrace_stub)
+ br %r14
+
+#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE)
+#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
+#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
+#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
+
+ENTRY(_mcount)
+ br %r14
+
+ENTRY(ftrace_caller)
+ .globl ftrace_regs_caller
+ .set ftrace_regs_caller,ftrace_caller
+ lgr %r1,%r15
+#ifndef CC_USING_HOTPATCH
+ aghi %r0,MCOUNT_RETURN_FIXUP
+#endif
+ aghi %r15,-STACK_FRAME_SIZE
+ stg %r1,__SF_BACKCHAIN(%r15)
+ stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15)
+ stg %r0,(STACK_PTREGS_PSW+8)(%r15)
+ stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ aghik %r2,%r0,-MCOUNT_INSN_SIZE
+ lgrl %r4,function_trace_op
+ lgrl %r1,ftrace_trace_function
+#else
+ lgr %r2,%r0
+ aghi %r2,-MCOUNT_INSN_SIZE
+ larl %r4,function_trace_op
+ lg %r4,0(%r4)
+ larl %r1,ftrace_trace_function
+ lg %r1,0(%r1)
+#endif
+ lgr %r3,%r14
+ la %r5,STACK_PTREGS(%r15)
+ basr %r14,%r1
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+# The j instruction gets runtime patched to a nop instruction.
+# See ftrace_enable_ftrace_graph_caller.
+ENTRY(ftrace_graph_caller)
+ j ftrace_graph_caller_end
+ lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15)
+ lg %r3,(STACK_PTREGS_PSW+8)(%r15)
+ brasl %r14,prepare_ftrace_return
+ stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15)
+ftrace_graph_caller_end:
+ .globl ftrace_graph_caller_end
+#endif
+ lg %r1,(STACK_PTREGS_PSW+8)(%r15)
+ lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
+ br %r1
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+ENTRY(return_to_handler)
+ stmg %r2,%r5,32(%r15)
+ lgr %r1,%r15
+ aghi %r15,-STACK_FRAME_OVERHEAD
+ stg %r1,__SF_BACKCHAIN(%r15)
+ brasl %r14,ftrace_return_to_handler
+ aghi %r15,STACK_FRAME_OVERHEAD
+ lgr %r14,%r2
+ lmg %r2,%r5,32(%r15)
+ br %r14
+
+#endif
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
new file mode 100644
index 000000000..0c1a67931
--- /dev/null
+++ b/arch/s390/kernel/module.c
@@ -0,0 +1,431 @@
+/*
+ * Kernel module help for s390.
+ *
+ * S390 version
+ * Copyright IBM Corp. 2002, 2003
+ * Author(s): Arnd Bergmann (arndb@de.ibm.com)
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * based on i386 version
+ * Copyright (C) 2001 Rusty Russell.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/module.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/moduleloader.h>
+#include <linux/bug.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)
+#endif
+
+#define PLT_ENTRY_SIZE 20
+
+void *module_alloc(unsigned long size)
+{
+ if (PAGE_ALIGN(size) > MODULES_LEN)
+ return NULL;
+ return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+ GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+}
+
+void module_arch_freeing_init(struct module *mod)
+{
+ vfree(mod->arch.syminfo);
+ mod->arch.syminfo = NULL;
+}
+
+static void check_rela(Elf_Rela *rela, struct module *me)
+{
+ struct mod_arch_syminfo *info;
+
+ info = me->arch.syminfo + ELF_R_SYM (rela->r_info);
+ switch (ELF_R_TYPE (rela->r_info)) {
+ case R_390_GOT12: /* 12 bit GOT offset. */
+ case R_390_GOT16: /* 16 bit GOT offset. */
+ case R_390_GOT20: /* 20 bit GOT offset. */
+ case R_390_GOT32: /* 32 bit GOT offset. */
+ case R_390_GOT64: /* 64 bit GOT offset. */
+ case R_390_GOTENT: /* 32 bit PC rel. to GOT entry shifted by 1. */
+ case R_390_GOTPLT12: /* 12 bit offset to jump slot. */
+ case R_390_GOTPLT16: /* 16 bit offset to jump slot. */
+ case R_390_GOTPLT20: /* 20 bit offset to jump slot. */
+ case R_390_GOTPLT32: /* 32 bit offset to jump slot. */
+ case R_390_GOTPLT64: /* 64 bit offset to jump slot. */
+ case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. */
+ if (info->got_offset == -1UL) {
+ info->got_offset = me->arch.got_size;
+ me->arch.got_size += sizeof(void*);
+ }
+ break;
+ case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */
+ case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */
+ case R_390_PLT32: /* 32 bit PC relative PLT address. */
+ case R_390_PLT64: /* 64 bit PC relative PLT address. */
+ case R_390_PLTOFF16: /* 16 bit offset from GOT to PLT. */
+ case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
+ case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
+ if (info->plt_offset == -1UL) {
+ info->plt_offset = me->arch.plt_size;
+ me->arch.plt_size += PLT_ENTRY_SIZE;
+ }
+ break;
+ case R_390_COPY:
+ case R_390_GLOB_DAT:
+ case R_390_JMP_SLOT:
+ case R_390_RELATIVE:
+ /* Only needed if we want to support loading of
+ modules linked with -shared. */
+ break;
+ }
+}
+
+/*
+ * Account for GOT and PLT relocations. We can't add sections for
+ * got and plt but we can increase the core module size.
+ */
+int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *me)
+{
+ Elf_Shdr *symtab;
+ Elf_Sym *symbols;
+ Elf_Rela *rela;
+ char *strings;
+ int nrela, i, j;
+
+ /* Find symbol table and string table. */
+ symtab = NULL;
+ for (i = 0; i < hdr->e_shnum; i++)
+ switch (sechdrs[i].sh_type) {
+ case SHT_SYMTAB:
+ symtab = sechdrs + i;
+ break;
+ }
+ if (!symtab) {
+ printk(KERN_ERR "module %s: no symbol table\n", me->name);
+ return -ENOEXEC;
+ }
+
+ /* Allocate one syminfo structure per symbol. */
+ me->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym);
+ me->arch.syminfo = vmalloc(me->arch.nsyms *
+ sizeof(struct mod_arch_syminfo));
+ if (!me->arch.syminfo)
+ return -ENOMEM;
+ symbols = (void *) hdr + symtab->sh_offset;
+ strings = (void *) hdr + sechdrs[symtab->sh_link].sh_offset;
+ for (i = 0; i < me->arch.nsyms; i++) {
+ if (symbols[i].st_shndx == SHN_UNDEF &&
+ strcmp(strings + symbols[i].st_name,
+ "_GLOBAL_OFFSET_TABLE_") == 0)
+ /* "Define" it as absolute. */
+ symbols[i].st_shndx = SHN_ABS;
+ me->arch.syminfo[i].got_offset = -1UL;
+ me->arch.syminfo[i].plt_offset = -1UL;
+ me->arch.syminfo[i].got_initialized = 0;
+ me->arch.syminfo[i].plt_initialized = 0;
+ }
+
+ /* Search for got/plt relocations. */
+ me->arch.got_size = me->arch.plt_size = 0;
+ for (i = 0; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type != SHT_RELA)
+ continue;
+ nrela = sechdrs[i].sh_size / sizeof(Elf_Rela);
+ rela = (void *) hdr + sechdrs[i].sh_offset;
+ for (j = 0; j < nrela; j++)
+ check_rela(rela + j, me);
+ }
+
+ /* Increase core size by size of got & plt and set start
+ offsets for got and plt. */
+ me->core_size = ALIGN(me->core_size, 4);
+ me->arch.got_offset = me->core_size;
+ me->core_size += me->arch.got_size;
+ me->arch.plt_offset = me->core_size;
+ me->core_size += me->arch.plt_size;
+ return 0;
+}
+
+static int apply_rela_bits(Elf_Addr loc, Elf_Addr val,
+ int sign, int bits, int shift)
+{
+ unsigned long umax;
+ long min, max;
+
+ if (val & ((1UL << shift) - 1))
+ return -ENOEXEC;
+ if (sign) {
+ val = (Elf_Addr)(((long) val) >> shift);
+ min = -(1L << (bits - 1));
+ max = (1L << (bits - 1)) - 1;
+ if ((long) val < min || (long) val > max)
+ return -ENOEXEC;
+ } else {
+ val >>= shift;
+ umax = ((1UL << (bits - 1)) << 1) - 1;
+ if ((unsigned long) val > umax)
+ return -ENOEXEC;
+ }
+
+ if (bits == 8)
+ *(unsigned char *) loc = val;
+ else if (bits == 12)
+ *(unsigned short *) loc = (val & 0xfff) |
+ (*(unsigned short *) loc & 0xf000);
+ else if (bits == 16)
+ *(unsigned short *) loc = val;
+ else if (bits == 20)
+ *(unsigned int *) loc = (val & 0xfff) << 16 |
+ (val & 0xff000) >> 4 |
+ (*(unsigned int *) loc & 0xf00000ff);
+ else if (bits == 32)
+ *(unsigned int *) loc = val;
+ else if (bits == 64)
+ *(unsigned long *) loc = val;
+ return 0;
+}
+
+static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
+ const char *strtab, struct module *me)
+{
+ struct mod_arch_syminfo *info;
+ Elf_Addr loc, val;
+ int r_type, r_sym;
+ int rc = -ENOEXEC;
+
+ /* This is where to make the change */
+ loc = base + rela->r_offset;
+ /* This is the symbol it is referring to. Note that all
+ undefined symbols have been resolved. */
+ r_sym = ELF_R_SYM(rela->r_info);
+ r_type = ELF_R_TYPE(rela->r_info);
+ info = me->arch.syminfo + r_sym;
+ val = symtab[r_sym].st_value;
+
+ switch (r_type) {
+ case R_390_NONE: /* No relocation. */
+ rc = 0;
+ break;
+ case R_390_8: /* Direct 8 bit. */
+ case R_390_12: /* Direct 12 bit. */
+ case R_390_16: /* Direct 16 bit. */
+ case R_390_20: /* Direct 20 bit. */
+ case R_390_32: /* Direct 32 bit. */
+ case R_390_64: /* Direct 64 bit. */
+ val += rela->r_addend;
+ if (r_type == R_390_8)
+ rc = apply_rela_bits(loc, val, 0, 8, 0);
+ else if (r_type == R_390_12)
+ rc = apply_rela_bits(loc, val, 0, 12, 0);
+ else if (r_type == R_390_16)
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
+ else if (r_type == R_390_20)
+ rc = apply_rela_bits(loc, val, 1, 20, 0);
+ else if (r_type == R_390_32)
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
+ else if (r_type == R_390_64)
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
+ break;
+ case R_390_PC16: /* PC relative 16 bit. */
+ case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */
+ case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */
+ case R_390_PC32: /* PC relative 32 bit. */
+ case R_390_PC64: /* PC relative 64 bit. */
+ val += rela->r_addend - loc;
+ if (r_type == R_390_PC16)
+ rc = apply_rela_bits(loc, val, 1, 16, 0);
+ else if (r_type == R_390_PC16DBL)
+ rc = apply_rela_bits(loc, val, 1, 16, 1);
+ else if (r_type == R_390_PC32DBL)
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
+ else if (r_type == R_390_PC32)
+ rc = apply_rela_bits(loc, val, 1, 32, 0);
+ else if (r_type == R_390_PC64)
+ rc = apply_rela_bits(loc, val, 1, 64, 0);
+ break;
+ case R_390_GOT12: /* 12 bit GOT offset. */
+ case R_390_GOT16: /* 16 bit GOT offset. */
+ case R_390_GOT20: /* 20 bit GOT offset. */
+ case R_390_GOT32: /* 32 bit GOT offset. */
+ case R_390_GOT64: /* 64 bit GOT offset. */
+ case R_390_GOTENT: /* 32 bit PC rel. to GOT entry shifted by 1. */
+ case R_390_GOTPLT12: /* 12 bit offset to jump slot. */
+ case R_390_GOTPLT20: /* 20 bit offset to jump slot. */
+ case R_390_GOTPLT16: /* 16 bit offset to jump slot. */
+ case R_390_GOTPLT32: /* 32 bit offset to jump slot. */
+ case R_390_GOTPLT64: /* 64 bit offset to jump slot. */
+ case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. */
+ if (info->got_initialized == 0) {
+ Elf_Addr *gotent;
+
+ gotent = me->module_core + me->arch.got_offset +
+ info->got_offset;
+ *gotent = val;
+ info->got_initialized = 1;
+ }
+ val = info->got_offset + rela->r_addend;
+ if (r_type == R_390_GOT12 ||
+ r_type == R_390_GOTPLT12)
+ rc = apply_rela_bits(loc, val, 0, 12, 0);
+ else if (r_type == R_390_GOT16 ||
+ r_type == R_390_GOTPLT16)
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
+ else if (r_type == R_390_GOT20 ||
+ r_type == R_390_GOTPLT20)
+ rc = apply_rela_bits(loc, val, 1, 20, 0);
+ else if (r_type == R_390_GOT32 ||
+ r_type == R_390_GOTPLT32)
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
+ else if (r_type == R_390_GOT64 ||
+ r_type == R_390_GOTPLT64)
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
+ else if (r_type == R_390_GOTENT ||
+ r_type == R_390_GOTPLTENT) {
+ val += (Elf_Addr) me->module_core - loc;
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
+ }
+ break;
+ case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */
+ case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */
+ case R_390_PLT32: /* 32 bit PC relative PLT address. */
+ case R_390_PLT64: /* 64 bit PC relative PLT address. */
+ case R_390_PLTOFF16: /* 16 bit offset from GOT to PLT. */
+ case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
+ case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
+ if (info->plt_initialized == 0) {
+ unsigned int *ip;
+ ip = me->module_core + me->arch.plt_offset +
+ info->plt_offset;
+ ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
+ ip[1] = 0x100a0004;
+ ip[2] = 0x07f10000;
+ ip[3] = (unsigned int) (val >> 32);
+ ip[4] = (unsigned int) val;
+ info->plt_initialized = 1;
+ }
+ if (r_type == R_390_PLTOFF16 ||
+ r_type == R_390_PLTOFF32 ||
+ r_type == R_390_PLTOFF64)
+ val = me->arch.plt_offset - me->arch.got_offset +
+ info->plt_offset + rela->r_addend;
+ else {
+ if (!((r_type == R_390_PLT16DBL &&
+ val - loc + 0xffffUL < 0x1ffffeUL) ||
+ (r_type == R_390_PLT32DBL &&
+ val - loc + 0xffffffffULL < 0x1fffffffeULL)))
+ val = (Elf_Addr) me->module_core +
+ me->arch.plt_offset +
+ info->plt_offset;
+ val += rela->r_addend - loc;
+ }
+ if (r_type == R_390_PLT16DBL)
+ rc = apply_rela_bits(loc, val, 1, 16, 1);
+ else if (r_type == R_390_PLTOFF16)
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
+ else if (r_type == R_390_PLT32DBL)
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
+ else if (r_type == R_390_PLT32 ||
+ r_type == R_390_PLTOFF32)
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
+ else if (r_type == R_390_PLT64 ||
+ r_type == R_390_PLTOFF64)
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
+ break;
+ case R_390_GOTOFF16: /* 16 bit offset to GOT. */
+ case R_390_GOTOFF32: /* 32 bit offset to GOT. */
+ case R_390_GOTOFF64: /* 64 bit offset to GOT. */
+ val = val + rela->r_addend -
+ ((Elf_Addr) me->module_core + me->arch.got_offset);
+ if (r_type == R_390_GOTOFF16)
+ rc = apply_rela_bits(loc, val, 0, 16, 0);
+ else if (r_type == R_390_GOTOFF32)
+ rc = apply_rela_bits(loc, val, 0, 32, 0);
+ else if (r_type == R_390_GOTOFF64)
+ rc = apply_rela_bits(loc, val, 0, 64, 0);
+ break;
+ case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */
+ case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */
+ val = (Elf_Addr) me->module_core + me->arch.got_offset +
+ rela->r_addend - loc;
+ if (r_type == R_390_GOTPC)
+ rc = apply_rela_bits(loc, val, 1, 32, 0);
+ else if (r_type == R_390_GOTPCDBL)
+ rc = apply_rela_bits(loc, val, 1, 32, 1);
+ break;
+ case R_390_COPY:
+ case R_390_GLOB_DAT: /* Create GOT entry. */
+ case R_390_JMP_SLOT: /* Create PLT entry. */
+ case R_390_RELATIVE: /* Adjust by program base. */
+ /* Only needed if we want to support loading of
+ modules linked with -shared. */
+ return -ENOEXEC;
+ default:
+ printk(KERN_ERR "module %s: unknown relocation: %u\n",
+ me->name, r_type);
+ return -ENOEXEC;
+ }
+ if (rc) {
+ printk(KERN_ERR "module %s: relocation error for symbol %s "
+ "(r_type %i, value 0x%lx)\n",
+ me->name, strtab + symtab[r_sym].st_name,
+ r_type, (unsigned long) val);
+ return rc;
+ }
+ return 0;
+}
+
+int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+ unsigned int symindex, unsigned int relsec,
+ struct module *me)
+{
+ Elf_Addr base;
+ Elf_Sym *symtab;
+ Elf_Rela *rela;
+ unsigned long i, n;
+ int rc;
+
+ DEBUGP("Applying relocate section %u to %u\n",
+ relsec, sechdrs[relsec].sh_info);
+ base = sechdrs[sechdrs[relsec].sh_info].sh_addr;
+ symtab = (Elf_Sym *) sechdrs[symindex].sh_addr;
+ rela = (Elf_Rela *) sechdrs[relsec].sh_addr;
+ n = sechdrs[relsec].sh_size / sizeof(Elf_Rela);
+
+ for (i = 0; i < n; i++, rela++) {
+ rc = apply_rela(rela, base, symtab, strtab, me);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *me)
+{
+ jump_label_apply_nops(me);
+ vfree(me->arch.syminfo);
+ me->arch.syminfo = NULL;
+ return 0;
+}
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
new file mode 100644
index 000000000..505c17c0a
--- /dev/null
+++ b/arch/s390/kernel/nmi.c
@@ -0,0 +1,352 @@
+/*
+ * Machine check handler
+ *
+ * Copyright IBM Corp. 2000, 2009
+ * Author(s): Ingo Adlung <adlung@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Cornelia Huck <cornelia.huck@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/time.h>
+#include <linux/module.h>
+#include <asm/lowcore.h>
+#include <asm/smp.h>
+#include <asm/etr.h>
+#include <asm/cputime.h>
+#include <asm/nmi.h>
+#include <asm/crw.h>
+#include <asm/switch_to.h>
+
+struct mcck_struct {
+ int kill_task;
+ int channel_report;
+ int warning;
+ unsigned long long mcck_code;
+};
+
+static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
+
+static void s390_handle_damage(char *msg)
+{
+ smp_send_stop();
+ disabled_wait((unsigned long) __builtin_return_address(0));
+ while (1);
+}
+
+/*
+ * Main machine check handler function. Will be called with interrupts enabled
+ * or disabled and machine checks enabled or disabled.
+ */
+void s390_handle_mcck(void)
+{
+ unsigned long flags;
+ struct mcck_struct mcck;
+
+ /*
+ * Disable machine checks and get the current state of accumulated
+ * machine checks. Afterwards delete the old state and enable machine
+ * checks again.
+ */
+ local_irq_save(flags);
+ local_mcck_disable();
+ mcck = *this_cpu_ptr(&cpu_mcck);
+ memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
+ clear_cpu_flag(CIF_MCCK_PENDING);
+ local_mcck_enable();
+ local_irq_restore(flags);
+
+ if (mcck.channel_report)
+ crw_handle_channel_report();
+ /*
+ * A warning may remain for a prolonged period on the bare iron.
+ * (actually until the machine is powered off, or the problem is gone)
+ * So we just stop listening for the WARNING MCH and avoid continuously
+ * being interrupted. One caveat is however, that we must do this per
+ * processor and cannot use the smp version of ctl_clear_bit().
+ * On VM we only get one interrupt per virtally presented machinecheck.
+ * Though one suffices, we may get one interrupt per (virtual) cpu.
+ */
+ if (mcck.warning) { /* WARNING pending ? */
+ static int mchchk_wng_posted = 0;
+
+ /* Use single cpu clear, as we cannot handle smp here. */
+ __ctl_clear_bit(14, 24); /* Disable WARNING MCH */
+ if (xchg(&mchchk_wng_posted, 1) == 0)
+ kill_cad_pid(SIGPWR, 1);
+ }
+ if (mcck.kill_task) {
+ local_irq_enable();
+ printk(KERN_EMERG "mcck: Terminating task because of machine "
+ "malfunction (code 0x%016llx).\n", mcck.mcck_code);
+ printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
+ current->comm, current->pid);
+ do_exit(SIGSEGV);
+ }
+}
+EXPORT_SYMBOL_GPL(s390_handle_mcck);
+
+/*
+ * returns 0 if all registers could be validated
+ * returns 1 otherwise
+ */
+static int notrace s390_revalidate_registers(struct mci *mci)
+{
+ int kill_task;
+ u64 zero;
+ void *fpt_save_area, *fpt_creg_save_area;
+
+ kill_task = 0;
+ zero = 0;
+
+ if (!mci->gr) {
+ /*
+ * General purpose registers couldn't be restored and have
+ * unknown contents. Process needs to be terminated.
+ */
+ kill_task = 1;
+ }
+ if (!mci->fp) {
+ /*
+ * Floating point registers can't be restored and
+ * therefore the process needs to be terminated.
+ */
+ kill_task = 1;
+ }
+ fpt_save_area = &S390_lowcore.floating_pt_save_area;
+ fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
+ if (!mci->fc) {
+ /*
+ * Floating point control register can't be restored.
+ * Task will be terminated.
+ */
+ asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
+ kill_task = 1;
+ } else
+ asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
+
+ asm volatile(
+ " ld 0,0(%0)\n"
+ " ld 1,8(%0)\n"
+ " ld 2,16(%0)\n"
+ " ld 3,24(%0)\n"
+ " ld 4,32(%0)\n"
+ " ld 5,40(%0)\n"
+ " ld 6,48(%0)\n"
+ " ld 7,56(%0)\n"
+ " ld 8,64(%0)\n"
+ " ld 9,72(%0)\n"
+ " ld 10,80(%0)\n"
+ " ld 11,88(%0)\n"
+ " ld 12,96(%0)\n"
+ " ld 13,104(%0)\n"
+ " ld 14,112(%0)\n"
+ " ld 15,120(%0)\n"
+ : : "a" (fpt_save_area));
+ /* Revalidate vector registers */
+ if (MACHINE_HAS_VX && current->thread.vxrs) {
+ if (!mci->vr) {
+ /*
+ * Vector registers can't be restored and therefore
+ * the process needs to be terminated.
+ */
+ kill_task = 1;
+ }
+ restore_vx_regs((__vector128 *)
+ S390_lowcore.vector_save_area_addr);
+ }
+ /* Revalidate access registers */
+ asm volatile(
+ " lam 0,15,0(%0)"
+ : : "a" (&S390_lowcore.access_regs_save_area));
+ if (!mci->ar) {
+ /*
+ * Access registers have unknown contents.
+ * Terminating task.
+ */
+ kill_task = 1;
+ }
+ /* Revalidate control registers */
+ if (!mci->cr) {
+ /*
+ * Control registers have unknown contents.
+ * Can't recover and therefore stopping machine.
+ */
+ s390_handle_damage("invalid control registers.");
+ } else {
+ asm volatile(
+ " lctlg 0,15,0(%0)"
+ : : "a" (&S390_lowcore.cregs_save_area));
+ }
+ /*
+ * We don't even try to revalidate the TOD register, since we simply
+ * can't write something sensible into that register.
+ */
+ /*
+ * See if we can revalidate the TOD programmable register with its
+ * old contents (should be zero) otherwise set it to zero.
+ */
+ if (!mci->pr)
+ asm volatile(
+ " sr 0,0\n"
+ " sckpf"
+ : : : "0", "cc");
+ else
+ asm volatile(
+ " l 0,0(%0)\n"
+ " sckpf"
+ : : "a" (&S390_lowcore.tod_progreg_save_area)
+ : "0", "cc");
+ /* Revalidate clock comparator register */
+ set_clock_comparator(S390_lowcore.clock_comparator);
+ /* Check if old PSW is valid */
+ if (!mci->wp)
+ /*
+ * Can't tell if we come from user or kernel mode
+ * -> stopping machine.
+ */
+ s390_handle_damage("old psw invalid.");
+
+ if (!mci->ms || !mci->pm || !mci->ia)
+ kill_task = 1;
+
+ return kill_task;
+}
+
+#define MAX_IPD_COUNT 29
+#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */
+
+#define ED_STP_ISLAND 6 /* External damage STP island check */
+#define ED_STP_SYNC 7 /* External damage STP sync check */
+#define ED_ETR_SYNC 12 /* External damage ETR sync check */
+#define ED_ETR_SWITCH 13 /* External damage ETR switch to local */
+
+/*
+ * machine check handler.
+ */
+void notrace s390_do_machine_check(struct pt_regs *regs)
+{
+ static int ipd_count;
+ static DEFINE_SPINLOCK(ipd_lock);
+ static unsigned long long last_ipd;
+ struct mcck_struct *mcck;
+ unsigned long long tmp;
+ struct mci *mci;
+ int umode;
+
+ nmi_enter();
+ inc_irq_stat(NMI_NMI);
+ mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
+ mcck = this_cpu_ptr(&cpu_mcck);
+ umode = user_mode(regs);
+
+ if (mci->sd) {
+ /* System damage -> stopping machine */
+ s390_handle_damage("received system damage machine check.");
+ }
+ if (mci->pd) {
+ if (mci->b) {
+ /* Processing backup -> verify if we can survive this */
+ u64 z_mcic, o_mcic, t_mcic;
+ z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
+ o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
+ 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
+ 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
+ 1ULL<<16);
+ t_mcic = *(u64 *)mci;
+
+ if (((t_mcic & z_mcic) != 0) ||
+ ((t_mcic & o_mcic) != o_mcic)) {
+ s390_handle_damage("processing backup machine "
+ "check with damage.");
+ }
+
+ /*
+ * Nullifying exigent condition, therefore we might
+ * retry this instruction.
+ */
+ spin_lock(&ipd_lock);
+ tmp = get_tod_clock();
+ if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
+ ipd_count++;
+ else
+ ipd_count = 1;
+ last_ipd = tmp;
+ if (ipd_count == MAX_IPD_COUNT)
+ s390_handle_damage("too many ipd retries.");
+ spin_unlock(&ipd_lock);
+ } else {
+ /* Processing damage -> stopping machine */
+ s390_handle_damage("received instruction processing "
+ "damage machine check.");
+ }
+ }
+ if (s390_revalidate_registers(mci)) {
+ if (umode) {
+ /*
+ * Couldn't restore all register contents while in
+ * user mode -> mark task for termination.
+ */
+ mcck->kill_task = 1;
+ mcck->mcck_code = *(unsigned long long *) mci;
+ set_cpu_flag(CIF_MCCK_PENDING);
+ } else {
+ /*
+ * Couldn't restore all register contents while in
+ * kernel mode -> stopping machine.
+ */
+ s390_handle_damage("unable to revalidate registers.");
+ }
+ }
+ if (mci->cd) {
+ /* Timing facility damage */
+ s390_handle_damage("TOD clock damaged");
+ }
+ if (mci->ed && mci->ec) {
+ /* External damage */
+ if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
+ etr_sync_check();
+ if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
+ etr_switch_to_local();
+ if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
+ stp_sync_check();
+ if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
+ stp_island_check();
+ }
+ if (mci->se)
+ /* Storage error uncorrected */
+ s390_handle_damage("received storage error uncorrected "
+ "machine check.");
+ if (mci->ke)
+ /* Storage key-error uncorrected */
+ s390_handle_damage("received storage key-error uncorrected "
+ "machine check.");
+ if (mci->ds && mci->fa)
+ /* Storage degradation */
+ s390_handle_damage("received storage degradation machine "
+ "check.");
+ if (mci->cp) {
+ /* Channel report word pending */
+ mcck->channel_report = 1;
+ set_cpu_flag(CIF_MCCK_PENDING);
+ }
+ if (mci->w) {
+ /* Warning pending */
+ mcck->warning = 1;
+ set_cpu_flag(CIF_MCCK_PENDING);
+ }
+ nmi_exit();
+}
+
+static int __init machine_check_init(void)
+{
+ ctl_set_bit(14, 25); /* enable external damage MCH */
+ ctl_set_bit(14, 27); /* enable system recovery MCH */
+ ctl_set_bit(14, 24); /* enable warning MCH */
+ return 0;
+}
+arch_initcall(machine_check_init);
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
new file mode 100644
index 000000000..d112fc66f
--- /dev/null
+++ b/arch/s390/kernel/os_info.c
@@ -0,0 +1,168 @@
+/*
+ * OS info memory interface
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#define KMSG_COMPONENT "os_info"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/crash_dump.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <asm/checksum.h>
+#include <asm/lowcore.h>
+#include <asm/os_info.h>
+
+/*
+ * OS info structure has to be page aligned
+ */
+static struct os_info os_info __page_aligned_data;
+
+/*
+ * Compute checksum over OS info structure
+ */
+u32 os_info_csum(struct os_info *os_info)
+{
+ int size = sizeof(*os_info) - offsetof(struct os_info, version_major);
+ return csum_partial(&os_info->version_major, size, 0);
+}
+
+/*
+ * Add crashkernel info to OS info and update checksum
+ */
+void os_info_crashkernel_add(unsigned long base, unsigned long size)
+{
+ os_info.crashkernel_addr = (u64)(unsigned long)base;
+ os_info.crashkernel_size = (u64)(unsigned long)size;
+ os_info.csum = os_info_csum(&os_info);
+}
+
+/*
+ * Add OS info entry and update checksum
+ */
+void os_info_entry_add(int nr, void *ptr, u64 size)
+{
+ os_info.entry[nr].addr = (u64)(unsigned long)ptr;
+ os_info.entry[nr].size = size;
+ os_info.entry[nr].csum = csum_partial(ptr, size, 0);
+ os_info.csum = os_info_csum(&os_info);
+}
+
+/*
+ * Initialize OS info struture and set lowcore pointer
+ */
+void __init os_info_init(void)
+{
+ void *ptr = &os_info;
+
+ os_info.version_major = OS_INFO_VERSION_MAJOR;
+ os_info.version_minor = OS_INFO_VERSION_MINOR;
+ os_info.magic = OS_INFO_MAGIC;
+ os_info.csum = os_info_csum(&os_info);
+ mem_assign_absolute(S390_lowcore.os_info, (unsigned long) ptr);
+}
+
+#ifdef CONFIG_CRASH_DUMP
+
+static struct os_info *os_info_old;
+
+/*
+ * Allocate and copy OS info entry from oldmem
+ */
+static void os_info_old_alloc(int nr, int align)
+{
+ unsigned long addr, size = 0;
+ char *buf, *buf_align, *msg;
+ u32 csum;
+
+ addr = os_info_old->entry[nr].addr;
+ if (!addr) {
+ msg = "not available";
+ goto fail;
+ }
+ size = os_info_old->entry[nr].size;
+ buf = kmalloc(size + align - 1, GFP_KERNEL);
+ if (!buf) {
+ msg = "alloc failed";
+ goto fail;
+ }
+ buf_align = PTR_ALIGN(buf, align);
+ if (copy_from_oldmem(buf_align, (void *) addr, size)) {
+ msg = "copy failed";
+ goto fail_free;
+ }
+ csum = csum_partial(buf_align, size, 0);
+ if (csum != os_info_old->entry[nr].csum) {
+ msg = "checksum failed";
+ goto fail_free;
+ }
+ os_info_old->entry[nr].addr = (u64)(unsigned long)buf_align;
+ msg = "copied";
+ goto out;
+fail_free:
+ kfree(buf);
+fail:
+ os_info_old->entry[nr].addr = 0;
+out:
+ pr_info("entry %i: %s (addr=0x%lx size=%lu)\n",
+ nr, msg, addr, size);
+}
+
+/*
+ * Initialize os info and os info entries from oldmem
+ */
+static void os_info_old_init(void)
+{
+ static int os_info_init;
+ unsigned long addr;
+
+ if (os_info_init)
+ return;
+ if (!OLDMEM_BASE)
+ goto fail;
+ if (copy_from_oldmem(&addr, &S390_lowcore.os_info, sizeof(addr)))
+ goto fail;
+ if (addr == 0 || addr % PAGE_SIZE)
+ goto fail;
+ os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL);
+ if (!os_info_old)
+ goto fail;
+ if (copy_from_oldmem(os_info_old, (void *) addr, sizeof(*os_info_old)))
+ goto fail_free;
+ if (os_info_old->magic != OS_INFO_MAGIC)
+ goto fail_free;
+ if (os_info_old->csum != os_info_csum(os_info_old))
+ goto fail_free;
+ if (os_info_old->version_major > OS_INFO_VERSION_MAJOR)
+ goto fail_free;
+ os_info_old_alloc(OS_INFO_VMCOREINFO, 1);
+ os_info_old_alloc(OS_INFO_REIPL_BLOCK, 1);
+ pr_info("crashkernel: addr=0x%lx size=%lu\n",
+ (unsigned long) os_info_old->crashkernel_addr,
+ (unsigned long) os_info_old->crashkernel_size);
+ os_info_init = 1;
+ return;
+fail_free:
+ kfree(os_info_old);
+fail:
+ os_info_init = 1;
+ os_info_old = NULL;
+}
+
+/*
+ * Return pointer to os infor entry and its size
+ */
+void *os_info_old_entry(int nr, unsigned long *size)
+{
+ os_info_old_init();
+
+ if (!os_info_old)
+ return NULL;
+ if (!os_info_old->entry[nr].addr)
+ return NULL;
+ *size = (unsigned long) os_info_old->entry[nr].size;
+ return (void *)(unsigned long)os_info_old->entry[nr].addr;
+}
+#endif
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
new file mode 100644
index 000000000..56fdad479
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -0,0 +1,696 @@
+/*
+ * Performance event support for s390x - CPU-measurement Counter Facility
+ *
+ * Copyright IBM Corp. 2012
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT "cpum_cf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <asm/ctl_reg.h>
+#include <asm/irq.h>
+#include <asm/cpu_mf.h>
+
+/* CPU-measurement counter facility supports these CPU counter sets:
+ * For CPU counter sets:
+ * Basic counter set: 0-31
+ * Problem-state counter set: 32-63
+ * Crypto-activity counter set: 64-127
+ * Extented counter set: 128-159
+ */
+enum cpumf_ctr_set {
+ /* CPU counter sets */
+ CPUMF_CTR_SET_BASIC = 0,
+ CPUMF_CTR_SET_USER = 1,
+ CPUMF_CTR_SET_CRYPTO = 2,
+ CPUMF_CTR_SET_EXT = 3,
+
+ /* Maximum number of counter sets */
+ CPUMF_CTR_SET_MAX,
+};
+
+#define CPUMF_LCCTL_ENABLE_SHIFT 16
+#define CPUMF_LCCTL_ACTCTL_SHIFT 0
+static const u64 cpumf_state_ctl[CPUMF_CTR_SET_MAX] = {
+ [CPUMF_CTR_SET_BASIC] = 0x02,
+ [CPUMF_CTR_SET_USER] = 0x04,
+ [CPUMF_CTR_SET_CRYPTO] = 0x08,
+ [CPUMF_CTR_SET_EXT] = 0x01,
+};
+
+static void ctr_set_enable(u64 *state, int ctr_set)
+{
+ *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT;
+}
+static void ctr_set_disable(u64 *state, int ctr_set)
+{
+ *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ENABLE_SHIFT);
+}
+static void ctr_set_start(u64 *state, int ctr_set)
+{
+ *state |= cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT;
+}
+static void ctr_set_stop(u64 *state, int ctr_set)
+{
+ *state &= ~(cpumf_state_ctl[ctr_set] << CPUMF_LCCTL_ACTCTL_SHIFT);
+}
+
+/* Local CPUMF event structure */
+struct cpu_hw_events {
+ struct cpumf_ctr_info info;
+ atomic_t ctr_set[CPUMF_CTR_SET_MAX];
+ u64 state, tx_state;
+ unsigned int flags;
+};
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+ .ctr_set = {
+ [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0),
+ [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0),
+ },
+ .state = 0,
+ .flags = 0,
+};
+
+static int get_counter_set(u64 event)
+{
+ int set = -1;
+
+ if (event < 32)
+ set = CPUMF_CTR_SET_BASIC;
+ else if (event < 64)
+ set = CPUMF_CTR_SET_USER;
+ else if (event < 128)
+ set = CPUMF_CTR_SET_CRYPTO;
+ else if (event < 256)
+ set = CPUMF_CTR_SET_EXT;
+
+ return set;
+}
+
+static int validate_event(const struct hw_perf_event *hwc)
+{
+ switch (hwc->config_base) {
+ case CPUMF_CTR_SET_BASIC:
+ case CPUMF_CTR_SET_USER:
+ case CPUMF_CTR_SET_CRYPTO:
+ case CPUMF_CTR_SET_EXT:
+ /* check for reserved counters */
+ if ((hwc->config >= 6 && hwc->config <= 31) ||
+ (hwc->config >= 38 && hwc->config <= 63) ||
+ (hwc->config >= 80 && hwc->config <= 127))
+ return -EOPNOTSUPP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int validate_ctr_version(const struct hw_perf_event *hwc)
+{
+ struct cpu_hw_events *cpuhw;
+ int err = 0;
+
+ cpuhw = &get_cpu_var(cpu_hw_events);
+
+ /* check required version for counter sets */
+ switch (hwc->config_base) {
+ case CPUMF_CTR_SET_BASIC:
+ case CPUMF_CTR_SET_USER:
+ if (cpuhw->info.cfvn < 1)
+ err = -EOPNOTSUPP;
+ break;
+ case CPUMF_CTR_SET_CRYPTO:
+ case CPUMF_CTR_SET_EXT:
+ if (cpuhw->info.csvn < 1)
+ err = -EOPNOTSUPP;
+ if ((cpuhw->info.csvn == 1 && hwc->config > 159) ||
+ (cpuhw->info.csvn == 2 && hwc->config > 175) ||
+ (cpuhw->info.csvn > 2 && hwc->config > 255))
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ put_cpu_var(cpu_hw_events);
+ return err;
+}
+
+static int validate_ctr_auth(const struct hw_perf_event *hwc)
+{
+ struct cpu_hw_events *cpuhw;
+ u64 ctrs_state;
+ int err = 0;
+
+ cpuhw = &get_cpu_var(cpu_hw_events);
+
+ /* check authorization for cpu counter sets */
+ ctrs_state = cpumf_state_ctl[hwc->config_base];
+ if (!(ctrs_state & cpuhw->info.auth_ctl))
+ err = -EPERM;
+
+ put_cpu_var(cpu_hw_events);
+ return err;
+}
+
+/*
+ * Change the CPUMF state to active.
+ * Enable and activate the CPU-counter sets according
+ * to the per-cpu control state.
+ */
+static void cpumf_pmu_enable(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ int err;
+
+ if (cpuhw->flags & PMU_F_ENABLED)
+ return;
+
+ err = lcctl(cpuhw->state);
+ if (err) {
+ pr_err("Enabling the performance measuring unit "
+ "failed with rc=%x\n", err);
+ return;
+ }
+
+ cpuhw->flags |= PMU_F_ENABLED;
+}
+
+/*
+ * Change the CPUMF state to inactive.
+ * Disable and enable (inactive) the CPU-counter sets according
+ * to the per-cpu control state.
+ */
+static void cpumf_pmu_disable(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ int err;
+ u64 inactive;
+
+ if (!(cpuhw->flags & PMU_F_ENABLED))
+ return;
+
+ inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
+ err = lcctl(inactive);
+ if (err) {
+ pr_err("Disabling the performance measuring unit "
+ "failed with rc=%x\n", err);
+ return;
+ }
+
+ cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+
+/* Number of perf events counting hardware events */
+static atomic_t num_events = ATOMIC_INIT(0);
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/* CPU-measurement alerts for the counter facility */
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_hw_events *cpuhw;
+
+ if (!(alert & CPU_MF_INT_CF_MASK))
+ return;
+
+ inc_irq_stat(IRQEXT_CMC);
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ /* Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case. */
+ if (!(cpuhw->flags & PMU_F_RESERVED))
+ return;
+
+ /* counter authorization change alert */
+ if (alert & CPU_MF_INT_CF_CACA)
+ qctri(&cpuhw->info);
+
+ /* loss of counter data alert */
+ if (alert & CPU_MF_INT_CF_LCDA)
+ pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+}
+
+#define PMC_INIT 0
+#define PMC_RELEASE 1
+static void setup_pmc_cpu(void *flags)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ switch (*((int *) flags)) {
+ case PMC_INIT:
+ memset(&cpuhw->info, 0, sizeof(cpuhw->info));
+ qctri(&cpuhw->info);
+ cpuhw->flags |= PMU_F_RESERVED;
+ break;
+
+ case PMC_RELEASE:
+ cpuhw->flags &= ~PMU_F_RESERVED;
+ break;
+ }
+
+ /* Disable CPU counter sets */
+ lcctl(0);
+}
+
+/* Initialize the CPU-measurement facility */
+static int reserve_pmc_hardware(void)
+{
+ int flags = PMC_INIT;
+
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+ return 0;
+}
+
+/* Release the CPU-measurement facility */
+static void release_pmc_hardware(void)
+{
+ int flags = PMC_RELEASE;
+
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+}
+
+/* Release the PMU if event is the last perf event */
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ if (!atomic_add_unless(&num_events, -1, 1)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_dec_return(&num_events) == 0)
+ release_pmc_hardware();
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+}
+
+/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
+static const int cpumf_generic_events_basic[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 0,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 1,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
+ [PERF_COUNT_HW_CACHE_MISSES] = -1,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+ [PERF_COUNT_HW_BRANCH_MISSES] = -1,
+ [PERF_COUNT_HW_BUS_CYCLES] = -1,
+};
+/* CPUMF <-> perf event mappings for userspace (problem-state set) */
+static const int cpumf_generic_events_user[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 32,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 33,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
+ [PERF_COUNT_HW_CACHE_MISSES] = -1,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+ [PERF_COUNT_HW_BRANCH_MISSES] = -1,
+ [PERF_COUNT_HW_BUS_CYCLES] = -1,
+};
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ struct hw_perf_event *hwc = &event->hw;
+ int err;
+ u64 ev;
+
+ switch (attr->type) {
+ case PERF_TYPE_RAW:
+ /* Raw events are used to access counters directly,
+ * hence do not permit excludes */
+ if (attr->exclude_kernel || attr->exclude_user ||
+ attr->exclude_hv)
+ return -EOPNOTSUPP;
+ ev = attr->config;
+ break;
+
+ case PERF_TYPE_HARDWARE:
+ ev = attr->config;
+ /* Count user space (problem-state) only */
+ if (!attr->exclude_user && attr->exclude_kernel) {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_user[ev];
+
+ /* No support for kernel space counters only */
+ } else if (!attr->exclude_kernel && attr->exclude_user) {
+ return -EOPNOTSUPP;
+
+ /* Count user and kernel space */
+ } else {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_basic[ev];
+ }
+ break;
+
+ default:
+ return -ENOENT;
+ }
+
+ if (ev == -1)
+ return -ENOENT;
+
+ if (ev >= PERF_CPUM_CF_MAX_CTR)
+ return -EINVAL;
+
+ /* Use the hardware perf event structure to store the counter number
+ * in 'config' member and the counter set to which the counter belongs
+ * in the 'config_base'. The counter set (config_base) is then used
+ * to enable/disable the counters.
+ */
+ hwc->config = ev;
+ hwc->config_base = get_counter_set(ev);
+
+ /* Validate the counter that is assigned to this event.
+ * Because the counter facility can use numerous counters at the
+ * same time without constraints, it is not necessary to explicity
+ * validate event groups (event->group_leader != event).
+ */
+ err = validate_event(hwc);
+ if (err)
+ return err;
+
+ /* Initialize for using the CPU-measurement counter facility */
+ if (!atomic_inc_not_zero(&num_events)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+ err = -EBUSY;
+ else
+ atomic_inc(&num_events);
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+ event->destroy = hw_perf_event_destroy;
+
+ /* Finally, validate version and authorization of the counter set */
+ err = validate_ctr_auth(hwc);
+ if (!err)
+ err = validate_ctr_version(hwc);
+
+ return err;
+}
+
+static int cpumf_pmu_event_init(struct perf_event *event)
+{
+ int err;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_HW_CACHE:
+ case PERF_TYPE_RAW:
+ err = __hw_perf_event_init(event);
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ if (unlikely(err) && event->destroy)
+ event->destroy(event);
+
+ return err;
+}
+
+static int hw_perf_event_reset(struct perf_event *event)
+{
+ u64 prev, new;
+ int err;
+
+ do {
+ prev = local64_read(&event->hw.prev_count);
+ err = ecctr(event->hw.config, &new);
+ if (err) {
+ if (err != 3)
+ break;
+ /* The counter is not (yet) available. This
+ * might happen if the counter set to which
+ * this counter belongs is in the disabled
+ * state.
+ */
+ new = 0;
+ }
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+ return err;
+}
+
+static int hw_perf_event_update(struct perf_event *event)
+{
+ u64 prev, new, delta;
+ int err;
+
+ do {
+ prev = local64_read(&event->hw.prev_count);
+ err = ecctr(event->hw.config, &new);
+ if (err)
+ goto out;
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);
+
+ delta = (prev <= new) ? new - prev
+ : (-1ULL - prev) + new + 1; /* overflow */
+ local64_add(delta, &event->count);
+out:
+ return err;
+}
+
+static void cpumf_pmu_read(struct perf_event *event)
+{
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
+ hw_perf_event_update(event);
+}
+
+static void cpumf_pmu_start(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ if (WARN_ON_ONCE(hwc->config == -1))
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+ hwc->state = 0;
+
+ /* (Re-)enable and activate the counter set */
+ ctr_set_enable(&cpuhw->state, hwc->config_base);
+ ctr_set_start(&cpuhw->state, hwc->config_base);
+
+ /* The counter set to which this counter belongs can be already active.
+ * Because all counters in a set are active, the event->hw.prev_count
+ * needs to be synchronized. At this point, the counter set can be in
+ * the inactive or disabled state.
+ */
+ hw_perf_event_reset(event);
+
+ /* increment refcount for this counter set */
+ atomic_inc(&cpuhw->ctr_set[hwc->config_base]);
+}
+
+static void cpumf_pmu_stop(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!(hwc->state & PERF_HES_STOPPED)) {
+ /* Decrement reference count for this counter set and if this
+ * is the last used counter in the set, clear activation
+ * control and set the counter set state to inactive.
+ */
+ if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base]))
+ ctr_set_stop(&cpuhw->state, hwc->config_base);
+ event->hw.state |= PERF_HES_STOPPED;
+ }
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ hw_perf_event_update(event);
+ event->hw.state |= PERF_HES_UPTODATE;
+ }
+}
+
+static int cpumf_pmu_add(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ /* Check authorization for the counter set to which this
+ * counter belongs.
+ * For group events transaction, the authorization check is
+ * done in cpumf_pmu_commit_txn().
+ */
+ if (!(cpuhw->flags & PERF_EVENT_TXN))
+ if (validate_ctr_auth(&event->hw))
+ return -EPERM;
+
+ ctr_set_enable(&cpuhw->state, event->hw.config_base);
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ cpumf_pmu_start(event, PERF_EF_RELOAD);
+
+ perf_event_update_userpage(event);
+
+ return 0;
+}
+
+static void cpumf_pmu_del(struct perf_event *event, int flags)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ cpumf_pmu_stop(event, PERF_EF_UPDATE);
+
+ /* Check if any counter in the counter set is still used. If not used,
+ * change the counter set to the disabled state. This also clears the
+ * content of all counters in the set.
+ *
+ * When a new perf event has been added but not yet started, this can
+ * clear enable control and resets all counters in a set. Therefore,
+ * cpumf_pmu_start() always has to reenable a counter set.
+ */
+ if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base]))
+ ctr_set_disable(&cpuhw->state, event->hw.config_base);
+
+ perf_event_update_userpage(event);
+}
+
+/*
+ * Start group events scheduling transaction.
+ * Set flags to perform a single test at commit time.
+ */
+static void cpumf_pmu_start_txn(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ perf_pmu_disable(pmu);
+ cpuhw->flags |= PERF_EVENT_TXN;
+ cpuhw->tx_state = cpuhw->state;
+}
+
+/*
+ * Stop and cancel a group events scheduling tranctions.
+ * Assumes cpumf_pmu_del() is called for each successful added
+ * cpumf_pmu_add() during the transaction.
+ */
+static void cpumf_pmu_cancel_txn(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ WARN_ON(cpuhw->tx_state != cpuhw->state);
+
+ cpuhw->flags &= ~PERF_EVENT_TXN;
+ perf_pmu_enable(pmu);
+}
+
+/*
+ * Commit the group events scheduling transaction. On success, the
+ * transaction is closed. On error, the transaction is kept open
+ * until cpumf_pmu_cancel_txn() is called.
+ */
+static int cpumf_pmu_commit_txn(struct pmu *pmu)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ u64 state;
+
+ /* check if the updated state can be scheduled */
+ state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
+ state >>= CPUMF_LCCTL_ENABLE_SHIFT;
+ if ((state & cpuhw->info.auth_ctl) != state)
+ return -EPERM;
+
+ cpuhw->flags &= ~PERF_EVENT_TXN;
+ perf_pmu_enable(pmu);
+ return 0;
+}
+
+/* Performance monitoring unit for s390x */
+static struct pmu cpumf_pmu = {
+ .pmu_enable = cpumf_pmu_enable,
+ .pmu_disable = cpumf_pmu_disable,
+ .event_init = cpumf_pmu_event_init,
+ .add = cpumf_pmu_add,
+ .del = cpumf_pmu_del,
+ .start = cpumf_pmu_start,
+ .stop = cpumf_pmu_stop,
+ .read = cpumf_pmu_read,
+ .start_txn = cpumf_pmu_start_txn,
+ .commit_txn = cpumf_pmu_commit_txn,
+ .cancel_txn = cpumf_pmu_cancel_txn,
+};
+
+static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action,
+ void *hcpu)
+{
+ unsigned int cpu = (long) hcpu;
+ int flags;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_ONLINE:
+ flags = PMC_INIT;
+ smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ break;
+ case CPU_DOWN_PREPARE:
+ flags = PMC_RELEASE;
+ smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static int __init cpumf_pmu_init(void)
+{
+ int rc;
+
+ if (!cpum_cf_avail())
+ return -ENODEV;
+
+ /* clear bit 15 of cr0 to unauthorize problem-state to
+ * extract measurement counters */
+ ctl_clear_bit(0, 48);
+
+ /* register handler for measurement-alert interruptions */
+ rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ if (rc) {
+ pr_err("Registering for CPU-measurement alerts "
+ "failed with rc=%i\n", rc);
+ goto out;
+ }
+
+ /* The CPU measurement counter facility does not have overflow
+ * interrupts to do sampling. Sampling must be provided by
+ * external means, for example, by timers.
+ */
+ cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+ cpumf_pmu.attr_groups = cpumf_cf_event_group();
+ rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
+ if (rc) {
+ pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ goto out;
+ }
+ perf_cpu_notifier(cpumf_pmu_notifier);
+out:
+ return rc;
+}
+early_initcall(cpumf_pmu_init);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
new file mode 100644
index 000000000..4554a4bae
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -0,0 +1,322 @@
+/*
+ * Perf PMU sysfs events attributes for available CPU-measurement counters
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/perf_event.h>
+
+
+/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */
+
+CPUMF_EVENT_ATTR(cf, CPU_CYCLES, 0x0000);
+CPUMF_EVENT_ATTR(cf, INSTRUCTIONS, 0x0001);
+CPUMF_EVENT_ATTR(cf, L1I_DIR_WRITES, 0x0002);
+CPUMF_EVENT_ATTR(cf, L1I_PENALTY_CYCLES, 0x0003);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_CPU_CYCLES, 0x0020);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024);
+CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025);
+CPUMF_EVENT_ATTR(cf, L1D_DIR_WRITES, 0x0004);
+CPUMF_EVENT_ATTR(cf, L1D_PENALTY_CYCLES, 0x0005);
+CPUMF_EVENT_ATTR(cf, PRNG_FUNCTIONS, 0x0040);
+CPUMF_EVENT_ATTR(cf, PRNG_CYCLES, 0x0041);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_FUNCTIONS, 0x0042);
+CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_CYCLES, 0x0043);
+CPUMF_EVENT_ATTR(cf, SHA_FUNCTIONS, 0x0044);
+CPUMF_EVENT_ATTR(cf, SHA_CYCLES, 0x0045);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_FUNCTIONS, 0x0046);
+CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_CYCLES, 0x0047);
+CPUMF_EVENT_ATTR(cf, DEA_FUNCTIONS, 0x0048);
+CPUMF_EVENT_ATTR(cf, DEA_CYCLES, 0x0049);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_FUNCTIONS, 0x004a);
+CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_CYCLES, 0x004b);
+CPUMF_EVENT_ATTR(cf, AES_FUNCTIONS, 0x004c);
+CPUMF_EVENT_ATTR(cf, AES_CYCLES, 0x004d);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_FUNCTIONS, 0x004e);
+CPUMF_EVENT_ATTR(cf, AES_BLOCKED_CYCLES, 0x004f);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098);
+CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b);
+CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e);
+CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097);
+CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0);
+CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2);
+CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3);
+
+static struct attribute *cpumcf_pmu_event_attr[] = {
+ CPUMF_EVENT_PTR(cf, CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf, INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, L1I_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_CPU_CYCLES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, L1D_DIR_WRITES),
+ CPUMF_EVENT_PTR(cf, L1D_PENALTY_CYCLES),
+ CPUMF_EVENT_PTR(cf, PRNG_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, PRNG_CYCLES),
+ CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf, SHA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, SHA_CYCLES),
+ CPUMF_EVENT_PTR(cf, SHA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, SHA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf, DEA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, DEA_CYCLES),
+ CPUMF_EVENT_PTR(cf, DEA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, DEA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf, AES_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, AES_CYCLES),
+ CPUMF_EVENT_PTR(cf, AES_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf, AES_BLOCKED_CYCLES),
+ NULL,
+};
+
+static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES),
+ CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT),
+ NULL,
+};
+
+static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES),
+ NULL,
+};
+
+static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL),
+ NULL,
+};
+
+/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
+
+static struct attribute_group cpumsf_pmu_events_group = {
+ .name = "events",
+ .attrs = cpumcf_pmu_event_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group cpumsf_pmu_format_group = {
+ .name = "format",
+ .attrs = cpumsf_pmu_format_attr,
+};
+
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+ &cpumsf_pmu_events_group,
+ &cpumsf_pmu_format_group,
+ NULL,
+};
+
+
+static __init struct attribute **merge_attr(struct attribute **a,
+ struct attribute **b)
+{
+ struct attribute **new;
+ int j, i;
+
+ for (j = 0; a[j]; j++)
+ ;
+ for (i = 0; b[i]; i++)
+ j++;
+ j++;
+
+ new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL);
+ if (!new)
+ return NULL;
+ j = 0;
+ for (i = 0; a[i]; i++)
+ new[j++] = a[i];
+ for (i = 0; b[i]; i++)
+ new[j++] = b[i];
+ new[j] = NULL;
+
+ return new;
+}
+
+__init const struct attribute_group **cpumf_cf_event_group(void)
+{
+ struct attribute **combined, **model;
+ struct cpuid cpu_id;
+
+ get_cpu_id(&cpu_id);
+ switch (cpu_id.machine) {
+ case 0x2097:
+ case 0x2098:
+ model = cpumcf_z10_pmu_event_attr;
+ break;
+ case 0x2817:
+ case 0x2818:
+ model = cpumcf_z196_pmu_event_attr;
+ break;
+ case 0x2827:
+ case 0x2828:
+ model = cpumcf_zec12_pmu_event_attr;
+ break;
+ default:
+ model = NULL;
+ break;
+ };
+
+ if (!model)
+ goto out;
+
+ combined = merge_attr(cpumcf_pmu_event_attr, model);
+ if (combined)
+ cpumsf_pmu_events_group.attrs = combined;
+out:
+ return cpumsf_pmu_attr_groups;
+}
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
new file mode 100644
index 000000000..e6a1578fc
--- /dev/null
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -0,0 +1,1639 @@
+/*
+ * Performance event support for the System z CPU-measurement Sampling Facility
+ *
+ * Copyright IBM Corp. 2013
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT "cpum_sf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+#include <asm/debug.h>
+#include <asm/timex.h>
+
+/* Minimum number of sample-data-block-tables:
+ * At least one table is required for the sampling buffer structure.
+ * A single table contains up to 511 pointers to sample-data-blocks.
+ */
+#define CPUM_SF_MIN_SDBT 1
+
+/* Number of sample-data-blocks per sample-data-block-table (SDBT):
+ * A table contains SDB pointers (8 bytes) and one table-link entry
+ * that points to the origin of the next SDBT.
+ */
+#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8)
+
+/* Maximum page offset for an SDBT table-link entry:
+ * If this page offset is reached, a table-link entry to the next SDBT
+ * must be added.
+ */
+#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8)
+static inline int require_table_link(const void *sdbt)
+{
+ return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
+}
+
+/* Minimum and maximum sampling buffer sizes:
+ *
+ * This number represents the maximum size of the sampling buffer taking
+ * the number of sample-data-block-tables into account. Note that these
+ * numbers apply to the basic-sampling function only.
+ * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if
+ * the diagnostic-sampling function is active.
+ *
+ * Sampling buffer size Buffer characteristics
+ * ---------------------------------------------------
+ * 64KB == 16 pages (4KB per page)
+ * 1 page for SDB-tables
+ * 15 pages for SDBs
+ *
+ * 32MB == 8192 pages (4KB per page)
+ * 16 pages for SDB-tables
+ * 8176 pages for SDBs
+ */
+static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
+static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;
+static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1;
+
+struct sf_buffer {
+ unsigned long *sdbt; /* Sample-data-block-table origin */
+ /* buffer characteristics (required for buffer increments) */
+ unsigned long num_sdb; /* Number of sample-data-blocks */
+ unsigned long num_sdbt; /* Number of sample-data-block-tables */
+ unsigned long *tail; /* last sample-data-block-table */
+};
+
+struct cpu_hw_sf {
+ /* CPU-measurement sampling information block */
+ struct hws_qsi_info_block qsi;
+ /* CPU-measurement sampling control block */
+ struct hws_lsctl_request_block lsctl;
+ struct sf_buffer sfb; /* Sampling buffer */
+ unsigned int flags; /* Status flags */
+ struct perf_event *event; /* Scheduled perf event */
+};
+static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);
+
+/* Debug feature */
+static debug_info_t *sfdbg;
+
+/*
+ * sf_disable() - Switch off sampling facility
+ */
+static int sf_disable(void)
+{
+ struct hws_lsctl_request_block sreq;
+
+ memset(&sreq, 0, sizeof(sreq));
+ return lsctl(&sreq);
+}
+
+/*
+ * sf_buffer_available() - Check for an allocated sampling buffer
+ */
+static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
+{
+ return !!cpuhw->sfb.sdbt;
+}
+
+/*
+ * deallocate sampling facility buffer
+ */
+static void free_sampling_buffer(struct sf_buffer *sfb)
+{
+ unsigned long *sdbt, *curr;
+
+ if (!sfb->sdbt)
+ return;
+
+ sdbt = sfb->sdbt;
+ curr = sdbt;
+
+ /* Free the SDBT after all SDBs are processed... */
+ while (1) {
+ if (!*curr || !sdbt)
+ break;
+
+ /* Process table-link entries */
+ if (is_link_entry(curr)) {
+ curr = get_next_sdbt(curr);
+ if (sdbt)
+ free_page((unsigned long) sdbt);
+
+ /* If the origin is reached, sampling buffer is freed */
+ if (curr == sfb->sdbt)
+ break;
+ else
+ sdbt = curr;
+ } else {
+ /* Process SDB pointer */
+ if (*curr) {
+ free_page(*curr);
+ curr++;
+ }
+ }
+ }
+
+ debug_sprintf_event(sfdbg, 5,
+ "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
+ memset(sfb, 0, sizeof(*sfb));
+}
+
+static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
+{
+ unsigned long sdb, *trailer;
+
+ /* Allocate and initialize sample-data-block */
+ sdb = get_zeroed_page(gfp_flags);
+ if (!sdb)
+ return -ENOMEM;
+ trailer = trailer_entry_ptr(sdb);
+ *trailer = SDB_TE_ALERT_REQ_MASK;
+
+ /* Link SDB into the sample-data-block-table */
+ *sdbt = sdb;
+
+ return 0;
+}
+
+/*
+ * realloc_sampling_buffer() - extend sampler memory
+ *
+ * Allocates new sample-data-blocks and adds them to the specified sampling
+ * buffer memory.
+ *
+ * Important: This modifies the sampling buffer and must be called when the
+ * sampling facility is disabled.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int realloc_sampling_buffer(struct sf_buffer *sfb,
+ unsigned long num_sdb, gfp_t gfp_flags)
+{
+ int i, rc;
+ unsigned long *new, *tail;
+
+ if (!sfb->sdbt || !sfb->tail)
+ return -EINVAL;
+
+ if (!is_link_entry(sfb->tail))
+ return -EINVAL;
+
+ /* Append to the existing sampling buffer, overwriting the table-link
+ * register.
+ * The tail variables always points to the "tail" (last and table-link)
+ * entry in an SDB-table.
+ */
+ tail = sfb->tail;
+
+ /* Do a sanity check whether the table-link entry points to
+ * the sampling buffer origin.
+ */
+ if (sfb->sdbt != get_next_sdbt(tail)) {
+ debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
+ "sampling buffer is not linked: origin=%p"
+ "tail=%p\n",
+ (void *) sfb->sdbt, (void *) tail);
+ return -EINVAL;
+ }
+
+ /* Allocate remaining SDBs */
+ rc = 0;
+ for (i = 0; i < num_sdb; i++) {
+ /* Allocate a new SDB-table if it is full. */
+ if (require_table_link(tail)) {
+ new = (unsigned long *) get_zeroed_page(gfp_flags);
+ if (!new) {
+ rc = -ENOMEM;
+ break;
+ }
+ sfb->num_sdbt++;
+ /* Link current page to tail of chain */
+ *tail = (unsigned long)(void *) new + 1;
+ tail = new;
+ }
+
+ /* Allocate a new sample-data-block.
+ * If there is not enough memory, stop the realloc process
+ * and simply use what was allocated. If this is a temporary
+ * issue, a new realloc call (if required) might succeed.
+ */
+ rc = alloc_sample_data_block(tail, gfp_flags);
+ if (rc)
+ break;
+ sfb->num_sdb++;
+ tail++;
+ }
+
+ /* Link sampling buffer to its origin */
+ *tail = (unsigned long) sfb->sdbt + 1;
+ sfb->tail = tail;
+
+ debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
+ " settings: sdbt=%lu sdb=%lu\n",
+ sfb->num_sdbt, sfb->num_sdb);
+ return rc;
+}
+
+/*
+ * allocate_sampling_buffer() - allocate sampler memory
+ *
+ * Allocates and initializes a sampling buffer structure using the
+ * specified number of sample-data-blocks (SDB). For each allocation,
+ * a 4K page is used. The number of sample-data-block-tables (SDBT)
+ * are calculated from SDBs.
+ * Also set the ALERT_REQ mask in each SDBs trailer.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
+{
+ int rc;
+
+ if (sfb->sdbt)
+ return -EINVAL;
+
+ /* Allocate the sample-data-block-table origin */
+ sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
+ if (!sfb->sdbt)
+ return -ENOMEM;
+ sfb->num_sdb = 0;
+ sfb->num_sdbt = 1;
+
+ /* Link the table origin to point to itself to prepare for
+ * realloc_sampling_buffer() invocation.
+ */
+ sfb->tail = sfb->sdbt;
+ *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;
+
+ /* Allocate requested number of sample-data-blocks */
+ rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
+ if (rc) {
+ free_sampling_buffer(sfb);
+ debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
+ "realloc_sampling_buffer failed with rc=%i\n", rc);
+ } else
+ debug_sprintf_event(sfdbg, 4,
+ "alloc_sampling_buffer: tear=%p dear=%p\n",
+ sfb->sdbt, (void *) *sfb->sdbt);
+ return rc;
+}
+
+static void sfb_set_limits(unsigned long min, unsigned long max)
+{
+ struct hws_qsi_info_block si;
+
+ CPUM_SF_MIN_SDB = min;
+ CPUM_SF_MAX_SDB = max;
+
+ memset(&si, 0, sizeof(si));
+ if (!qsi(&si))
+ CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes);
+}
+
+static unsigned long sfb_max_limit(struct hw_perf_event *hwc)
+{
+ return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR
+ : CPUM_SF_MAX_SDB;
+}
+
+static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ if (!sfb->sdbt)
+ return SFB_ALLOC_REG(hwc);
+ if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
+ return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
+ return 0;
+}
+
+static int sfb_has_pending_allocs(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ return sfb_pending_allocs(sfb, hwc) > 0;
+}
+
+static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+ /* Limit the number of SDBs to not exceed the maximum */
+ num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc));
+ if (num)
+ SFB_ALLOC_REG(hwc) += num;
+}
+
+static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
+{
+ SFB_ALLOC_REG(hwc) = 0;
+ sfb_account_allocs(num, hwc);
+}
+
+static size_t event_sample_size(struct hw_perf_event *hwc)
+{
+ struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+ size_t sample_size;
+
+ /* The sample size depends on the sampling function: The basic-sampling
+ * function must be always enabled, diagnostic-sampling function is
+ * optional.
+ */
+ sample_size = sfr->bsdes;
+ if (SAMPL_DIAG_MODE(hwc))
+ sample_size += sfr->dsdes;
+
+ return sample_size;
+}
+
+static void deallocate_buffers(struct cpu_hw_sf *cpuhw)
+{
+ if (cpuhw->sfb.sdbt)
+ free_sampling_buffer(&cpuhw->sfb);
+}
+
+static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
+{
+ unsigned long n_sdb, freq, factor;
+ size_t sfr_size, sample_size;
+ struct sf_raw_sample *sfr;
+
+ /* Allocate raw sample buffer
+ *
+ * The raw sample buffer is used to temporarily store sampling data
+ * entries for perf raw sample processing. The buffer size mainly
+ * depends on the size of diagnostic-sampling data entries which is
+ * machine-specific. The exact size calculation includes:
+ * 1. The first 4 bytes of diagnostic-sampling data entries are
+ * already reflected in the sf_raw_sample structure. Subtract
+ * these bytes.
+ * 2. The perf raw sample data must be 8-byte aligned (u64) and
+ * perf's internal data size must be considered too. So add
+ * an additional u32 for correct alignment and subtract before
+ * allocating the buffer.
+ * 3. Store the raw sample buffer pointer in the perf event
+ * hardware structure.
+ */
+ sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) +
+ sizeof(u32), sizeof(u64));
+ sfr_size -= sizeof(u32);
+ sfr = kzalloc(sfr_size, GFP_KERNEL);
+ if (!sfr)
+ return -ENOMEM;
+ sfr->size = sfr_size;
+ sfr->bsdes = cpuhw->qsi.bsdes;
+ sfr->dsdes = cpuhw->qsi.dsdes;
+ RAWSAMPLE_REG(hwc) = (unsigned long) sfr;
+
+ /* Calculate sampling buffers using 4K pages
+ *
+ * 1. Determine the sample data size which depends on the used
+ * sampling functions, for example, basic-sampling or
+ * basic-sampling with diagnostic-sampling.
+ *
+ * 2. Use the sampling frequency as input. The sampling buffer is
+ * designed for almost one second. This can be adjusted through
+ * the "factor" variable.
+ * In any case, alloc_sampling_buffer() sets the Alert Request
+ * Control indicator to trigger a measurement-alert to harvest
+ * sample-data-blocks (sdb).
+ *
+ * 3. Compute the number of sample-data-blocks and ensure a minimum
+ * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not
+ * exceed a "calculated" maximum. The symbolic maximum is
+ * designed for basic-sampling only and needs to be increased if
+ * diagnostic-sampling is active.
+ * See also the remarks for these symbolic constants.
+ *
+ * 4. Compute the number of sample-data-block-tables (SDBT) and
+ * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
+ * to 511 SDBs).
+ */
+ sample_size = event_sample_size(hwc);
+ freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
+ factor = 1;
+ n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
+ if (n_sdb < CPUM_SF_MIN_SDB)
+ n_sdb = CPUM_SF_MIN_SDB;
+
+ /* If there is already a sampling buffer allocated, it is very likely
+ * that the sampling facility is enabled too. If the event to be
+ * initialized requires a greater sampling buffer, the allocation must
+ * be postponed. Changing the sampling buffer requires the sampling
+ * facility to be in the disabled state. So, account the number of
+ * required SDBs and let cpumsf_pmu_enable() resize the buffer just
+ * before the event is started.
+ */
+ sfb_init_allocs(n_sdb, hwc);
+ if (sf_buffer_available(cpuhw))
+ return 0;
+
+ debug_sprintf_event(sfdbg, 3,
+ "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
+ " sample_size=%lu cpuhw=%p\n",
+ SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
+ sample_size, cpuhw);
+
+ return alloc_sampling_buffer(&cpuhw->sfb,
+ sfb_pending_allocs(&cpuhw->sfb, hwc));
+}
+
+static unsigned long min_percent(unsigned int percent, unsigned long base,
+ unsigned long min)
+{
+ return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
+}
+
+static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
+{
+ /* Use a percentage-based approach to extend the sampling facility
+ * buffer. Accept up to 5% sample data loss.
+ * Vary the extents between 1% to 5% of the current number of
+ * sample-data-blocks.
+ */
+ if (ratio <= 5)
+ return 0;
+ if (ratio <= 25)
+ return min_percent(1, base, 1);
+ if (ratio <= 50)
+ return min_percent(1, base, 1);
+ if (ratio <= 75)
+ return min_percent(2, base, 2);
+ if (ratio <= 100)
+ return min_percent(3, base, 3);
+ if (ratio <= 250)
+ return min_percent(4, base, 4);
+
+ return min_percent(5, base, 8);
+}
+
+static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
+ struct hw_perf_event *hwc)
+{
+ unsigned long ratio, num;
+
+ if (!OVERFLOW_REG(hwc))
+ return;
+
+ /* The sample_overflow contains the average number of sample data
+ * that has been lost because sample-data-blocks were full.
+ *
+ * Calculate the total number of sample data entries that has been
+ * discarded. Then calculate the ratio of lost samples to total samples
+ * per second in percent.
+ */
+ ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
+ sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));
+
+ /* Compute number of sample-data-blocks */
+ num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
+ if (num)
+ sfb_account_allocs(num, hwc);
+
+ debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
+ " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+ OVERFLOW_REG(hwc) = 0;
+}
+
+/* extend_sampling_buffer() - Extend sampling buffer
+ * @sfb: Sampling buffer structure (for local CPU)
+ * @hwc: Perf event hardware structure
+ *
+ * Use this function to extend the sampling buffer based on the overflow counter
+ * and postponed allocation extents stored in the specified Perf event hardware.
+ *
+ * Important: This function disables the sampling facility in order to safely
+ * change the sampling buffer structure. Do not call this function
+ * when the PMU is active.
+ */
+static void extend_sampling_buffer(struct sf_buffer *sfb,
+ struct hw_perf_event *hwc)
+{
+ unsigned long num, num_old;
+ int rc;
+
+ num = sfb_pending_allocs(sfb, hwc);
+ if (!num)
+ return;
+ num_old = sfb->num_sdb;
+
+ /* Disable the sampling facility to reset any states and also
+ * clear pending measurement alerts.
+ */
+ sf_disable();
+
+ /* Extend the sampling buffer.
+ * This memory allocation typically happens in an atomic context when
+ * called by perf. Because this is a reallocation, it is fine if the
+ * new SDB-request cannot be satisfied immediately.
+ */
+ rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
+ if (rc)
+ debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
+ "failed with rc=%i\n", rc);
+
+ if (sfb_has_pending_allocs(sfb, hwc))
+ debug_sprintf_event(sfdbg, 5, "sfb: extend: "
+ "req=%lu alloc=%lu remaining=%lu\n",
+ num, sfb->num_sdb - num_old,
+ sfb_pending_allocs(sfb, hwc));
+}
+
+
+/* Number of perf events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+#define PMC_INIT 0
+#define PMC_RELEASE 1
+#define PMC_FAILURE 2
+static void setup_pmc_cpu(void *flags)
+{
+ int err;
+ struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf);
+
+ err = 0;
+ switch (*((int *) flags)) {
+ case PMC_INIT:
+ memset(cpusf, 0, sizeof(*cpusf));
+ err = qsi(&cpusf->qsi);
+ if (err)
+ break;
+ cpusf->flags |= PMU_F_RESERVED;
+ err = sf_disable();
+ if (err)
+ pr_err("Switching off the sampling facility failed "
+ "with rc=%i\n", err);
+ debug_sprintf_event(sfdbg, 5,
+ "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
+ break;
+ case PMC_RELEASE:
+ cpusf->flags &= ~PMU_F_RESERVED;
+ err = sf_disable();
+ if (err) {
+ pr_err("Switching off the sampling facility failed "
+ "with rc=%i\n", err);
+ } else
+ deallocate_buffers(cpusf);
+ debug_sprintf_event(sfdbg, 5,
+ "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
+ break;
+ }
+ if (err)
+ *((int *) flags) |= PMC_FAILURE;
+}
+
+static void release_pmc_hardware(void)
+{
+ int flags = PMC_RELEASE;
+
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ perf_release_sampling();
+}
+
+static int reserve_pmc_hardware(void)
+{
+ int flags = PMC_INIT;
+ int err;
+
+ err = perf_reserve_sampling();
+ if (err)
+ return err;
+ on_each_cpu(setup_pmc_cpu, &flags, 1);
+ if (flags & PMC_FAILURE) {
+ release_pmc_hardware();
+ return -ENODEV;
+ }
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+ return 0;
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ /* Free raw sample buffer */
+ if (RAWSAMPLE_REG(&event->hw))
+ kfree((void *) RAWSAMPLE_REG(&event->hw));
+
+ /* Release PMC if this is the last perf event */
+ if (!atomic_add_unless(&num_events, -1, 1)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_dec_return(&num_events) == 0)
+ release_pmc_hardware();
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+}
+
+static void hw_init_period(struct hw_perf_event *hwc, u64 period)
+{
+ hwc->sample_period = period;
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+}
+
+static void hw_reset_registers(struct hw_perf_event *hwc,
+ unsigned long *sdbt_origin)
+{
+ struct sf_raw_sample *sfr;
+
+ /* (Re)set to first sample-data-block-table */
+ TEAR_REG(hwc) = (unsigned long) sdbt_origin;
+
+ /* (Re)set raw sampling buffer register */
+ sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc);
+ memset(&sfr->basic, 0, sizeof(sfr->basic));
+ memset(&sfr->diag, 0, sfr->dsdes);
+}
+
+static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
+ unsigned long rate)
+{
+ return clamp_t(unsigned long, rate,
+ si->min_sampl_rate, si->max_sampl_rate);
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+ struct cpu_hw_sf *cpuhw;
+ struct hws_qsi_info_block si;
+ struct perf_event_attr *attr = &event->attr;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long rate;
+ int cpu, err;
+
+ /* Reserve CPU-measurement sampling facility */
+ err = 0;
+ if (!atomic_inc_not_zero(&num_events)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
+ err = -EBUSY;
+ else
+ atomic_inc(&num_events);
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+ event->destroy = hw_perf_event_destroy;
+
+ if (err)
+ goto out;
+
+ /* Access per-CPU sampling information (query sampling info) */
+ /*
+ * The event->cpu value can be -1 to count on every CPU, for example,
+ * when attaching to a task. If this is specified, use the query
+ * sampling info from the current CPU, otherwise use event->cpu to
+ * retrieve the per-CPU information.
+ * Later, cpuhw indicates whether to allocate sampling buffers for a
+ * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL).
+ */
+ memset(&si, 0, sizeof(si));
+ cpuhw = NULL;
+ if (event->cpu == -1)
+ qsi(&si);
+ else {
+ /* Event is pinned to a particular CPU, retrieve the per-CPU
+ * sampling structure for accessing the CPU-specific QSI.
+ */
+ cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+ si = cpuhw->qsi;
+ }
+
+ /* Check sampling facility authorization and, if not authorized,
+ * fall back to other PMUs. It is safe to check any CPU because
+ * the authorization is identical for all configured CPUs.
+ */
+ if (!si.as) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ /* Always enable basic sampling */
+ SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
+
+ /* Check if diagnostic sampling is requested. Deny if the required
+ * sampling authorization is missing.
+ */
+ if (attr->config == PERF_EVENT_CPUM_SF_DIAG) {
+ if (!si.ad) {
+ err = -EPERM;
+ goto out;
+ }
+ SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE;
+ }
+
+ /* Check and set other sampling flags */
+ if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
+ SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
+
+ /* The sampling information (si) contains information about the
+ * min/max sampling intervals and the CPU speed. So calculate the
+ * correct sampling interval and avoid the whole period adjust
+ * feedback loop.
+ */
+ rate = 0;
+ if (attr->freq) {
+ rate = freq_to_sample_rate(&si, attr->sample_freq);
+ rate = hw_limit_rate(&si, rate);
+ attr->freq = 0;
+ attr->sample_period = rate;
+ } else {
+ /* The min/max sampling rates specifies the valid range
+ * of sample periods. If the specified sample period is
+ * out of range, limit the period to the range boundary.
+ */
+ rate = hw_limit_rate(&si, hwc->sample_period);
+
+ /* The perf core maintains a maximum sample rate that is
+ * configurable through the sysctl interface. Ensure the
+ * sampling rate does not exceed this value. This also helps
+ * to avoid throttling when pushing samples with
+ * perf_event_overflow().
+ */
+ if (sample_rate_to_freq(&si, rate) >
+ sysctl_perf_event_sample_rate) {
+ err = -EINVAL;
+ debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
+ goto out;
+ }
+ }
+ SAMPL_RATE(hwc) = rate;
+ hw_init_period(hwc, SAMPL_RATE(hwc));
+
+ /* Initialize sample data overflow accounting */
+ hwc->extra_reg.reg = REG_OVERFLOW;
+ OVERFLOW_REG(hwc) = 0;
+
+ /* Allocate the per-CPU sampling buffer using the CPU information
+ * from the event. If the event is not pinned to a particular
+ * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
+ * buffers for each online CPU.
+ */
+ if (cpuhw)
+ /* Event is pinned to a particular CPU */
+ err = allocate_buffers(cpuhw, hwc);
+ else {
+ /* Event is not pinned, allocate sampling buffer on
+ * each online CPU
+ */
+ for_each_online_cpu(cpu) {
+ cpuhw = &per_cpu(cpu_hw_sf, cpu);
+ err = allocate_buffers(cpuhw, hwc);
+ if (err)
+ break;
+ }
+ }
+out:
+ return err;
+}
+
+static int cpumsf_pmu_event_init(struct perf_event *event)
+{
+ int err;
+
+ /* No support for taken branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_RAW:
+ if ((event->attr.config != PERF_EVENT_CPUM_SF) &&
+ (event->attr.config != PERF_EVENT_CPUM_SF_DIAG))
+ return -ENOENT;
+ break;
+ case PERF_TYPE_HARDWARE:
+ /* Support sampling of CPU cycles in addition to the
+ * counter facility. However, the counter facility
+ * is more precise and, hence, restrict this PMU to
+ * sampling events only.
+ */
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
+ return -ENOENT;
+ if (!is_sampling_event(event))
+ return -ENOENT;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ /* Check online status of the CPU to which the event is pinned */
+ if (event->cpu >= nr_cpumask_bits ||
+ (event->cpu >= 0 && !cpu_online(event->cpu)))
+ return -ENODEV;
+
+ /* Force reset of idle/hv excludes regardless of what the
+ * user requested.
+ */
+ if (event->attr.exclude_hv)
+ event->attr.exclude_hv = 0;
+ if (event->attr.exclude_idle)
+ event->attr.exclude_idle = 0;
+
+ err = __hw_perf_event_init(event);
+ if (unlikely(err))
+ if (event->destroy)
+ event->destroy(event);
+ return err;
+}
+
+static void cpumsf_pmu_enable(struct pmu *pmu)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+ struct hw_perf_event *hwc;
+ int err;
+
+ if (cpuhw->flags & PMU_F_ENABLED)
+ return;
+
+ if (cpuhw->flags & PMU_F_ERR_MASK)
+ return;
+
+ /* Check whether to extent the sampling buffer.
+ *
+ * Two conditions trigger an increase of the sampling buffer for a
+ * perf event:
+ * 1. Postponed buffer allocations from the event initialization.
+ * 2. Sampling overflows that contribute to pending allocations.
+ *
+ * Note that the extend_sampling_buffer() function disables the sampling
+ * facility, but it can be fully re-enabled using sampling controls that
+ * have been saved in cpumsf_pmu_disable().
+ */
+ if (cpuhw->event) {
+ hwc = &cpuhw->event->hw;
+ /* Account number of overflow-designated buffer extents */
+ sfb_account_overflows(cpuhw, hwc);
+ if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
+ extend_sampling_buffer(&cpuhw->sfb, hwc);
+ }
+
+ /* (Re)enable the PMU and sampling facility */
+ cpuhw->flags |= PMU_F_ENABLED;
+ barrier();
+
+ err = lsctl(&cpuhw->lsctl);
+ if (err) {
+ cpuhw->flags &= ~PMU_F_ENABLED;
+ pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ 1, err);
+ return;
+ }
+
+ debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
+ "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs,
+ cpuhw->lsctl.ed, cpuhw->lsctl.cd,
+ (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
+}
+
+static void cpumsf_pmu_disable(struct pmu *pmu)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+ struct hws_lsctl_request_block inactive;
+ struct hws_qsi_info_block si;
+ int err;
+
+ if (!(cpuhw->flags & PMU_F_ENABLED))
+ return;
+
+ if (cpuhw->flags & PMU_F_ERR_MASK)
+ return;
+
+ /* Switch off sampling activation control */
+ inactive = cpuhw->lsctl;
+ inactive.cs = 0;
+ inactive.cd = 0;
+
+ err = lsctl(&inactive);
+ if (err) {
+ pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ 2, err);
+ return;
+ }
+
+ /* Save state of TEAR and DEAR register contents */
+ if (!qsi(&si)) {
+ /* TEAR/DEAR values are valid only if the sampling facility is
+ * enabled. Note that cpumsf_pmu_disable() might be called even
+ * for a disabled sampling facility because cpumsf_pmu_enable()
+ * controls the enable/disable state.
+ */
+ if (si.es) {
+ cpuhw->lsctl.tear = si.tear;
+ cpuhw->lsctl.dear = si.dear;
+ }
+ } else
+ debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
+ "qsi() failed with err=%i\n", err);
+
+ cpuhw->flags &= ~PMU_F_ENABLED;
+}
+
+/* perf_exclude_event() - Filter event
+ * @event: The perf event
+ * @regs: pt_regs structure
+ * @sde_regs: Sample-data-entry (sde) regs structure
+ *
+ * Filter perf events according to their exclude specification.
+ *
+ * Return non-zero if the event shall be excluded.
+ */
+static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
+ struct perf_sf_sde_regs *sde_regs)
+{
+ if (event->attr.exclude_user && user_mode(regs))
+ return 1;
+ if (event->attr.exclude_kernel && !user_mode(regs))
+ return 1;
+ if (event->attr.exclude_guest && sde_regs->in_guest)
+ return 1;
+ if (event->attr.exclude_host && !sde_regs->in_guest)
+ return 1;
+ return 0;
+}
+
+/* perf_push_sample() - Push samples to perf
+ * @event: The perf event
+ * @sample: Hardware sample data
+ *
+ * Use the hardware sample data to create perf event sample. The sample
+ * is the pushed to the event subsystem and the function checks for
+ * possible event overflows. If an event overflow occurs, the PMU is
+ * stopped.
+ *
+ * Return non-zero if an event overflow occurred.
+ */
+static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
+{
+ int overflow;
+ struct pt_regs regs;
+ struct perf_sf_sde_regs *sde_regs;
+ struct perf_sample_data data;
+ struct perf_raw_record raw;
+
+ /* Setup perf sample */
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+ raw.size = sfr->size;
+ raw.data = sfr;
+ data.raw = &raw;
+
+ /* Setup pt_regs to look like an CPU-measurement external interrupt
+ * using the Program Request Alert code. The regs.int_parm_long
+ * field which is unused contains additional sample-data-entry related
+ * indicators.
+ */
+ memset(&regs, 0, sizeof(regs));
+ regs.int_code = 0x1407;
+ regs.int_parm = CPU_MF_INT_SF_PRA;
+ sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
+
+ regs.psw.addr = sfr->basic.ia;
+ if (sfr->basic.T)
+ regs.psw.mask |= PSW_MASK_DAT;
+ if (sfr->basic.W)
+ regs.psw.mask |= PSW_MASK_WAIT;
+ if (sfr->basic.P)
+ regs.psw.mask |= PSW_MASK_PSTATE;
+ switch (sfr->basic.AS) {
+ case 0x0:
+ regs.psw.mask |= PSW_ASC_PRIMARY;
+ break;
+ case 0x1:
+ regs.psw.mask |= PSW_ASC_ACCREG;
+ break;
+ case 0x2:
+ regs.psw.mask |= PSW_ASC_SECONDARY;
+ break;
+ case 0x3:
+ regs.psw.mask |= PSW_ASC_HOME;
+ break;
+ }
+
+ /* The host-program-parameter (hpp) contains the sie control
+ * block that is set by sie64a() in entry64.S. Check if hpp
+ * refers to a valid control block and set sde_regs flags
+ * accordingly. This would allow to use hpp values for other
+ * purposes too.
+ * For now, simply use a non-zero value as guest indicator.
+ */
+ if (sfr->basic.hpp)
+ sde_regs->in_guest = 1;
+
+ overflow = 0;
+ if (perf_exclude_event(event, &regs, sde_regs))
+ goto out;
+ if (perf_event_overflow(event, &data, &regs)) {
+ overflow = 1;
+ event->pmu->stop(event, 0);
+ }
+ perf_event_update_userpage(event);
+out:
+ return overflow;
+}
+
+static void perf_event_count_update(struct perf_event *event, u64 count)
+{
+ local64_add(count, &event->count);
+}
+
+static int sample_format_is_valid(struct hws_combined_entry *sample,
+ unsigned int flags)
+{
+ if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
+ /* Only basic-sampling data entries with data-entry-format
+ * version of 0x0001 can be processed.
+ */
+ if (sample->basic.def != 0x0001)
+ return 0;
+ if (flags & PERF_CPUM_SF_DIAG_MODE)
+ /* The data-entry-format number of diagnostic-sampling data
+ * entries can vary. Because diagnostic data is just passed
+ * through, do only a sanity check on the DEF.
+ */
+ if (sample->diag.def < 0x8001)
+ return 0;
+ return 1;
+}
+
+static int sample_is_consistent(struct hws_combined_entry *sample,
+ unsigned long flags)
+{
+ /* This check applies only to basic-sampling data entries of potentially
+ * combined-sampling data entries. Invalid entries cannot be processed
+ * by the PMU and, thus, do not deliver an associated
+ * diagnostic-sampling data entry.
+ */
+ if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE)))
+ return 0;
+ /*
+ * Samples are skipped, if they are invalid or for which the
+ * instruction address is not predictable, i.e., the wait-state bit is
+ * set.
+ */
+ if (sample->basic.I || sample->basic.W)
+ return 0;
+ return 1;
+}
+
+static void reset_sample_slot(struct hws_combined_entry *sample,
+ unsigned long flags)
+{
+ if (likely(flags & PERF_CPUM_SF_BASIC_MODE))
+ sample->basic.def = 0;
+ if (flags & PERF_CPUM_SF_DIAG_MODE)
+ sample->diag.def = 0;
+}
+
+static void sfr_store_sample(struct sf_raw_sample *sfr,
+ struct hws_combined_entry *sample)
+{
+ if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE))
+ sfr->basic = sample->basic;
+ if (sfr->format & PERF_CPUM_SF_DIAG_MODE)
+ memcpy(&sfr->diag, &sample->diag, sfr->dsdes);
+}
+
+static void debug_sample_entry(struct hws_combined_entry *sample,
+ struct hws_trailer_entry *te,
+ unsigned long flags)
+{
+ debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
+ "sampling data entry: te->f=%i basic.def=%04x (%p)"
+ " diag.def=%04x (%p)\n", te->f,
+ sample->basic.def, &sample->basic,
+ (flags & PERF_CPUM_SF_DIAG_MODE)
+ ? sample->diag.def : 0xFFFF,
+ (flags & PERF_CPUM_SF_DIAG_MODE)
+ ? &sample->diag : NULL);
+}
+
+/* hw_collect_samples() - Walk through a sample-data-block and collect samples
+ * @event: The perf event
+ * @sdbt: Sample-data-block table
+ * @overflow: Event overflow counter
+ *
+ * Walks through a sample-data-block and collects sampling data entries that are
+ * then pushed to the perf event subsystem. Depending on the sampling function,
+ * there can be either basic-sampling or combined-sampling data entries. A
+ * combined-sampling data entry consists of a basic- and a diagnostic-sampling
+ * data entry. The sampling function is determined by the flags in the perf
+ * event hardware structure. The function always works with a combined-sampling
+ * data entry but ignores the the diagnostic portion if it is not available.
+ *
+ * Note that the implementation focuses on basic-sampling data entries and, if
+ * such an entry is not valid, the entire combined-sampling data entry is
+ * ignored.
+ *
+ * The overflow variables counts the number of samples that has been discarded
+ * due to a perf event overflow.
+ */
+static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
+ unsigned long long *overflow)
+{
+ unsigned long flags = SAMPL_FLAGS(&event->hw);
+ struct hws_combined_entry *sample;
+ struct hws_trailer_entry *te;
+ struct sf_raw_sample *sfr;
+ size_t sample_size;
+
+ /* Prepare and initialize raw sample data */
+ sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw);
+ sfr->format = flags & PERF_CPUM_SF_MODE_MASK;
+
+ sample_size = event_sample_size(&event->hw);
+ te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+ sample = (struct hws_combined_entry *) *sdbt;
+ while ((unsigned long *) sample < (unsigned long *) te) {
+ /* Check for an empty sample */
+ if (!sample->basic.def)
+ break;
+
+ /* Update perf event period */
+ perf_event_count_update(event, SAMPL_RATE(&event->hw));
+
+ /* Check sampling data entry */
+ if (sample_format_is_valid(sample, flags)) {
+ /* If an event overflow occurred, the PMU is stopped to
+ * throttle event delivery. Remaining sample data is
+ * discarded.
+ */
+ if (!*overflow) {
+ if (sample_is_consistent(sample, flags)) {
+ /* Deliver sample data to perf */
+ sfr_store_sample(sfr, sample);
+ *overflow = perf_push_sample(event, sfr);
+ }
+ } else
+ /* Count discarded samples */
+ *overflow += 1;
+ } else {
+ debug_sample_entry(sample, te, flags);
+ /* Sample slot is not yet written or other record.
+ *
+ * This condition can occur if the buffer was reused
+ * from a combined basic- and diagnostic-sampling.
+ * If only basic-sampling is then active, entries are
+ * written into the larger diagnostic entries.
+ * This is typically the case for sample-data-blocks
+ * that are not full. Stop processing if the first
+ * invalid format was detected.
+ */
+ if (!te->f)
+ break;
+ }
+
+ /* Reset sample slot and advance to next sample */
+ reset_sample_slot(sample, flags);
+ sample += sample_size;
+ }
+}
+
+/* hw_perf_event_update() - Process sampling buffer
+ * @event: The perf event
+ * @flush_all: Flag to also flush partially filled sample-data-blocks
+ *
+ * Processes the sampling buffer and create perf event samples.
+ * The sampling buffer position are retrieved and saved in the TEAR_REG
+ * register of the specified perf event.
+ *
+ * Only full sample-data-blocks are processed. Specify the flash_all flag
+ * to also walk through partially filled sample-data-blocks. It is ignored
+ * if PERF_CPUM_SF_FULL_BLOCKS is set. The PERF_CPUM_SF_FULL_BLOCKS flag
+ * enforces the processing of full sample-data-blocks only (trailer entries
+ * with the block-full-indicator bit set).
+ */
+static void hw_perf_event_update(struct perf_event *event, int flush_all)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hws_trailer_entry *te;
+ unsigned long *sdbt;
+ unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
+ int done;
+
+ if (flush_all && SDB_FULL_BLOCKS(hwc))
+ flush_all = 0;
+
+ sdbt = (unsigned long *) TEAR_REG(hwc);
+ done = event_overflow = sampl_overflow = num_sdb = 0;
+ while (!done) {
+ /* Get the trailer entry of the sample-data-block */
+ te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);
+
+ /* Leave loop if no more work to do (block full indicator) */
+ if (!te->f) {
+ done = 1;
+ if (!flush_all)
+ break;
+ }
+
+ /* Check the sample overflow count */
+ if (te->overflow)
+ /* Account sample overflows and, if a particular limit
+ * is reached, extend the sampling buffer.
+ * For details, see sfb_account_overflows().
+ */
+ sampl_overflow += te->overflow;
+
+ /* Timestamps are valid for full sample-data-blocks only */
+ debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
+ "overflow=%llu timestamp=0x%llx\n",
+ sdbt, te->overflow,
+ (te->f) ? trailer_timestamp(te) : 0ULL);
+
+ /* Collect all samples from a single sample-data-block and
+ * flag if an (perf) event overflow happened. If so, the PMU
+ * is stopped and remaining samples will be discarded.
+ */
+ hw_collect_samples(event, sdbt, &event_overflow);
+ num_sdb++;
+
+ /* Reset trailer (using compare-double-and-swap) */
+ do {
+ te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
+ te_flags |= SDB_TE_ALERT_REQ_MASK;
+ } while (!cmpxchg_double(&te->flags, &te->overflow,
+ te->flags, te->overflow,
+ te_flags, 0ULL));
+
+ /* Advance to next sample-data-block */
+ sdbt++;
+ if (is_link_entry(sdbt))
+ sdbt = get_next_sdbt(sdbt);
+
+ /* Update event hardware registers */
+ TEAR_REG(hwc) = (unsigned long) sdbt;
+
+ /* Stop processing sample-data if all samples of the current
+ * sample-data-block were flushed even if it was not full.
+ */
+ if (flush_all && done)
+ break;
+
+ /* If an event overflow happened, discard samples by
+ * processing any remaining sample-data-blocks.
+ */
+ if (event_overflow)
+ flush_all = 1;
+ }
+
+ /* Account sample overflows in the event hardware structure */
+ if (sampl_overflow)
+ OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
+ sampl_overflow, 1 + num_sdb);
+ if (sampl_overflow || event_overflow)
+ debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
+ "overflow stats: sample=%llu event=%llu\n",
+ sampl_overflow, event_overflow);
+}
+
+static void cpumsf_pmu_read(struct perf_event *event)
+{
+ /* Nothing to do ... updates are interrupt-driven */
+}
+
+/* Activate sampling control.
+ * Next call of pmu_enable() starts sampling.
+ */
+static void cpumsf_pmu_start(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+ if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+
+ perf_pmu_disable(event->pmu);
+ event->hw.state = 0;
+ cpuhw->lsctl.cs = 1;
+ if (SAMPL_DIAG_MODE(&event->hw))
+ cpuhw->lsctl.cd = 1;
+ perf_pmu_enable(event->pmu);
+}
+
+/* Deactivate sampling control.
+ * Next call of pmu_enable() stops sampling.
+ */
+static void cpumsf_pmu_stop(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+
+ perf_pmu_disable(event->pmu);
+ cpuhw->lsctl.cs = 0;
+ cpuhw->lsctl.cd = 0;
+ event->hw.state |= PERF_HES_STOPPED;
+
+ if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
+ hw_perf_event_update(event, 1);
+ event->hw.state |= PERF_HES_UPTODATE;
+ }
+ perf_pmu_enable(event->pmu);
+}
+
+static int cpumsf_pmu_add(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+ int err;
+
+ if (cpuhw->flags & PMU_F_IN_USE)
+ return -EAGAIN;
+
+ if (!cpuhw->sfb.sdbt)
+ return -EINVAL;
+
+ err = 0;
+ perf_pmu_disable(event->pmu);
+
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ /* Set up sampling controls. Always program the sampling register
+ * using the SDB-table start. Reset TEAR_REG event hardware register
+ * that is used by hw_perf_event_update() to store the sampling buffer
+ * position after samples have been flushed.
+ */
+ cpuhw->lsctl.s = 0;
+ cpuhw->lsctl.h = 1;
+ cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
+ cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
+ cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
+ hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+
+ /* Ensure sampling functions are in the disabled state. If disabled,
+ * switch on sampling enable control. */
+ if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) {
+ err = -EAGAIN;
+ goto out;
+ }
+ cpuhw->lsctl.es = 1;
+ if (SAMPL_DIAG_MODE(&event->hw))
+ cpuhw->lsctl.ed = 1;
+
+ /* Set in_use flag and store event */
+ cpuhw->event = event;
+ cpuhw->flags |= PMU_F_IN_USE;
+
+ if (flags & PERF_EF_START)
+ cpumsf_pmu_start(event, PERF_EF_RELOAD);
+out:
+ perf_event_update_userpage(event);
+ perf_pmu_enable(event->pmu);
+ return err;
+}
+
+static void cpumsf_pmu_del(struct perf_event *event, int flags)
+{
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+ perf_pmu_disable(event->pmu);
+ cpumsf_pmu_stop(event, PERF_EF_UPDATE);
+
+ cpuhw->lsctl.es = 0;
+ cpuhw->lsctl.ed = 0;
+ cpuhw->flags &= ~PMU_F_IN_USE;
+ cpuhw->event = NULL;
+
+ perf_event_update_userpage(event);
+ perf_pmu_enable(event->pmu);
+}
+
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
+CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
+
+static struct attribute *cpumsf_pmu_events_attr[] = {
+ CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
+ NULL,
+ NULL,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *cpumsf_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group cpumsf_pmu_events_group = {
+ .name = "events",
+ .attrs = cpumsf_pmu_events_attr,
+};
+static struct attribute_group cpumsf_pmu_format_group = {
+ .name = "format",
+ .attrs = cpumsf_pmu_format_attr,
+};
+static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
+ &cpumsf_pmu_events_group,
+ &cpumsf_pmu_format_group,
+ NULL,
+};
+
+static struct pmu cpumf_sampling = {
+ .pmu_enable = cpumsf_pmu_enable,
+ .pmu_disable = cpumsf_pmu_disable,
+
+ .event_init = cpumsf_pmu_event_init,
+ .add = cpumsf_pmu_add,
+ .del = cpumsf_pmu_del,
+
+ .start = cpumsf_pmu_start,
+ .stop = cpumsf_pmu_stop,
+ .read = cpumsf_pmu_read,
+
+ .attr_groups = cpumsf_pmu_attr_groups,
+};
+
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_hw_sf *cpuhw;
+
+ if (!(alert & CPU_MF_INT_SF_MASK))
+ return;
+ inc_irq_stat(IRQEXT_CMS);
+ cpuhw = this_cpu_ptr(&cpu_hw_sf);
+
+ /* Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case. */
+ if (!(cpuhw->flags & PMU_F_RESERVED))
+ return;
+
+ /* The processing below must take care of multiple alert events that
+ * might be indicated concurrently. */
+
+ /* Program alert request */
+ if (alert & CPU_MF_INT_SF_PRA) {
+ if (cpuhw->flags & PMU_F_IN_USE)
+ hw_perf_event_update(cpuhw->event, 0);
+ else
+ WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
+ }
+
+ /* Report measurement alerts only for non-PRA codes */
+ if (alert != CPU_MF_INT_SF_PRA)
+ debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);
+
+ /* Sampling authorization change request */
+ if (alert & CPU_MF_INT_SF_SACA)
+ qsi(&cpuhw->qsi);
+
+ /* Loss of sample data due to high-priority machine activities */
+ if (alert & CPU_MF_INT_SF_LSDA) {
+ pr_err("Sample data was lost\n");
+ cpuhw->flags |= PMU_F_ERR_LSDA;
+ sf_disable();
+ }
+
+ /* Invalid sampling buffer entry */
+ if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
+ pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
+ alert);
+ cpuhw->flags |= PMU_F_ERR_IBE;
+ sf_disable();
+ }
+}
+
+static int cpumf_pmu_notifier(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (long) hcpu;
+ int flags;
+
+ /* Ignore the notification if no events are scheduled on the PMU.
+ * This might be racy...
+ */
+ if (!atomic_read(&num_events))
+ return NOTIFY_OK;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ flags = PMC_INIT;
+ smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ break;
+ case CPU_DOWN_PREPARE:
+ flags = PMC_RELEASE;
+ smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
+{
+ if (!cpum_sf_avail())
+ return -ENODEV;
+ return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+}
+
+static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
+{
+ int rc;
+ unsigned long min, max;
+
+ if (!cpum_sf_avail())
+ return -ENODEV;
+ if (!val || !strlen(val))
+ return -EINVAL;
+
+ /* Valid parameter values: "min,max" or "max" */
+ min = CPUM_SF_MIN_SDB;
+ max = CPUM_SF_MAX_SDB;
+ if (strchr(val, ','))
+ rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
+ else
+ rc = kstrtoul(val, 10, &max);
+
+ if (min < 2 || min >= max || max > get_num_physpages())
+ rc = -EINVAL;
+ if (rc)
+ return rc;
+
+ sfb_set_limits(min, max);
+ pr_info("The sampling buffer limits have changed to: "
+ "min=%lu max=%lu (diag=x%lu)\n",
+ CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
+ return 0;
+}
+
+#define param_check_sfb_size(name, p) __param_check(name, p, void)
+static struct kernel_param_ops param_ops_sfb_size = {
+ .set = param_set_sfb_size,
+ .get = param_get_sfb_size,
+};
+
+#define RS_INIT_FAILURE_QSI 0x0001
+#define RS_INIT_FAILURE_BSDES 0x0002
+#define RS_INIT_FAILURE_ALRT 0x0003
+#define RS_INIT_FAILURE_PERF 0x0004
+static void __init pr_cpumsf_err(unsigned int reason)
+{
+ pr_err("Sampling facility support for perf is not available: "
+ "reason=%04x\n", reason);
+}
+
+static int __init init_cpum_sampling_pmu(void)
+{
+ struct hws_qsi_info_block si;
+ int err;
+
+ if (!cpum_sf_avail())
+ return -ENODEV;
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si)) {
+ pr_cpumsf_err(RS_INIT_FAILURE_QSI);
+ return -ENODEV;
+ }
+
+ if (si.bsdes != sizeof(struct hws_basic_entry)) {
+ pr_cpumsf_err(RS_INIT_FAILURE_BSDES);
+ return -EINVAL;
+ }
+
+ if (si.ad) {
+ sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+ cpumsf_pmu_events_attr[1] =
+ CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
+ }
+
+ sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
+ if (!sfdbg)
+ pr_err("Registering for s390dbf failed\n");
+ debug_register_view(sfdbg, &debug_sprintf_view);
+
+ err = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ if (err) {
+ pr_cpumsf_err(RS_INIT_FAILURE_ALRT);
+ goto out;
+ }
+
+ err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
+ if (err) {
+ pr_cpumsf_err(RS_INIT_FAILURE_PERF);
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ goto out;
+ }
+ perf_cpu_notifier(cpumf_pmu_notifier);
+out:
+ return err;
+}
+arch_initcall(init_cpum_sampling_pmu);
+core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
new file mode 100644
index 000000000..61595c1f0
--- /dev/null
+++ b/arch/s390/kernel/perf_event.c
@@ -0,0 +1,324 @@
+/*
+ * Performance event support for s390x
+ *
+ * Copyright IBM Corp. 2012, 2013
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#define KMSG_COMPONENT "perf"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/kvm_host.h>
+#include <linux/percpu.h>
+#include <linux/export.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/sysfs.h>
+#include <asm/irq.h>
+#include <asm/cpu_mf.h>
+#include <asm/lowcore.h>
+#include <asm/processor.h>
+#include <asm/sysinfo.h>
+
+const char *perf_pmu_name(void)
+{
+ if (cpum_cf_avail() || cpum_sf_avail())
+ return "CPU-Measurement Facilities (CPU-MF)";
+ return "pmu";
+}
+EXPORT_SYMBOL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+ int num = 0;
+
+ if (cpum_cf_avail())
+ num += PERF_CPUM_CF_MAX_CTR;
+ if (cpum_sf_avail())
+ num += PERF_CPUM_SF_MAX_CTR;
+
+ return num;
+}
+EXPORT_SYMBOL(perf_num_counters);
+
+static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
+{
+ struct stack_frame *stack = (struct stack_frame *) regs->gprs[15];
+
+ if (!stack)
+ return NULL;
+
+ return (struct kvm_s390_sie_block *) stack->empty1[0];
+}
+
+static bool is_in_guest(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return false;
+#if IS_ENABLED(CONFIG_KVM)
+ return instruction_pointer(regs) == (unsigned long) &sie_exit;
+#else
+ return false;
+#endif
+}
+
+static unsigned long guest_is_user_mode(struct pt_regs *regs)
+{
+ return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE;
+}
+
+static unsigned long instruction_pointer_guest(struct pt_regs *regs)
+{
+ return sie_block(regs)->gpsw.addr & PSW_ADDR_INSN;
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+ return is_in_guest(regs) ? instruction_pointer_guest(regs)
+ : instruction_pointer(regs);
+}
+
+static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
+{
+ return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+ : PERF_RECORD_MISC_GUEST_KERNEL;
+}
+
+static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
+{
+ struct perf_sf_sde_regs *sde_regs;
+ unsigned long flags;
+
+ sde_regs = (struct perf_sf_sde_regs *) &regs->int_parm_long;
+ if (sde_regs->in_guest)
+ flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
+ : PERF_RECORD_MISC_GUEST_KERNEL;
+ else
+ flags = user_mode(regs) ? PERF_RECORD_MISC_USER
+ : PERF_RECORD_MISC_KERNEL;
+ return flags;
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+ /* Check if the cpum_sf PMU has created the pt_regs structure.
+ * In this case, perf misc flags can be easily extracted. Otherwise,
+ * do regular checks on the pt_regs content.
+ */
+ if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA)
+ if (!regs->gprs[15])
+ return perf_misc_flags_sf(regs);
+
+ if (is_in_guest(regs))
+ return perf_misc_guest_flags(regs);
+
+ return user_mode(regs) ? PERF_RECORD_MISC_USER
+ : PERF_RECORD_MISC_KERNEL;
+}
+
+static void print_debug_cf(void)
+{
+ struct cpumf_ctr_info cf_info;
+ int cpu = smp_processor_id();
+
+ memset(&cf_info, 0, sizeof(cf_info));
+ if (!qctri(&cf_info))
+ pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
+ cpu, cf_info.cfvn, cf_info.csvn,
+ cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl);
+}
+
+static void print_debug_sf(void)
+{
+ struct hws_qsi_info_block si;
+ int cpu = smp_processor_id();
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si))
+ return;
+
+ pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n",
+ cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate,
+ si.cpu_speed);
+
+ if (si.as)
+ pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i"
+ " bsdes=%i tear=%016lx dear=%016lx\n", cpu,
+ si.as, si.es, si.cs, si.bsdes, si.tear, si.dear);
+ if (si.ad)
+ pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i"
+ " dsdes=%i tear=%016lx dear=%016lx\n", cpu,
+ si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear);
+}
+
+void perf_event_print_debug(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ if (cpum_cf_avail())
+ print_debug_cf();
+ if (cpum_sf_avail())
+ print_debug_sf();
+ local_irq_restore(flags);
+}
+
+/* Service level infrastructure */
+static void sl_print_counter(struct seq_file *m)
+{
+ struct cpumf_ctr_info ci;
+
+ memset(&ci, 0, sizeof(ci));
+ if (qctri(&ci))
+ return;
+
+ seq_printf(m, "CPU-MF: Counter facility: version=%u.%u "
+ "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl);
+}
+
+static void sl_print_sampling(struct seq_file *m)
+{
+ struct hws_qsi_info_block si;
+
+ memset(&si, 0, sizeof(si));
+ if (qsi(&si))
+ return;
+
+ if (!si.as && !si.ad)
+ return;
+
+ seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu"
+ " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate,
+ si.cpu_speed);
+ if (si.as)
+ seq_printf(m, "CPU-MF: Sampling facility: mode=basic"
+ " sample_size=%u\n", si.bsdes);
+ if (si.ad)
+ seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic"
+ " sample_size=%u\n", si.dsdes);
+}
+
+static void service_level_perf_print(struct seq_file *m,
+ struct service_level *sl)
+{
+ if (cpum_cf_avail())
+ sl_print_counter(m);
+ if (cpum_sf_avail())
+ sl_print_sampling(m);
+}
+
+static struct service_level service_level_perf = {
+ .seq_print = service_level_perf_print,
+};
+
+static int __init service_level_perf_register(void)
+{
+ return register_service_level(&service_level_perf);
+}
+arch_initcall(service_level_perf_register);
+
+/* See also arch/s390/kernel/traps.c */
+static unsigned long __store_trace(struct perf_callchain_entry *entry,
+ unsigned long sp,
+ unsigned long low, unsigned long high)
+{
+ struct stack_frame *sf;
+ struct pt_regs *regs;
+
+ while (1) {
+ sp = sp & PSW_ADDR_INSN;
+ if (sp < low || sp > high - sizeof(*sf))
+ return sp;
+ sf = (struct stack_frame *) sp;
+ perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+ /* Follow the backchain. */
+ while (1) {
+ low = sp;
+ sp = sf->back_chain & PSW_ADDR_INSN;
+ if (!sp)
+ break;
+ if (sp <= low || sp > high - sizeof(*sf))
+ return sp;
+ sf = (struct stack_frame *) sp;
+ perf_callchain_store(entry,
+ sf->gprs[8] & PSW_ADDR_INSN);
+ }
+ /* Zero backchain detected, check for interrupt frame. */
+ sp = (unsigned long) (sf + 1);
+ if (sp <= low || sp > high - sizeof(*regs))
+ return sp;
+ regs = (struct pt_regs *) sp;
+ perf_callchain_store(entry, sf->gprs[8] & PSW_ADDR_INSN);
+ low = sp;
+ sp = regs->gprs[15];
+ }
+}
+
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+ unsigned long head;
+ struct stack_frame *head_sf;
+
+ if (user_mode(regs))
+ return;
+
+ head = regs->gprs[15];
+ head_sf = (struct stack_frame *) head;
+
+ if (!head_sf || !head_sf->back_chain)
+ return;
+
+ head = head_sf->back_chain;
+ head = __store_trace(entry, head, S390_lowcore.async_stack - ASYNC_SIZE,
+ S390_lowcore.async_stack);
+
+ __store_trace(entry, head, S390_lowcore.thread_info,
+ S390_lowcore.thread_info + THREAD_SIZE);
+}
+
+/* Perf defintions for PMU event attributes in sysfs */
+ssize_t cpumf_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sprintf(page, "event=0x%04llx,name=%s\n",
+ pmu_attr->id, attr->attr.name);
+}
+
+/* Reserve/release functions for sharing perf hardware */
+static DEFINE_SPINLOCK(perf_hw_owner_lock);
+static void *perf_sampling_owner;
+
+int perf_reserve_sampling(void)
+{
+ int err;
+
+ err = 0;
+ spin_lock(&perf_hw_owner_lock);
+ if (perf_sampling_owner) {
+ pr_warn("The sampling facility is already reserved by %p\n",
+ perf_sampling_owner);
+ err = -EBUSY;
+ } else
+ perf_sampling_owner = __builtin_return_address(0);
+ spin_unlock(&perf_hw_owner_lock);
+ return err;
+}
+EXPORT_SYMBOL(perf_reserve_sampling);
+
+void perf_release_sampling(void)
+{
+ spin_lock(&perf_hw_owner_lock);
+ WARN_ON(!perf_sampling_owner);
+ perf_sampling_owner = NULL;
+ spin_unlock(&perf_hw_owner_lock);
+}
+EXPORT_SYMBOL(perf_release_sampling);
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
new file mode 100644
index 000000000..036aa01d0
--- /dev/null
+++ b/arch/s390/kernel/pgm_check.S
@@ -0,0 +1,146 @@
+/*
+ * Program check table.
+ *
+ * Copyright IBM Corp. 2012
+ */
+
+#include <linux/linkage.h>
+
+#define PGM_CHECK(handler) .long handler
+#define PGM_CHECK_DEFAULT PGM_CHECK(default_trap_handler)
+
+/*
+ * The program check table contains exactly 128 (0x00-0x7f) entries. Each
+ * line defines the function to be called corresponding to the program check
+ * interruption code.
+ */
+.section .rodata, "a"
+ENTRY(pgm_check_table)
+PGM_CHECK_DEFAULT /* 00 */
+PGM_CHECK(illegal_op) /* 01 */
+PGM_CHECK(privileged_op) /* 02 */
+PGM_CHECK(execute_exception) /* 03 */
+PGM_CHECK(do_protection_exception) /* 04 */
+PGM_CHECK(addressing_exception) /* 05 */
+PGM_CHECK(specification_exception) /* 06 */
+PGM_CHECK(data_exception) /* 07 */
+PGM_CHECK(overflow_exception) /* 08 */
+PGM_CHECK(divide_exception) /* 09 */
+PGM_CHECK(overflow_exception) /* 0a */
+PGM_CHECK(divide_exception) /* 0b */
+PGM_CHECK(hfp_overflow_exception) /* 0c */
+PGM_CHECK(hfp_underflow_exception) /* 0d */
+PGM_CHECK(hfp_significance_exception) /* 0e */
+PGM_CHECK(hfp_divide_exception) /* 0f */
+PGM_CHECK(do_dat_exception) /* 10 */
+PGM_CHECK(do_dat_exception) /* 11 */
+PGM_CHECK(translation_exception) /* 12 */
+PGM_CHECK(special_op_exception) /* 13 */
+PGM_CHECK_DEFAULT /* 14 */
+PGM_CHECK(operand_exception) /* 15 */
+PGM_CHECK_DEFAULT /* 16 */
+PGM_CHECK_DEFAULT /* 17 */
+PGM_CHECK(transaction_exception) /* 18 */
+PGM_CHECK_DEFAULT /* 19 */
+PGM_CHECK_DEFAULT /* 1a */
+PGM_CHECK(vector_exception) /* 1b */
+PGM_CHECK(space_switch_exception) /* 1c */
+PGM_CHECK(hfp_sqrt_exception) /* 1d */
+PGM_CHECK_DEFAULT /* 1e */
+PGM_CHECK_DEFAULT /* 1f */
+PGM_CHECK_DEFAULT /* 20 */
+PGM_CHECK_DEFAULT /* 21 */
+PGM_CHECK_DEFAULT /* 22 */
+PGM_CHECK_DEFAULT /* 23 */
+PGM_CHECK_DEFAULT /* 24 */
+PGM_CHECK_DEFAULT /* 25 */
+PGM_CHECK_DEFAULT /* 26 */
+PGM_CHECK_DEFAULT /* 27 */
+PGM_CHECK_DEFAULT /* 28 */
+PGM_CHECK_DEFAULT /* 29 */
+PGM_CHECK_DEFAULT /* 2a */
+PGM_CHECK_DEFAULT /* 2b */
+PGM_CHECK_DEFAULT /* 2c */
+PGM_CHECK_DEFAULT /* 2d */
+PGM_CHECK_DEFAULT /* 2e */
+PGM_CHECK_DEFAULT /* 2f */
+PGM_CHECK_DEFAULT /* 30 */
+PGM_CHECK_DEFAULT /* 31 */
+PGM_CHECK_DEFAULT /* 32 */
+PGM_CHECK_DEFAULT /* 33 */
+PGM_CHECK_DEFAULT /* 34 */
+PGM_CHECK_DEFAULT /* 35 */
+PGM_CHECK_DEFAULT /* 36 */
+PGM_CHECK_DEFAULT /* 37 */
+PGM_CHECK(do_dat_exception) /* 38 */
+PGM_CHECK(do_dat_exception) /* 39 */
+PGM_CHECK(do_dat_exception) /* 3a */
+PGM_CHECK(do_dat_exception) /* 3b */
+PGM_CHECK_DEFAULT /* 3c */
+PGM_CHECK_DEFAULT /* 3d */
+PGM_CHECK_DEFAULT /* 3e */
+PGM_CHECK_DEFAULT /* 3f */
+PGM_CHECK_DEFAULT /* 40 */
+PGM_CHECK_DEFAULT /* 41 */
+PGM_CHECK_DEFAULT /* 42 */
+PGM_CHECK_DEFAULT /* 43 */
+PGM_CHECK_DEFAULT /* 44 */
+PGM_CHECK_DEFAULT /* 45 */
+PGM_CHECK_DEFAULT /* 46 */
+PGM_CHECK_DEFAULT /* 47 */
+PGM_CHECK_DEFAULT /* 48 */
+PGM_CHECK_DEFAULT /* 49 */
+PGM_CHECK_DEFAULT /* 4a */
+PGM_CHECK_DEFAULT /* 4b */
+PGM_CHECK_DEFAULT /* 4c */
+PGM_CHECK_DEFAULT /* 4d */
+PGM_CHECK_DEFAULT /* 4e */
+PGM_CHECK_DEFAULT /* 4f */
+PGM_CHECK_DEFAULT /* 50 */
+PGM_CHECK_DEFAULT /* 51 */
+PGM_CHECK_DEFAULT /* 52 */
+PGM_CHECK_DEFAULT /* 53 */
+PGM_CHECK_DEFAULT /* 54 */
+PGM_CHECK_DEFAULT /* 55 */
+PGM_CHECK_DEFAULT /* 56 */
+PGM_CHECK_DEFAULT /* 57 */
+PGM_CHECK_DEFAULT /* 58 */
+PGM_CHECK_DEFAULT /* 59 */
+PGM_CHECK_DEFAULT /* 5a */
+PGM_CHECK_DEFAULT /* 5b */
+PGM_CHECK_DEFAULT /* 5c */
+PGM_CHECK_DEFAULT /* 5d */
+PGM_CHECK_DEFAULT /* 5e */
+PGM_CHECK_DEFAULT /* 5f */
+PGM_CHECK_DEFAULT /* 60 */
+PGM_CHECK_DEFAULT /* 61 */
+PGM_CHECK_DEFAULT /* 62 */
+PGM_CHECK_DEFAULT /* 63 */
+PGM_CHECK_DEFAULT /* 64 */
+PGM_CHECK_DEFAULT /* 65 */
+PGM_CHECK_DEFAULT /* 66 */
+PGM_CHECK_DEFAULT /* 67 */
+PGM_CHECK_DEFAULT /* 68 */
+PGM_CHECK_DEFAULT /* 69 */
+PGM_CHECK_DEFAULT /* 6a */
+PGM_CHECK_DEFAULT /* 6b */
+PGM_CHECK_DEFAULT /* 6c */
+PGM_CHECK_DEFAULT /* 6d */
+PGM_CHECK_DEFAULT /* 6e */
+PGM_CHECK_DEFAULT /* 6f */
+PGM_CHECK_DEFAULT /* 70 */
+PGM_CHECK_DEFAULT /* 71 */
+PGM_CHECK_DEFAULT /* 72 */
+PGM_CHECK_DEFAULT /* 73 */
+PGM_CHECK_DEFAULT /* 74 */
+PGM_CHECK_DEFAULT /* 75 */
+PGM_CHECK_DEFAULT /* 76 */
+PGM_CHECK_DEFAULT /* 77 */
+PGM_CHECK_DEFAULT /* 78 */
+PGM_CHECK_DEFAULT /* 79 */
+PGM_CHECK_DEFAULT /* 7a */
+PGM_CHECK_DEFAULT /* 7b */
+PGM_CHECK_DEFAULT /* 7c */
+PGM_CHECK_DEFAULT /* 7d */
+PGM_CHECK_DEFAULT /* 7e */
+PGM_CHECK_DEFAULT /* 7f */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
new file mode 100644
index 000000000..dc5edc29b
--- /dev/null
+++ b/arch/s390/kernel/process.c
@@ -0,0 +1,226 @@
+/*
+ * This file handles the architecture dependent parts of process handling.
+ *
+ * Copyright IBM Corp. 1999, 2009
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Hartmut Penner <hp@de.ibm.com>,
+ * Denis Joseph Barrow,
+ */
+
+#include <linux/compiler.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/elfcore.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/tick.h>
+#include <linux/personality.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <linux/kprobes.h>
+#include <linux/random.h>
+#include <linux/module.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/vtimer.h>
+#include <asm/exec.h>
+#include <asm/irq.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include <asm/switch_to.h>
+#include <asm/runtime_instr.h>
+#include "entry.h"
+
+asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
+
+/*
+ * Return saved PC of a blocked thread. used in kernel/sched.
+ * resume in entry.S does not create a new stack frame, it
+ * just stores the registers %r6-%r15 to the frame given by
+ * schedule. We want to return the address of the caller of
+ * schedule, so we have to walk the backchain one time to
+ * find the frame schedule() store its return address.
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+ struct stack_frame *sf, *low, *high;
+
+ if (!tsk || !task_stack_page(tsk))
+ return 0;
+ low = task_stack_page(tsk);
+ high = (struct stack_frame *) task_pt_regs(tsk);
+ sf = (struct stack_frame *) (tsk->thread.ksp & PSW_ADDR_INSN);
+ if (sf <= low || sf > high)
+ return 0;
+ sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+ if (sf <= low || sf > high)
+ return 0;
+ return sf->gprs[8];
+}
+
+extern void kernel_thread_starter(void);
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+ exit_thread_runtime_instr();
+}
+
+void flush_thread(void)
+{
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+}
+
+void arch_release_task_struct(struct task_struct *tsk)
+{
+ if (tsk->thread.vxrs)
+ kfree(tsk->thread.vxrs);
+}
+
+int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
+ unsigned long arg, struct task_struct *p)
+{
+ struct thread_info *ti;
+ struct fake_frame
+ {
+ struct stack_frame sf;
+ struct pt_regs childregs;
+ } *frame;
+
+ frame = container_of(task_pt_regs(p), struct fake_frame, childregs);
+ p->thread.ksp = (unsigned long) frame;
+ /* Save access registers to new thread structure. */
+ save_access_regs(&p->thread.acrs[0]);
+ /* start new process with ar4 pointing to the correct address space */
+ p->thread.mm_segment = get_fs();
+ /* Don't copy debug registers */
+ memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
+ memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
+ clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
+ /* Initialize per thread user and system timer values */
+ ti = task_thread_info(p);
+ ti->user_timer = 0;
+ ti->system_timer = 0;
+
+ frame->sf.back_chain = 0;
+ /* new return point is ret_from_fork */
+ frame->sf.gprs[8] = (unsigned long) ret_from_fork;
+ /* fake return stack for resume(), don't go back to schedule */
+ frame->sf.gprs[9] = (unsigned long) frame;
+
+ /* Store access registers to kernel stack of new process. */
+ if (unlikely(p->flags & PF_KTHREAD)) {
+ /* kernel thread */
+ memset(&frame->childregs, 0, sizeof(struct pt_regs));
+ frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ frame->childregs.psw.addr = PSW_ADDR_AMODE |
+ (unsigned long) kernel_thread_starter;
+ frame->childregs.gprs[9] = new_stackp; /* function */
+ frame->childregs.gprs[10] = arg;
+ frame->childregs.gprs[11] = (unsigned long) do_exit;
+ frame->childregs.orig_gpr2 = -1;
+
+ return 0;
+ }
+ frame->childregs = *current_pt_regs();
+ frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */
+ frame->childregs.flags = 0;
+ if (new_stackp)
+ frame->childregs.gprs[15] = new_stackp;
+
+ /* Don't copy runtime instrumentation info */
+ p->thread.ri_cb = NULL;
+ p->thread.ri_signum = 0;
+ frame->childregs.psw.mask &= ~PSW_MASK_RI;
+
+ /* Save the fpu registers to new thread structure. */
+ save_fp_ctl(&p->thread.fp_regs.fpc);
+ save_fp_regs(p->thread.fp_regs.fprs);
+ p->thread.fp_regs.pad = 0;
+ p->thread.vxrs = NULL;
+ /* Set a new TLS ? */
+ if (clone_flags & CLONE_SETTLS) {
+ unsigned long tls = frame->childregs.gprs[6];
+ if (is_compat_task()) {
+ p->thread.acrs[0] = (unsigned int)tls;
+ } else {
+ p->thread.acrs[0] = (unsigned int)(tls >> 32);
+ p->thread.acrs[1] = (unsigned int)tls;
+ }
+ }
+ return 0;
+}
+
+asmlinkage void execve_tail(void)
+{
+ current->thread.fp_regs.fpc = 0;
+ asm volatile("sfpc %0,%0" : : "d" (0));
+}
+
+/*
+ * fill in the FPU structure for a core dump.
+ */
+int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
+{
+ save_fp_ctl(&fpregs->fpc);
+ save_fp_regs(fpregs->fprs);
+ return 1;
+}
+EXPORT_SYMBOL(dump_fpu);
+
+unsigned long get_wchan(struct task_struct *p)
+{
+ struct stack_frame *sf, *low, *high;
+ unsigned long return_address;
+ int count;
+
+ if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p))
+ return 0;
+ low = task_stack_page(p);
+ high = (struct stack_frame *) task_pt_regs(p);
+ sf = (struct stack_frame *) (p->thread.ksp & PSW_ADDR_INSN);
+ if (sf <= low || sf > high)
+ return 0;
+ for (count = 0; count < 16; count++) {
+ sf = (struct stack_frame *) (sf->back_chain & PSW_ADDR_INSN);
+ if (sf <= low || sf > high)
+ return 0;
+ return_address = sf->gprs[8] & PSW_ADDR_INSN;
+ if (!in_sched_functions(return_address))
+ return return_address;
+ }
+ return 0;
+}
+
+unsigned long arch_align_stack(unsigned long sp)
+{
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+ sp -= get_random_int() & ~PAGE_MASK;
+ return sp & ~0xf;
+}
+
+static inline unsigned long brk_rnd(void)
+{
+ /* 8MB for 32bit, 1GB for 64bit */
+ if (is_32bit_task())
+ return (get_random_int() & 0x7ffUL) << PAGE_SHIFT;
+ else
+ return (get_random_int() & 0x3ffffUL) << PAGE_SHIFT;
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+ unsigned long ret;
+
+ ret = PAGE_ALIGN(mm->brk + brk_rnd());
+ return (ret > mm->brk) ? ret : mm->brk;
+}
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
new file mode 100644
index 000000000..dc488e13b
--- /dev/null
+++ b/arch/s390/kernel/processor.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/delay.h>
+#include <linux/cpu.h>
+#include <asm/elf.h>
+#include <asm/lowcore.h>
+#include <asm/param.h>
+#include <asm/smp.h>
+
+static DEFINE_PER_CPU(struct cpuid, cpu_id);
+
+void notrace cpu_relax(void)
+{
+ if (!smp_cpu_mtid && MACHINE_HAS_DIAG44)
+ asm volatile("diag 0,0,0x44");
+ barrier();
+}
+EXPORT_SYMBOL(cpu_relax);
+
+/*
+ * cpu_init - initializes state that is per-CPU.
+ */
+void cpu_init(void)
+{
+ struct cpuid *id = this_cpu_ptr(&cpu_id);
+
+ get_cpu_id(id);
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
+ BUG_ON(current->mm);
+ enter_lazy_tlb(&init_mm, current);
+}
+
+/*
+ * show_cpuinfo - Get information on one CPU for use by procfs.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+ static const char *hwcap_str[] = {
+ "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
+ "edat", "etf3eh", "highgprs", "te", "vx"
+ };
+ unsigned long n = (unsigned long) v - 1;
+ int i;
+
+ if (!n) {
+ s390_adjust_jiffies();
+ seq_printf(m, "vendor_id : IBM/S390\n"
+ "# processors : %i\n"
+ "bogomips per cpu: %lu.%02lu\n",
+ num_online_cpus(), loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ))%100);
+ seq_puts(m, "features\t: ");
+ for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
+ if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
+ seq_printf(m, "%s ", hwcap_str[i]);
+ seq_puts(m, "\n");
+ show_cacheinfo(m);
+ }
+ get_online_cpus();
+ if (cpu_online(n)) {
+ struct cpuid *id = &per_cpu(cpu_id, n);
+ seq_printf(m, "processor %li: "
+ "version = %02X, "
+ "identification = %06X, "
+ "machine = %04X\n",
+ n, id->version, id->ident, id->machine);
+ }
+ put_online_cpus();
+ return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+ return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ ++*pos;
+ return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo,
+};
+
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
new file mode 100644
index 000000000..d363c9c32
--- /dev/null
+++ b/arch/s390/kernel/ptrace.c
@@ -0,0 +1,1533 @@
+/*
+ * Ptrace user space interface.
+ *
+ * Copyright IBM Corp. 1999, 2010
+ * Author(s): Denis Joseph Barrow
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/security.h>
+#include <linux/audit.h>
+#include <linux/signal.h>
+#include <linux/elf.h>
+#include <linux/regset.h>
+#include <linux/tracehook.h>
+#include <linux/seccomp.h>
+#include <linux/compat.h>
+#include <trace/syscall.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/switch_to.h>
+#include "entry.h"
+
+#ifdef CONFIG_COMPAT
+#include "compat_ptrace.h"
+#endif
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+void update_cr_regs(struct task_struct *task)
+{
+ struct pt_regs *regs = task_pt_regs(task);
+ struct thread_struct *thread = &task->thread;
+ struct per_regs old, new;
+
+ /* Take care of the enable/disable of transactional execution. */
+ if (MACHINE_HAS_TE || MACHINE_HAS_VX) {
+ unsigned long cr, cr_new;
+
+ __ctl_store(cr, 0, 0);
+ cr_new = cr;
+ if (MACHINE_HAS_TE) {
+ /* Set or clear transaction execution TXC bit 8. */
+ cr_new |= (1UL << 55);
+ if (task->thread.per_flags & PER_FLAG_NO_TE)
+ cr_new &= ~(1UL << 55);
+ }
+ if (MACHINE_HAS_VX) {
+ /* Enable/disable of vector extension */
+ cr_new &= ~(1UL << 17);
+ if (task->thread.vxrs)
+ cr_new |= (1UL << 17);
+ }
+ if (cr_new != cr)
+ __ctl_load(cr_new, 0, 0);
+ if (MACHINE_HAS_TE) {
+ /* Set/clear transaction execution TDC bits 62/63. */
+ __ctl_store(cr, 2, 2);
+ cr_new = cr & ~3UL;
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+ if (task->thread.per_flags &
+ PER_FLAG_TE_ABORT_RAND_TEND)
+ cr_new |= 1UL;
+ else
+ cr_new |= 2UL;
+ }
+ if (cr_new != cr)
+ __ctl_load(cr_new, 2, 2);
+ }
+ }
+ /* Copy user specified PER registers */
+ new.control = thread->per_user.control;
+ new.start = thread->per_user.start;
+ new.end = thread->per_user.end;
+
+ /* merge TIF_SINGLE_STEP into user specified PER registers. */
+ if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) ||
+ test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) {
+ if (test_tsk_thread_flag(task, TIF_BLOCK_STEP))
+ new.control |= PER_EVENT_BRANCH;
+ else
+ new.control |= PER_EVENT_IFETCH;
+ new.control |= PER_CONTROL_SUSPENSION;
+ new.control |= PER_EVENT_TRANSACTION_END;
+ if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
+ new.control |= PER_EVENT_IFETCH;
+ new.start = 0;
+ new.end = PSW_ADDR_INSN;
+ }
+
+ /* Take care of the PER enablement bit in the PSW. */
+ if (!(new.control & PER_EVENT_MASK)) {
+ regs->psw.mask &= ~PSW_MASK_PER;
+ return;
+ }
+ regs->psw.mask |= PSW_MASK_PER;
+ __ctl_store(old, 9, 11);
+ if (memcmp(&new, &old, sizeof(struct per_regs)) != 0)
+ __ctl_load(new, 9, 11);
+}
+
+void user_enable_single_step(struct task_struct *task)
+{
+ clear_tsk_thread_flag(task, TIF_BLOCK_STEP);
+ set_tsk_thread_flag(task, TIF_SINGLE_STEP);
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+ clear_tsk_thread_flag(task, TIF_BLOCK_STEP);
+ clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+ set_tsk_thread_flag(task, TIF_SINGLE_STEP);
+ set_tsk_thread_flag(task, TIF_BLOCK_STEP);
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Clear all debugging related fields.
+ */
+void ptrace_disable(struct task_struct *task)
+{
+ memset(&task->thread.per_user, 0, sizeof(task->thread.per_user));
+ memset(&task->thread.per_event, 0, sizeof(task->thread.per_event));
+ clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
+ clear_pt_regs_flag(task_pt_regs(task), PIF_PER_TRAP);
+ task->thread.per_flags = 0;
+}
+
+#define __ADDR_MASK 7
+
+static inline unsigned long __peek_user_per(struct task_struct *child,
+ addr_t addr)
+{
+ struct per_struct_kernel *dummy = NULL;
+
+ if (addr == (addr_t) &dummy->cr9)
+ /* Control bits of the active per set. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ PER_EVENT_IFETCH : child->thread.per_user.control;
+ else if (addr == (addr_t) &dummy->cr10)
+ /* Start address of the active per set. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ 0 : child->thread.per_user.start;
+ else if (addr == (addr_t) &dummy->cr11)
+ /* End address of the active per set. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ PSW_ADDR_INSN : child->thread.per_user.end;
+ else if (addr == (addr_t) &dummy->bits)
+ /* Single-step bit. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ (1UL << (BITS_PER_LONG - 1)) : 0;
+ else if (addr == (addr_t) &dummy->starting_addr)
+ /* Start address of the user specified per set. */
+ return child->thread.per_user.start;
+ else if (addr == (addr_t) &dummy->ending_addr)
+ /* End address of the user specified per set. */
+ return child->thread.per_user.end;
+ else if (addr == (addr_t) &dummy->perc_atmid)
+ /* PER code, ATMID and AI of the last PER trap */
+ return (unsigned long)
+ child->thread.per_event.cause << (BITS_PER_LONG - 16);
+ else if (addr == (addr_t) &dummy->address)
+ /* Address of the last PER trap */
+ return child->thread.per_event.address;
+ else if (addr == (addr_t) &dummy->access_id)
+ /* Access id of the last PER trap */
+ return (unsigned long)
+ child->thread.per_event.paid << (BITS_PER_LONG - 8);
+ return 0;
+}
+
+/*
+ * Read the word at offset addr from the user area of a process. The
+ * trouble here is that the information is littered over different
+ * locations. The process registers are found on the kernel stack,
+ * the floating point stuff and the trace settings are stored in
+ * the task structure. In addition the different structures in
+ * struct user contain pad bytes that should be read as zeroes.
+ * Lovely...
+ */
+static unsigned long __peek_user(struct task_struct *child, addr_t addr)
+{
+ struct user *dummy = NULL;
+ addr_t offset, tmp;
+
+ if (addr < (addr_t) &dummy->regs.acrs) {
+ /*
+ * psw and gprs are stored on the stack
+ */
+ tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr);
+ if (addr == (addr_t) &dummy->regs.psw.mask) {
+ /* Return a clean psw mask. */
+ tmp &= PSW_MASK_USER | PSW_MASK_RI;
+ tmp |= PSW_USER_BITS;
+ }
+
+ } else if (addr < (addr_t) &dummy->regs.orig_gpr2) {
+ /*
+ * access registers are stored in the thread structure
+ */
+ offset = addr - (addr_t) &dummy->regs.acrs;
+ /*
+ * Very special case: old & broken 64 bit gdb reading
+ * from acrs[15]. Result is a 64 bit value. Read the
+ * 32 bit acrs[15] value and shift it by 32. Sick...
+ */
+ if (addr == (addr_t) &dummy->regs.acrs[15])
+ tmp = ((unsigned long) child->thread.acrs[15]) << 32;
+ else
+ tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset);
+
+ } else if (addr == (addr_t) &dummy->regs.orig_gpr2) {
+ /*
+ * orig_gpr2 is stored on the kernel stack
+ */
+ tmp = (addr_t) task_pt_regs(child)->orig_gpr2;
+
+ } else if (addr < (addr_t) &dummy->regs.fp_regs) {
+ /*
+ * prevent reads of padding hole between
+ * orig_gpr2 and fp_regs on s390.
+ */
+ tmp = 0;
+
+ } else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) {
+ /*
+ * floating point control reg. is in the thread structure
+ */
+ tmp = child->thread.fp_regs.fpc;
+ tmp <<= BITS_PER_LONG - 32;
+
+ } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
+ /*
+ * floating point regs. are either in child->thread.fp_regs
+ * or the child->thread.vxrs array
+ */
+ offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
+ if (child->thread.vxrs)
+ tmp = *(addr_t *)
+ ((addr_t) child->thread.vxrs + 2*offset);
+ else
+ tmp = *(addr_t *)
+ ((addr_t) &child->thread.fp_regs.fprs + offset);
+
+ } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
+ /*
+ * Handle access to the per_info structure.
+ */
+ addr -= (addr_t) &dummy->regs.per_info;
+ tmp = __peek_user_per(child, addr);
+
+ } else
+ tmp = 0;
+
+ return tmp;
+}
+
+static int
+peek_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+ addr_t tmp, mask;
+
+ /*
+ * Stupid gdb peeks/pokes the access registers in 64 bit with
+ * an alignment of 4. Programmers from hell...
+ */
+ mask = __ADDR_MASK;
+ if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs &&
+ addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2)
+ mask = 3;
+ if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
+ return -EIO;
+
+ tmp = __peek_user(child, addr);
+ return put_user(tmp, (addr_t __user *) data);
+}
+
+static inline void __poke_user_per(struct task_struct *child,
+ addr_t addr, addr_t data)
+{
+ struct per_struct_kernel *dummy = NULL;
+
+ /*
+ * There are only three fields in the per_info struct that the
+ * debugger user can write to.
+ * 1) cr9: the debugger wants to set a new PER event mask
+ * 2) starting_addr: the debugger wants to set a new starting
+ * address to use with the PER event mask.
+ * 3) ending_addr: the debugger wants to set a new ending
+ * address to use with the PER event mask.
+ * The user specified PER event mask and the start and end
+ * addresses are used only if single stepping is not in effect.
+ * Writes to any other field in per_info are ignored.
+ */
+ if (addr == (addr_t) &dummy->cr9)
+ /* PER event mask of the user specified per set. */
+ child->thread.per_user.control =
+ data & (PER_EVENT_MASK | PER_CONTROL_MASK);
+ else if (addr == (addr_t) &dummy->starting_addr)
+ /* Starting address of the user specified per set. */
+ child->thread.per_user.start = data;
+ else if (addr == (addr_t) &dummy->ending_addr)
+ /* Ending address of the user specified per set. */
+ child->thread.per_user.end = data;
+}
+
+/*
+ * Write a word to the user area of a process at location addr. This
+ * operation does have an additional problem compared to peek_user.
+ * Stores to the program status word and on the floating point
+ * control register needs to get checked for validity.
+ */
+static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+ struct user *dummy = NULL;
+ addr_t offset;
+
+ if (addr < (addr_t) &dummy->regs.acrs) {
+ /*
+ * psw and gprs are stored on the stack
+ */
+ if (addr == (addr_t) &dummy->regs.psw.mask) {
+ unsigned long mask = PSW_MASK_USER;
+
+ mask |= is_ri_task(child) ? PSW_MASK_RI : 0;
+ if ((data ^ PSW_USER_BITS) & ~mask)
+ /* Invalid psw mask. */
+ return -EINVAL;
+ if ((data & PSW_MASK_ASC) == PSW_ASC_HOME)
+ /* Invalid address-space-control bits */
+ return -EINVAL;
+ if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA))
+ /* Invalid addressing mode bits */
+ return -EINVAL;
+ }
+ *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data;
+
+ } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
+ /*
+ * access registers are stored in the thread structure
+ */
+ offset = addr - (addr_t) &dummy->regs.acrs;
+ /*
+ * Very special case: old & broken 64 bit gdb writing
+ * to acrs[15] with a 64 bit value. Ignore the lower
+ * half of the value and write the upper 32 bit to
+ * acrs[15]. Sick...
+ */
+ if (addr == (addr_t) &dummy->regs.acrs[15])
+ child->thread.acrs[15] = (unsigned int) (data >> 32);
+ else
+ *(addr_t *)((addr_t) &child->thread.acrs + offset) = data;
+
+ } else if (addr == (addr_t) &dummy->regs.orig_gpr2) {
+ /*
+ * orig_gpr2 is stored on the kernel stack
+ */
+ task_pt_regs(child)->orig_gpr2 = data;
+
+ } else if (addr < (addr_t) &dummy->regs.fp_regs) {
+ /*
+ * prevent writes of padding hole between
+ * orig_gpr2 and fp_regs on s390.
+ */
+ return 0;
+
+ } else if (addr == (addr_t) &dummy->regs.fp_regs.fpc) {
+ /*
+ * floating point control reg. is in the thread structure
+ */
+ if ((unsigned int) data != 0 ||
+ test_fp_ctl(data >> (BITS_PER_LONG - 32)))
+ return -EINVAL;
+ child->thread.fp_regs.fpc = data >> (BITS_PER_LONG - 32);
+
+ } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) {
+ /*
+ * floating point regs. are either in child->thread.fp_regs
+ * or the child->thread.vxrs array
+ */
+ offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
+ if (child->thread.vxrs)
+ *(addr_t *)((addr_t)
+ child->thread.vxrs + 2*offset) = data;
+ else
+ *(addr_t *)((addr_t)
+ &child->thread.fp_regs.fprs + offset) = data;
+
+ } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
+ /*
+ * Handle access to the per_info structure.
+ */
+ addr -= (addr_t) &dummy->regs.per_info;
+ __poke_user_per(child, addr, data);
+
+ }
+
+ return 0;
+}
+
+static int poke_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+ addr_t mask;
+
+ /*
+ * Stupid gdb peeks/pokes the access registers in 64 bit with
+ * an alignment of 4. Programmers from hell indeed...
+ */
+ mask = __ADDR_MASK;
+ if (addr >= (addr_t) &((struct user *) NULL)->regs.acrs &&
+ addr < (addr_t) &((struct user *) NULL)->regs.orig_gpr2)
+ mask = 3;
+ if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
+ return -EIO;
+
+ return __poke_user(child, addr, data);
+}
+
+long arch_ptrace(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data)
+{
+ ptrace_area parea;
+ int copied, ret;
+
+ switch (request) {
+ case PTRACE_PEEKUSR:
+ /* read the word at location addr in the USER area. */
+ return peek_user(child, addr, data);
+
+ case PTRACE_POKEUSR:
+ /* write the word at location addr in the USER area */
+ return poke_user(child, addr, data);
+
+ case PTRACE_PEEKUSR_AREA:
+ case PTRACE_POKEUSR_AREA:
+ if (copy_from_user(&parea, (void __force __user *) addr,
+ sizeof(parea)))
+ return -EFAULT;
+ addr = parea.kernel_addr;
+ data = parea.process_addr;
+ copied = 0;
+ while (copied < parea.len) {
+ if (request == PTRACE_PEEKUSR_AREA)
+ ret = peek_user(child, addr, data);
+ else {
+ addr_t utmp;
+ if (get_user(utmp,
+ (addr_t __force __user *) data))
+ return -EFAULT;
+ ret = poke_user(child, addr, utmp);
+ }
+ if (ret)
+ return ret;
+ addr += sizeof(unsigned long);
+ data += sizeof(unsigned long);
+ copied += sizeof(unsigned long);
+ }
+ return 0;
+ case PTRACE_GET_LAST_BREAK:
+ put_user(task_thread_info(child)->last_break,
+ (unsigned long __user *) data);
+ return 0;
+ case PTRACE_ENABLE_TE:
+ if (!MACHINE_HAS_TE)
+ return -EIO;
+ child->thread.per_flags &= ~PER_FLAG_NO_TE;
+ return 0;
+ case PTRACE_DISABLE_TE:
+ if (!MACHINE_HAS_TE)
+ return -EIO;
+ child->thread.per_flags |= PER_FLAG_NO_TE;
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+ return 0;
+ case PTRACE_TE_ABORT_RAND:
+ if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE))
+ return -EIO;
+ switch (data) {
+ case 0UL:
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND;
+ break;
+ case 1UL:
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND;
+ break;
+ case 2UL:
+ child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND;
+ child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+ default:
+ /* Removing high order bit from addr (only for 31 bit). */
+ addr &= PSW_ADDR_INSN;
+ return ptrace_request(child, request, addr, data);
+ }
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Now the fun part starts... a 31 bit program running in the
+ * 31 bit emulation tracing another program. PTRACE_PEEKTEXT,
+ * PTRACE_PEEKDATA, PTRACE_POKETEXT and PTRACE_POKEDATA are easy
+ * to handle, the difference to the 64 bit versions of the requests
+ * is that the access is done in multiples of 4 byte instead of
+ * 8 bytes (sizeof(unsigned long) on 31/64 bit).
+ * The ugly part are PTRACE_PEEKUSR, PTRACE_PEEKUSR_AREA,
+ * PTRACE_POKEUSR and PTRACE_POKEUSR_AREA. If the traced program
+ * is a 31 bit program too, the content of struct user can be
+ * emulated. A 31 bit program peeking into the struct user of
+ * a 64 bit program is a no-no.
+ */
+
+/*
+ * Same as peek_user_per but for a 31 bit program.
+ */
+static inline __u32 __peek_user_per_compat(struct task_struct *child,
+ addr_t addr)
+{
+ struct compat_per_struct_kernel *dummy32 = NULL;
+
+ if (addr == (addr_t) &dummy32->cr9)
+ /* Control bits of the active per set. */
+ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+ PER_EVENT_IFETCH : child->thread.per_user.control;
+ else if (addr == (addr_t) &dummy32->cr10)
+ /* Start address of the active per set. */
+ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+ 0 : child->thread.per_user.start;
+ else if (addr == (addr_t) &dummy32->cr11)
+ /* End address of the active per set. */
+ return test_thread_flag(TIF_SINGLE_STEP) ?
+ PSW32_ADDR_INSN : child->thread.per_user.end;
+ else if (addr == (addr_t) &dummy32->bits)
+ /* Single-step bit. */
+ return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+ 0x80000000 : 0;
+ else if (addr == (addr_t) &dummy32->starting_addr)
+ /* Start address of the user specified per set. */
+ return (__u32) child->thread.per_user.start;
+ else if (addr == (addr_t) &dummy32->ending_addr)
+ /* End address of the user specified per set. */
+ return (__u32) child->thread.per_user.end;
+ else if (addr == (addr_t) &dummy32->perc_atmid)
+ /* PER code, ATMID and AI of the last PER trap */
+ return (__u32) child->thread.per_event.cause << 16;
+ else if (addr == (addr_t) &dummy32->address)
+ /* Address of the last PER trap */
+ return (__u32) child->thread.per_event.address;
+ else if (addr == (addr_t) &dummy32->access_id)
+ /* Access id of the last PER trap */
+ return (__u32) child->thread.per_event.paid << 24;
+ return 0;
+}
+
+/*
+ * Same as peek_user but for a 31 bit program.
+ */
+static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
+{
+ struct compat_user *dummy32 = NULL;
+ addr_t offset;
+ __u32 tmp;
+
+ if (addr < (addr_t) &dummy32->regs.acrs) {
+ struct pt_regs *regs = task_pt_regs(child);
+ /*
+ * psw and gprs are stored on the stack
+ */
+ if (addr == (addr_t) &dummy32->regs.psw.mask) {
+ /* Fake a 31 bit psw mask. */
+ tmp = (__u32)(regs->psw.mask >> 32);
+ tmp &= PSW32_MASK_USER | PSW32_MASK_RI;
+ tmp |= PSW32_USER_BITS;
+ } else if (addr == (addr_t) &dummy32->regs.psw.addr) {
+ /* Fake a 31 bit psw address. */
+ tmp = (__u32) regs->psw.addr |
+ (__u32)(regs->psw.mask & PSW_MASK_BA);
+ } else {
+ /* gpr 0-15 */
+ tmp = *(__u32 *)((addr_t) &regs->psw + addr*2 + 4);
+ }
+ } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
+ /*
+ * access registers are stored in the thread structure
+ */
+ offset = addr - (addr_t) &dummy32->regs.acrs;
+ tmp = *(__u32*)((addr_t) &child->thread.acrs + offset);
+
+ } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) {
+ /*
+ * orig_gpr2 is stored on the kernel stack
+ */
+ tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4);
+
+ } else if (addr < (addr_t) &dummy32->regs.fp_regs) {
+ /*
+ * prevent reads of padding hole between
+ * orig_gpr2 and fp_regs on s390.
+ */
+ tmp = 0;
+
+ } else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) {
+ /*
+ * floating point control reg. is in the thread structure
+ */
+ tmp = child->thread.fp_regs.fpc;
+
+ } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
+ /*
+ * floating point regs. are either in child->thread.fp_regs
+ * or the child->thread.vxrs array
+ */
+ offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
+ if (child->thread.vxrs)
+ tmp = *(__u32 *)
+ ((addr_t) child->thread.vxrs + 2*offset);
+ else
+ tmp = *(__u32 *)
+ ((addr_t) &child->thread.fp_regs.fprs + offset);
+
+ } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
+ /*
+ * Handle access to the per_info structure.
+ */
+ addr -= (addr_t) &dummy32->regs.per_info;
+ tmp = __peek_user_per_compat(child, addr);
+
+ } else
+ tmp = 0;
+
+ return tmp;
+}
+
+static int peek_user_compat(struct task_struct *child,
+ addr_t addr, addr_t data)
+{
+ __u32 tmp;
+
+ if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3)
+ return -EIO;
+
+ tmp = __peek_user_compat(child, addr);
+ return put_user(tmp, (__u32 __user *) data);
+}
+
+/*
+ * Same as poke_user_per but for a 31 bit program.
+ */
+static inline void __poke_user_per_compat(struct task_struct *child,
+ addr_t addr, __u32 data)
+{
+ struct compat_per_struct_kernel *dummy32 = NULL;
+
+ if (addr == (addr_t) &dummy32->cr9)
+ /* PER event mask of the user specified per set. */
+ child->thread.per_user.control =
+ data & (PER_EVENT_MASK | PER_CONTROL_MASK);
+ else if (addr == (addr_t) &dummy32->starting_addr)
+ /* Starting address of the user specified per set. */
+ child->thread.per_user.start = data;
+ else if (addr == (addr_t) &dummy32->ending_addr)
+ /* Ending address of the user specified per set. */
+ child->thread.per_user.end = data;
+}
+
+/*
+ * Same as poke_user but for a 31 bit program.
+ */
+static int __poke_user_compat(struct task_struct *child,
+ addr_t addr, addr_t data)
+{
+ struct compat_user *dummy32 = NULL;
+ __u32 tmp = (__u32) data;
+ addr_t offset;
+
+ if (addr < (addr_t) &dummy32->regs.acrs) {
+ struct pt_regs *regs = task_pt_regs(child);
+ /*
+ * psw, gprs, acrs and orig_gpr2 are stored on the stack
+ */
+ if (addr == (addr_t) &dummy32->regs.psw.mask) {
+ __u32 mask = PSW32_MASK_USER;
+
+ mask |= is_ri_task(child) ? PSW32_MASK_RI : 0;
+ /* Build a 64 bit psw mask from 31 bit mask. */
+ if ((tmp ^ PSW32_USER_BITS) & ~mask)
+ /* Invalid psw mask. */
+ return -EINVAL;
+ if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME)
+ /* Invalid address-space-control bits */
+ return -EINVAL;
+ regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
+ (regs->psw.mask & PSW_MASK_BA) |
+ (__u64)(tmp & mask) << 32;
+ } else if (addr == (addr_t) &dummy32->regs.psw.addr) {
+ /* Build a 64 bit psw address from 31 bit address. */
+ regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN;
+ /* Transfer 31 bit amode bit to psw mask. */
+ regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) |
+ (__u64)(tmp & PSW32_ADDR_AMODE);
+ } else {
+ /* gpr 0-15 */
+ *(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
+ }
+ } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) {
+ /*
+ * access registers are stored in the thread structure
+ */
+ offset = addr - (addr_t) &dummy32->regs.acrs;
+ *(__u32*)((addr_t) &child->thread.acrs + offset) = tmp;
+
+ } else if (addr == (addr_t) (&dummy32->regs.orig_gpr2)) {
+ /*
+ * orig_gpr2 is stored on the kernel stack
+ */
+ *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp;
+
+ } else if (addr < (addr_t) &dummy32->regs.fp_regs) {
+ /*
+ * prevent writess of padding hole between
+ * orig_gpr2 and fp_regs on s390.
+ */
+ return 0;
+
+ } else if (addr == (addr_t) &dummy32->regs.fp_regs.fpc) {
+ /*
+ * floating point control reg. is in the thread structure
+ */
+ if (test_fp_ctl(tmp))
+ return -EINVAL;
+ child->thread.fp_regs.fpc = data;
+
+ } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) {
+ /*
+ * floating point regs. are either in child->thread.fp_regs
+ * or the child->thread.vxrs array
+ */
+ offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
+ if (child->thread.vxrs)
+ *(__u32 *)((addr_t)
+ child->thread.vxrs + 2*offset) = tmp;
+ else
+ *(__u32 *)((addr_t)
+ &child->thread.fp_regs.fprs + offset) = tmp;
+
+ } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
+ /*
+ * Handle access to the per_info structure.
+ */
+ addr -= (addr_t) &dummy32->regs.per_info;
+ __poke_user_per_compat(child, addr, data);
+ }
+
+ return 0;
+}
+
+static int poke_user_compat(struct task_struct *child,
+ addr_t addr, addr_t data)
+{
+ if (!is_compat_task() || (addr & 3) ||
+ addr > sizeof(struct compat_user) - 3)
+ return -EIO;
+
+ return __poke_user_compat(child, addr, data);
+}
+
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+ compat_ulong_t caddr, compat_ulong_t cdata)
+{
+ unsigned long addr = caddr;
+ unsigned long data = cdata;
+ compat_ptrace_area parea;
+ int copied, ret;
+
+ switch (request) {
+ case PTRACE_PEEKUSR:
+ /* read the word at location addr in the USER area. */
+ return peek_user_compat(child, addr, data);
+
+ case PTRACE_POKEUSR:
+ /* write the word at location addr in the USER area */
+ return poke_user_compat(child, addr, data);
+
+ case PTRACE_PEEKUSR_AREA:
+ case PTRACE_POKEUSR_AREA:
+ if (copy_from_user(&parea, (void __force __user *) addr,
+ sizeof(parea)))
+ return -EFAULT;
+ addr = parea.kernel_addr;
+ data = parea.process_addr;
+ copied = 0;
+ while (copied < parea.len) {
+ if (request == PTRACE_PEEKUSR_AREA)
+ ret = peek_user_compat(child, addr, data);
+ else {
+ __u32 utmp;
+ if (get_user(utmp,
+ (__u32 __force __user *) data))
+ return -EFAULT;
+ ret = poke_user_compat(child, addr, utmp);
+ }
+ if (ret)
+ return ret;
+ addr += sizeof(unsigned int);
+ data += sizeof(unsigned int);
+ copied += sizeof(unsigned int);
+ }
+ return 0;
+ case PTRACE_GET_LAST_BREAK:
+ put_user(task_thread_info(child)->last_break,
+ (unsigned int __user *) data);
+ return 0;
+ }
+ return compat_ptrace_request(child, request, addr, data);
+}
+#endif
+
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
+{
+ long ret = 0;
+
+ /* Do the secure computing check first. */
+ if (secure_computing()) {
+ /* seccomp failures shouldn't expose any additional code. */
+ ret = -1;
+ goto out;
+ }
+
+ /*
+ * The sysc_tracesys code in entry.S stored the system
+ * call number to gprs[2].
+ */
+ if (test_thread_flag(TIF_SYSCALL_TRACE) &&
+ (tracehook_report_syscall_entry(regs) ||
+ regs->gprs[2] >= NR_syscalls)) {
+ /*
+ * Tracing decided this syscall should not happen or the
+ * debugger stored an invalid system call number. Skip
+ * the system call and the system call restart handling.
+ */
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
+ ret = -1;
+ }
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_enter(regs, regs->gprs[2]);
+
+ audit_syscall_entry(regs->gprs[2], regs->orig_gpr2,
+ regs->gprs[3], regs->gprs[4],
+ regs->gprs[5]);
+out:
+ return ret ?: regs->gprs[2];
+}
+
+asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
+{
+ audit_syscall_exit(regs);
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_exit(regs, regs->gprs[2]);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall_exit(regs, 0);
+}
+
+/*
+ * user_regset definitions.
+ */
+
+static int s390_regs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ if (target == current)
+ save_access_regs(target->thread.acrs);
+
+ if (kbuf) {
+ unsigned long *k = kbuf;
+ while (count > 0) {
+ *k++ = __peek_user(target, pos);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ unsigned long __user *u = ubuf;
+ while (count > 0) {
+ if (__put_user(__peek_user(target, pos), u++))
+ return -EFAULT;
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
+ }
+ return 0;
+}
+
+static int s390_regs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int rc = 0;
+
+ if (target == current)
+ save_access_regs(target->thread.acrs);
+
+ if (kbuf) {
+ const unsigned long *k = kbuf;
+ while (count > 0 && !rc) {
+ rc = __poke_user(target, pos, *k++);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ const unsigned long __user *u = ubuf;
+ while (count > 0 && !rc) {
+ unsigned long word;
+ rc = __get_user(word, u++);
+ if (rc)
+ break;
+ rc = __poke_user(target, pos, word);
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
+ }
+
+ if (rc == 0 && target == current)
+ restore_access_regs(target->thread.acrs);
+
+ return rc;
+}
+
+static int s390_fpregs_get(struct task_struct *target,
+ const struct user_regset *regset, unsigned int pos,
+ unsigned int count, void *kbuf, void __user *ubuf)
+{
+ if (target == current) {
+ save_fp_ctl(&target->thread.fp_regs.fpc);
+ save_fp_regs(target->thread.fp_regs.fprs);
+ } else if (target->thread.vxrs) {
+ int i;
+
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ target->thread.fp_regs.fprs[i] =
+ *(freg_t *)(target->thread.vxrs + i);
+ }
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fp_regs, 0, -1);
+}
+
+static int s390_fpregs_set(struct task_struct *target,
+ const struct user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ int rc = 0;
+
+ if (target == current) {
+ save_fp_ctl(&target->thread.fp_regs.fpc);
+ save_fp_regs(target->thread.fp_regs.fprs);
+ }
+
+ /* If setting FPC, must validate it first. */
+ if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
+ u32 ufpc[2] = { target->thread.fp_regs.fpc, 0 };
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc,
+ 0, offsetof(s390_fp_regs, fprs));
+ if (rc)
+ return rc;
+ if (ufpc[1] != 0 || test_fp_ctl(ufpc[0]))
+ return -EINVAL;
+ target->thread.fp_regs.fpc = ufpc[0];
+ }
+
+ if (rc == 0 && count > 0)
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ target->thread.fp_regs.fprs,
+ offsetof(s390_fp_regs, fprs), -1);
+
+ if (rc == 0) {
+ if (target == current) {
+ restore_fp_ctl(&target->thread.fp_regs.fpc);
+ restore_fp_regs(target->thread.fp_regs.fprs);
+ } else if (target->thread.vxrs) {
+ int i;
+
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ *(freg_t *)(target->thread.vxrs + i) =
+ target->thread.fp_regs.fprs[i];
+ }
+ }
+
+ return rc;
+}
+
+static int s390_last_break_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ if (count > 0) {
+ if (kbuf) {
+ unsigned long *k = kbuf;
+ *k = task_thread_info(target)->last_break;
+ } else {
+ unsigned long __user *u = ubuf;
+ if (__put_user(task_thread_info(target)->last_break, u))
+ return -EFAULT;
+ }
+ }
+ return 0;
+}
+
+static int s390_last_break_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return 0;
+}
+
+static int s390_tdb_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct pt_regs *regs = task_pt_regs(target);
+ unsigned char *data;
+
+ if (!(regs->int_code & 0x200))
+ return -ENODATA;
+ data = target->thread.trap_tdb;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256);
+}
+
+static int s390_tdb_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return 0;
+}
+
+static int s390_vxrs_low_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ if (!MACHINE_HAS_VX)
+ return -ENODEV;
+ if (target->thread.vxrs) {
+ if (target == current)
+ save_vx_regs(target->thread.vxrs);
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ vxrs[i] = *((__u64 *)(target->thread.vxrs + i) + 1);
+ } else
+ memset(vxrs, 0, sizeof(vxrs));
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+}
+
+static int s390_vxrs_low_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i, rc;
+
+ if (!MACHINE_HAS_VX)
+ return -ENODEV;
+ if (!target->thread.vxrs) {
+ rc = alloc_vector_registers(target);
+ if (rc)
+ return rc;
+ } else if (target == current)
+ save_vx_regs(target->thread.vxrs);
+
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+ if (rc == 0) {
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ *((__u64 *)(target->thread.vxrs + i) + 1) = vxrs[i];
+ if (target == current)
+ restore_vx_regs(target->thread.vxrs);
+ }
+
+ return rc;
+}
+
+static int s390_vxrs_high_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ __vector128 vxrs[__NUM_VXRS_HIGH];
+
+ if (!MACHINE_HAS_VX)
+ return -ENODEV;
+ if (target->thread.vxrs) {
+ if (target == current)
+ save_vx_regs(target->thread.vxrs);
+ memcpy(vxrs, target->thread.vxrs + __NUM_VXRS_LOW,
+ sizeof(vxrs));
+ } else
+ memset(vxrs, 0, sizeof(vxrs));
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+}
+
+static int s390_vxrs_high_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int rc;
+
+ if (!MACHINE_HAS_VX)
+ return -ENODEV;
+ if (!target->thread.vxrs) {
+ rc = alloc_vector_registers(target);
+ if (rc)
+ return rc;
+ } else if (target == current)
+ save_vx_regs(target->thread.vxrs);
+
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ target->thread.vxrs + __NUM_VXRS_LOW, 0, -1);
+ if (rc == 0 && target == current)
+ restore_vx_regs(target->thread.vxrs);
+
+ return rc;
+}
+
+static int s390_system_call_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ unsigned int *data = &task_thread_info(target)->system_call;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(unsigned int));
+}
+
+static int s390_system_call_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ unsigned int *data = &task_thread_info(target)->system_call;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(unsigned int));
+}
+
+static const struct user_regset s390_regsets[] = {
+ {
+ .core_note_type = NT_PRSTATUS,
+ .n = sizeof(s390_regs) / sizeof(long),
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .get = s390_regs_get,
+ .set = s390_regs_set,
+ },
+ {
+ .core_note_type = NT_PRFPREG,
+ .n = sizeof(s390_fp_regs) / sizeof(long),
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .get = s390_fpregs_get,
+ .set = s390_fpregs_set,
+ },
+ {
+ .core_note_type = NT_S390_SYSTEM_CALL,
+ .n = 1,
+ .size = sizeof(unsigned int),
+ .align = sizeof(unsigned int),
+ .get = s390_system_call_get,
+ .set = s390_system_call_set,
+ },
+ {
+ .core_note_type = NT_S390_LAST_BREAK,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .get = s390_last_break_get,
+ .set = s390_last_break_set,
+ },
+ {
+ .core_note_type = NT_S390_TDB,
+ .n = 1,
+ .size = 256,
+ .align = 1,
+ .get = s390_tdb_get,
+ .set = s390_tdb_set,
+ },
+ {
+ .core_note_type = NT_S390_VXRS_LOW,
+ .n = __NUM_VXRS_LOW,
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_vxrs_low_get,
+ .set = s390_vxrs_low_set,
+ },
+ {
+ .core_note_type = NT_S390_VXRS_HIGH,
+ .n = __NUM_VXRS_HIGH,
+ .size = sizeof(__vector128),
+ .align = sizeof(__vector128),
+ .get = s390_vxrs_high_get,
+ .set = s390_vxrs_high_set,
+ },
+};
+
+static const struct user_regset_view user_s390_view = {
+ .name = UTS_MACHINE,
+ .e_machine = EM_S390,
+ .regsets = s390_regsets,
+ .n = ARRAY_SIZE(s390_regsets)
+};
+
+#ifdef CONFIG_COMPAT
+static int s390_compat_regs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ if (target == current)
+ save_access_regs(target->thread.acrs);
+
+ if (kbuf) {
+ compat_ulong_t *k = kbuf;
+ while (count > 0) {
+ *k++ = __peek_user_compat(target, pos);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ compat_ulong_t __user *u = ubuf;
+ while (count > 0) {
+ if (__put_user(__peek_user_compat(target, pos), u++))
+ return -EFAULT;
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
+ }
+ return 0;
+}
+
+static int s390_compat_regs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int rc = 0;
+
+ if (target == current)
+ save_access_regs(target->thread.acrs);
+
+ if (kbuf) {
+ const compat_ulong_t *k = kbuf;
+ while (count > 0 && !rc) {
+ rc = __poke_user_compat(target, pos, *k++);
+ count -= sizeof(*k);
+ pos += sizeof(*k);
+ }
+ } else {
+ const compat_ulong_t __user *u = ubuf;
+ while (count > 0 && !rc) {
+ compat_ulong_t word;
+ rc = __get_user(word, u++);
+ if (rc)
+ break;
+ rc = __poke_user_compat(target, pos, word);
+ count -= sizeof(*u);
+ pos += sizeof(*u);
+ }
+ }
+
+ if (rc == 0 && target == current)
+ restore_access_regs(target->thread.acrs);
+
+ return rc;
+}
+
+static int s390_compat_regs_high_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ compat_ulong_t *gprs_high;
+
+ gprs_high = (compat_ulong_t *)
+ &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
+ if (kbuf) {
+ compat_ulong_t *k = kbuf;
+ while (count > 0) {
+ *k++ = *gprs_high;
+ gprs_high += 2;
+ count -= sizeof(*k);
+ }
+ } else {
+ compat_ulong_t __user *u = ubuf;
+ while (count > 0) {
+ if (__put_user(*gprs_high, u++))
+ return -EFAULT;
+ gprs_high += 2;
+ count -= sizeof(*u);
+ }
+ }
+ return 0;
+}
+
+static int s390_compat_regs_high_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ compat_ulong_t *gprs_high;
+ int rc = 0;
+
+ gprs_high = (compat_ulong_t *)
+ &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
+ if (kbuf) {
+ const compat_ulong_t *k = kbuf;
+ while (count > 0) {
+ *gprs_high = *k++;
+ *gprs_high += 2;
+ count -= sizeof(*k);
+ }
+ } else {
+ const compat_ulong_t __user *u = ubuf;
+ while (count > 0 && !rc) {
+ unsigned long word;
+ rc = __get_user(word, u++);
+ if (rc)
+ break;
+ *gprs_high = word;
+ *gprs_high += 2;
+ count -= sizeof(*u);
+ }
+ }
+
+ return rc;
+}
+
+static int s390_compat_last_break_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ compat_ulong_t last_break;
+
+ if (count > 0) {
+ last_break = task_thread_info(target)->last_break;
+ if (kbuf) {
+ unsigned long *k = kbuf;
+ *k = last_break;
+ } else {
+ unsigned long __user *u = ubuf;
+ if (__put_user(last_break, u))
+ return -EFAULT;
+ }
+ }
+ return 0;
+}
+
+static int s390_compat_last_break_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return 0;
+}
+
+static const struct user_regset s390_compat_regsets[] = {
+ {
+ .core_note_type = NT_PRSTATUS,
+ .n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
+ .size = sizeof(compat_long_t),
+ .align = sizeof(compat_long_t),
+ .get = s390_compat_regs_get,
+ .set = s390_compat_regs_set,
+ },
+ {
+ .core_note_type = NT_PRFPREG,
+ .n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
+ .size = sizeof(compat_long_t),
+ .align = sizeof(compat_long_t),
+ .get = s390_fpregs_get,
+ .set = s390_fpregs_set,
+ },
+ {
+ .core_note_type = NT_S390_SYSTEM_CALL,
+ .n = 1,
+ .size = sizeof(compat_uint_t),
+ .align = sizeof(compat_uint_t),
+ .get = s390_system_call_get,
+ .set = s390_system_call_set,
+ },
+ {
+ .core_note_type = NT_S390_LAST_BREAK,
+ .n = 1,
+ .size = sizeof(long),
+ .align = sizeof(long),
+ .get = s390_compat_last_break_get,
+ .set = s390_compat_last_break_set,
+ },
+ {
+ .core_note_type = NT_S390_TDB,
+ .n = 1,
+ .size = 256,
+ .align = 1,
+ .get = s390_tdb_get,
+ .set = s390_tdb_set,
+ },
+ {
+ .core_note_type = NT_S390_VXRS_LOW,
+ .n = __NUM_VXRS_LOW,
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_vxrs_low_get,
+ .set = s390_vxrs_low_set,
+ },
+ {
+ .core_note_type = NT_S390_VXRS_HIGH,
+ .n = __NUM_VXRS_HIGH,
+ .size = sizeof(__vector128),
+ .align = sizeof(__vector128),
+ .get = s390_vxrs_high_get,
+ .set = s390_vxrs_high_set,
+ },
+ {
+ .core_note_type = NT_S390_HIGH_GPRS,
+ .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
+ .size = sizeof(compat_long_t),
+ .align = sizeof(compat_long_t),
+ .get = s390_compat_regs_high_get,
+ .set = s390_compat_regs_high_set,
+ },
+};
+
+static const struct user_regset_view user_s390_compat_view = {
+ .name = "s390",
+ .e_machine = EM_S390,
+ .regsets = s390_compat_regsets,
+ .n = ARRAY_SIZE(s390_compat_regsets)
+};
+#endif
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+#ifdef CONFIG_COMPAT
+ if (test_tsk_thread_flag(task, TIF_31BIT))
+ return &user_s390_compat_view;
+#endif
+ return &user_s390_view;
+}
+
+static const char *gpr_names[NUM_GPRS] = {
+ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+};
+
+unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
+{
+ if (offset >= NUM_GPRS)
+ return 0;
+ return regs->gprs[offset];
+}
+
+int regs_query_register_offset(const char *name)
+{
+ unsigned long offset;
+
+ if (!name || *name != 'r')
+ return -EINVAL;
+ if (kstrtoul(name + 1, 10, &offset))
+ return -EINVAL;
+ if (offset >= NUM_GPRS)
+ return -EINVAL;
+ return offset;
+}
+
+const char *regs_query_register_name(unsigned int offset)
+{
+ if (offset >= NUM_GPRS)
+ return NULL;
+ return gpr_names[offset];
+}
+
+static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+ unsigned long ksp = kernel_stack_pointer(regs);
+
+ return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:pt_regs which contains kernel stack pointer.
+ * @n:stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specifined by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long addr;
+
+ addr = kernel_stack_pointer(regs) + n * sizeof(long);
+ if (!regs_within_kernel_stack(regs, addr))
+ return 0;
+ return *(unsigned long *)addr;
+}
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
new file mode 100644
index 000000000..52aab0bd8
--- /dev/null
+++ b/arch/s390/kernel/reipl.S
@@ -0,0 +1,155 @@
+/*
+ * Copyright IBM Corp 2000, 2011
+ * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>,
+ * Denis Joseph Barrow,
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/sigp.h>
+
+#
+# store_status
+#
+# Prerequisites to run this function:
+# - Prefix register is set to zero
+# - Original prefix register is stored in "dump_prefix_page"
+# - Lowcore protection is off
+#
+ENTRY(store_status)
+ /* Save register one and load save area base */
+ stg %r1,__LC_SAVE_AREA_RESTART
+ lghi %r1,SAVE_AREA_BASE
+ /* General purpose registers */
+ stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ lg %r2,__LC_SAVE_AREA_RESTART
+ stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1)
+ /* Control registers */
+ stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ /* Access registers */
+ stam %a0,%a15,__LC_AREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ /* Floating point registers */
+ std %f0, 0x00 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f1, 0x08 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f2, 0x10 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f3, 0x18 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f4, 0x20 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f5, 0x28 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f6, 0x30 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f7, 0x38 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f8, 0x40 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f9, 0x48 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f10,0x50 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f11,0x58 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f12,0x60 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f13,0x68 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f14,0x70 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ std %f15,0x78 + __LC_FPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ /* Floating point control register */
+ stfpc __LC_FP_CREG_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ /* CPU timer */
+ stpt __LC_CPU_TIMER_SAVE_AREA-SAVE_AREA_BASE(%r1)
+ /* Saved prefix register */
+ larl %r2,dump_prefix_page
+ mvc __LC_PREFIX_SAVE_AREA-SAVE_AREA_BASE(4,%r1),0(%r2)
+ /* Clock comparator - seven bytes */
+ larl %r2,.Lclkcmp
+ stckc 0(%r2)
+ mvc __LC_CLOCK_COMP_SAVE_AREA-SAVE_AREA_BASE + 1(7,%r1),1(%r2)
+ /* Program status word */
+ epsw %r2,%r3
+ st %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 0(%r1)
+ st %r3,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 4(%r1)
+ larl %r2,store_status
+ stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1)
+ br %r14
+
+ .section .bss
+ .align 8
+.Lclkcmp: .quad 0x0000000000000000
+ .previous
+
+#
+# do_reipl_asm
+# Parameter: r2 = schid of reipl device
+#
+
+ENTRY(do_reipl_asm)
+ basr %r13,0
+.Lpg0: lpswe .Lnewpsw-.Lpg0(%r13)
+.Lpg1: brasl %r14,store_status
+
+ lctlg %c6,%c6,.Lall-.Lpg0(%r13)
+ lgr %r1,%r2
+ mvc __LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13)
+ stsch .Lschib-.Lpg0(%r13)
+ oi .Lschib+5-.Lpg0(%r13),0x84
+.Lecs: xi .Lschib+27-.Lpg0(%r13),0x01
+ msch .Lschib-.Lpg0(%r13)
+ lghi %r0,5
+.Lssch: ssch .Liplorb-.Lpg0(%r13)
+ jz .L001
+ brct %r0,.Lssch
+ bas %r14,.Ldisab-.Lpg0(%r13)
+.L001: mvc __LC_IO_NEW_PSW(16),.Lionew-.Lpg0(%r13)
+.Ltpi: lpswe .Lwaitpsw-.Lpg0(%r13)
+.Lcont: c %r1,__LC_SUBCHANNEL_ID
+ jnz .Ltpi
+ clc __LC_IO_INT_PARM(4),.Liplorb-.Lpg0(%r13)
+ jnz .Ltpi
+ tsch .Liplirb-.Lpg0(%r13)
+ tm .Liplirb+9-.Lpg0(%r13),0xbf
+ jz .L002
+ bas %r14,.Ldisab-.Lpg0(%r13)
+.L002: tm .Liplirb+8-.Lpg0(%r13),0xf3
+ jz .L003
+ bas %r14,.Ldisab-.Lpg0(%r13)
+.L003: st %r1,__LC_SUBCHANNEL_ID
+ lhi %r1,0 # mode 0 = esa
+ slr %r0,%r0 # set cpuid to zero
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esa mode
+ lpsw 0
+.Ldisab: sll %r14,1
+ srl %r14,1 # need to kill hi bit to avoid specification exceptions.
+ st %r14,.Ldispsw+12-.Lpg0(%r13)
+ lpswe .Ldispsw-.Lpg0(%r13)
+ .align 8
+.Lall: .quad 0x00000000ff000000
+ .align 16
+/*
+ * These addresses have to be 31 bit otherwise
+ * the sigp will throw a specifcation exception
+ * when switching to ESA mode as bit 31 be set
+ * in the ESA psw.
+ * Bit 31 of the addresses has to be 0 for the
+ * 31bit lpswe instruction a fact they appear to have
+ * omitted from the pop.
+ */
+.Lnewpsw: .quad 0x0000000080000000
+ .quad .Lpg1
+.Lpcnew: .quad 0x0000000080000000
+ .quad .Lecs
+.Lionew: .quad 0x0000000080000000
+ .quad .Lcont
+.Lwaitpsw: .quad 0x0202000080000000
+ .quad .Ltpi
+.Ldispsw: .quad 0x0002000080000000
+ .quad 0x0000000000000000
+.Liplccws: .long 0x02000000,0x60000018
+ .long 0x08000008,0x20000001
+.Liplorb: .long 0x0049504c,0x0040ff80
+ .long 0x00000000+.Liplccws
+.Lschib: .long 0x00000000,0x00000000
+ .long 0x00000000,0x00000000
+ .long 0x00000000,0x00000000
+ .long 0x00000000,0x00000000
+ .long 0x00000000,0x00000000
+ .long 0x00000000,0x00000000
+.Liplirb: .long 0x00000000,0x00000000
+ .long 0x00000000,0x00000000
+ .long 0x00000000,0x00000000
+ .long 0x00000000,0x00000000
+ .long 0x00000000,0x00000000
+ .long 0x00000000,0x00000000
+ .long 0x00000000,0x00000000
+ .long 0x00000000,0x00000000
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
new file mode 100644
index 000000000..cfac28330
--- /dev/null
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -0,0 +1,121 @@
+/*
+ * Copyright IBM Corp. 2005
+ *
+ * Author(s): Rolf Adelsberger,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/sigp.h>
+
+/*
+ * moves the new kernel to its destination...
+ * %r2 = pointer to first kimage_entry_t
+ * %r3 = start address - where to jump to after the job is done...
+ *
+ * %r5 will be used as temp. storage
+ * %r6 holds the destination address
+ * %r7 = PAGE_SIZE
+ * %r8 holds the source address
+ * %r9 = PAGE_SIZE
+ *
+ * 0xf000 is a page_mask
+ */
+
+ .text
+ENTRY(relocate_kernel)
+ basr %r13,0 # base address
+ .base:
+ stnsm sys_msk-.base(%r13),0xfb # disable DAT
+ stctg %c0,%c15,ctlregs-.base(%r13)
+ stmg %r0,%r15,gprregs-.base(%r13)
+ lghi %r0,3
+ sllg %r0,%r0,31
+ stg %r0,0x1d0(%r0)
+ la %r0,.back_pgm-.base(%r13)
+ stg %r0,0x1d8(%r0)
+ la %r1,load_psw-.base(%r13)
+ mvc 0(8,%r0),0(%r1)
+ la %r0,.back-.base(%r13)
+ st %r0,4(%r0)
+ oi 4(%r0),0x80
+ lghi %r0,0
+ diag %r0,%r0,0x308
+ .back:
+ lhi %r1,1 # mode 1 = esame
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE # switch to esame mode
+ sam64 # switch to 64 bit addressing mode
+ basr %r13,0
+ .back_base:
+ oi have_diag308-.back_base(%r13),0x01
+ lctlg %c0,%c15,ctlregs-.back_base(%r13)
+ lmg %r0,%r15,gprregs-.back_base(%r13)
+ j .top
+ .back_pgm:
+ lmg %r0,%r15,gprregs-.base(%r13)
+ .top:
+ lghi %r7,4096 # load PAGE_SIZE in r7
+ lghi %r9,4096 # load PAGE_SIZE in r9
+ lg %r5,0(%r2) # read another word for indirection page
+ aghi %r2,8 # increment pointer
+ tml %r5,0x1 # is it a destination page?
+ je .indir_check # NO, goto "indir_check"
+ lgr %r6,%r5 # r6 = r5
+ nill %r6,0xf000 # mask it out and...
+ j .top # ...next iteration
+ .indir_check:
+ tml %r5,0x2 # is it a indirection page?
+ je .done_test # NO, goto "done_test"
+ nill %r5,0xf000 # YES, mask out,
+ lgr %r2,%r5 # move it into the right register,
+ j .top # and read next...
+ .done_test:
+ tml %r5,0x4 # is it the done indicator?
+ je .source_test # NO! Well, then it should be the source indicator...
+ j .done # ok, lets finish it here...
+ .source_test:
+ tml %r5,0x8 # it should be a source indicator...
+ je .top # NO, ignore it...
+ lgr %r8,%r5 # r8 = r5
+ nill %r8,0xf000 # masking
+ 0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0
+ jo 0b
+ j .top
+ .done:
+ sgr %r0,%r0 # clear register r0
+ la %r4,load_psw-.base(%r13) # load psw-address into the register
+ o %r3,4(%r4) # or load address into psw
+ st %r3,4(%r4)
+ mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0
+ tm have_diag308-.base(%r13),0x01
+ jno .no_diag308
+ diag %r0,%r0,0x308
+ .no_diag308:
+ sam31 # 31 bit mode
+ sr %r1,%r1 # erase register r1
+ sr %r2,%r2 # erase register r2
+ sigp %r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero
+ lpsw 0 # hopefully start new kernel...
+
+ .align 8
+ load_psw:
+ .long 0x00080000,0x80000000
+ sys_msk:
+ .quad 0
+ ctlregs:
+ .rept 16
+ .quad 0
+ .endr
+ gprregs:
+ .rept 16
+ .quad 0
+ .endr
+ have_diag308:
+ .byte 0
+ .align 8
+ relocate_kernel_end:
+ .align 8
+ .globl relocate_kernel_len
+ relocate_kernel_len:
+ .quad relocate_kernel_end - relocate_kernel
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
new file mode 100644
index 000000000..26b4ae96f
--- /dev/null
+++ b/arch/s390/kernel/runtime_instr.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright IBM Corp. 2012
+ * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <asm/runtime_instr.h>
+#include <asm/cpu_mf.h>
+#include <asm/irq.h>
+
+/* empty control block to disable RI by loading it */
+struct runtime_instr_cb runtime_instr_empty_cb;
+
+static int runtime_instr_avail(void)
+{
+ return test_facility(64);
+}
+
+static void disable_runtime_instr(void)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+
+ load_runtime_instr_cb(&runtime_instr_empty_cb);
+
+ /*
+ * Make sure the RI bit is deleted from the PSW. If the user did not
+ * switch off RI before the system call the process will get a
+ * specification exception otherwise.
+ */
+ regs->psw.mask &= ~PSW_MASK_RI;
+}
+
+static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
+{
+ cb->buf_limit = 0xfff;
+ cb->int_requested = 1;
+ cb->pstate = 1;
+ cb->pstate_set_buf = 1;
+ cb->pstate_sample = 1;
+ cb->pstate_collect = 1;
+ cb->key = PAGE_DEFAULT_KEY;
+ cb->valid = 1;
+}
+
+void exit_thread_runtime_instr(void)
+{
+ struct task_struct *task = current;
+
+ if (!task->thread.ri_cb)
+ return;
+ disable_runtime_instr();
+ kfree(task->thread.ri_cb);
+ task->thread.ri_signum = 0;
+ task->thread.ri_cb = NULL;
+}
+
+static void runtime_instr_int_handler(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ struct siginfo info;
+
+ if (!(param32 & CPU_MF_INT_RI_MASK))
+ return;
+
+ inc_irq_stat(IRQEXT_CMR);
+
+ if (!current->thread.ri_cb)
+ return;
+ if (current->thread.ri_signum < SIGRTMIN ||
+ current->thread.ri_signum > SIGRTMAX) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.si_signo = current->thread.ri_signum;
+ info.si_code = SI_QUEUE;
+ if (param32 & CPU_MF_INT_RI_BUF_FULL)
+ info.si_int = ENOBUFS;
+ else if (param32 & CPU_MF_INT_RI_HALTED)
+ info.si_int = ECANCELED;
+ else
+ return; /* unknown reason */
+
+ send_sig_info(current->thread.ri_signum, &info, current);
+}
+
+SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
+{
+ struct runtime_instr_cb *cb;
+
+ if (!runtime_instr_avail())
+ return -EOPNOTSUPP;
+
+ if (command == S390_RUNTIME_INSTR_STOP) {
+ preempt_disable();
+ exit_thread_runtime_instr();
+ preempt_enable();
+ return 0;
+ }
+
+ if (command != S390_RUNTIME_INSTR_START ||
+ (signum < SIGRTMIN || signum > SIGRTMAX))
+ return -EINVAL;
+
+ if (!current->thread.ri_cb) {
+ cb = kzalloc(sizeof(*cb), GFP_KERNEL);
+ if (!cb)
+ return -ENOMEM;
+ } else {
+ cb = current->thread.ri_cb;
+ memset(cb, 0, sizeof(*cb));
+ }
+
+ init_runtime_instr_cb(cb);
+ current->thread.ri_signum = signum;
+
+ /* now load the control block to make it available */
+ preempt_disable();
+ current->thread.ri_cb = cb;
+ load_runtime_instr_cb(cb);
+ preempt_enable();
+ return 0;
+}
+
+static int __init runtime_instr_init(void)
+{
+ int rc;
+
+ if (!runtime_instr_avail())
+ return 0;
+
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ runtime_instr_int_handler);
+ if (rc)
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+ else
+ pr_info("Runtime instrumentation facility initialized\n");
+ return rc;
+}
+device_initcall(runtime_instr_init);
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
new file mode 100644
index 000000000..9f6046793
--- /dev/null
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -0,0 +1,13 @@
+#include <linux/module.h>
+#include <linux/kvm_host.h>
+#include <asm/ftrace.h>
+
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(_mcount);
+#endif
+#if IS_ENABLED(CONFIG_KVM)
+EXPORT_SYMBOL(sie64a);
+EXPORT_SYMBOL(sie_exit);
+#endif
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S
new file mode 100644
index 000000000..43c3169ea
--- /dev/null
+++ b/arch/s390/kernel/sclp.S
@@ -0,0 +1,351 @@
+/*
+ * Mini SCLP driver.
+ *
+ * Copyright IBM Corp. 2004, 2009
+ *
+ * Author(s): Peter Oberparleiter <Peter.Oberparleiter@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/irq.h>
+
+LC_EXT_NEW_PSW = 0x58 # addr of ext int handler
+LC_EXT_NEW_PSW_64 = 0x1b0 # addr of ext int handler 64 bit
+LC_EXT_INT_PARAM = 0x80 # addr of ext int parameter
+LC_EXT_INT_CODE = 0x86 # addr of ext int code
+LC_AR_MODE_ID = 0xa3
+
+#
+# Subroutine which waits synchronously until either an external interruption
+# or a timeout occurs.
+#
+# Parameters:
+# R2 = 0 for no timeout, non-zero for timeout in (approximated) seconds
+#
+# Returns:
+# R2 = 0 on interrupt, 2 on timeout
+# R3 = external interruption parameter if R2=0
+#
+
+_sclp_wait_int:
+ stm %r6,%r15,24(%r15) # save registers
+ basr %r13,0 # get base register
+.LbaseS1:
+ ahi %r15,-96 # create stack frame
+ la %r8,LC_EXT_NEW_PSW # register int handler
+ la %r9,.LextpswS1-.LbaseS1(%r13)
+ tm LC_AR_MODE_ID,1
+ jno .Lesa1
+ la %r8,LC_EXT_NEW_PSW_64 # register int handler 64 bit
+ la %r9,.LextpswS1_64-.LbaseS1(%r13)
+.Lesa1:
+ mvc .LoldpswS1-.LbaseS1(16,%r13),0(%r8)
+ mvc 0(16,%r8),0(%r9)
+ epsw %r6,%r7 # set current addressing mode
+ nill %r6,0x1 # in new psw (31 or 64 bit mode)
+ nilh %r7,0x8000
+ stm %r6,%r7,0(%r8)
+ lhi %r6,0x0200 # cr mask for ext int (cr0.54)
+ ltr %r2,%r2
+ jz .LsetctS1
+ ahi %r6,0x0800 # cr mask for clock int (cr0.52)
+ stck .LtimeS1-.LbaseS1(%r13) # initiate timeout
+ al %r2,.LtimeS1-.LbaseS1(%r13)
+ st %r2,.LtimeS1-.LbaseS1(%r13)
+ sckc .LtimeS1-.LbaseS1(%r13)
+
+.LsetctS1:
+ stctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # enable required interrupts
+ l %r0,.LctlS1-.LbaseS1(%r13)
+ lhi %r1,~(0x200 | 0x800) # clear old values
+ nr %r1,%r0
+ or %r1,%r6 # set new value
+ st %r1,.LctlS1-.LbaseS1(%r13)
+ lctl %c0,%c0,.LctlS1-.LbaseS1(%r13)
+ st %r0,.LctlS1-.LbaseS1(%r13)
+ lhi %r2,2 # return code for timeout
+.LloopS1:
+ lpsw .LwaitpswS1-.LbaseS1(%r13) # wait until interrupt
+.LwaitS1:
+ lh %r7,LC_EXT_INT_CODE
+ chi %r7,EXT_IRQ_CLK_COMP # timeout?
+ je .LtimeoutS1
+ chi %r7,EXT_IRQ_SERVICE_SIG # service int?
+ jne .LloopS1
+ sr %r2,%r2
+ l %r3,LC_EXT_INT_PARAM
+.LtimeoutS1:
+ lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # restore interrupt setting
+ # restore old handler
+ mvc 0(16,%r8),.LoldpswS1-.LbaseS1(%r13)
+ lm %r6,%r15,120(%r15) # restore registers
+ br %r14 # return to caller
+
+ .align 8
+.LoldpswS1:
+ .long 0, 0, 0, 0 # old ext int PSW
+.LextpswS1:
+ .long 0x00080000, 0x80000000+.LwaitS1 # PSW to handle ext int
+.LextpswS1_64:
+ .quad 0, .LwaitS1 # PSW to handle ext int, 64 bit
+.LwaitpswS1:
+ .long 0x010a0000, 0x00000000+.LloopS1 # PSW to wait for ext int
+.LtimeS1:
+ .quad 0 # current time
+.LctlS1:
+ .long 0 # CT0 contents
+
+#
+# Subroutine to synchronously issue a service call.
+#
+# Parameters:
+# R2 = command word
+# R3 = sccb address
+#
+# Returns:
+# R2 = 0 on success, 1 on failure
+# R3 = sccb response code if R2 = 0
+#
+
+_sclp_servc:
+ stm %r6,%r15,24(%r15) # save registers
+ ahi %r15,-96 # create stack frame
+ lr %r6,%r2 # save command word
+ lr %r7,%r3 # save sccb address
+.LretryS2:
+ lhi %r2,1 # error return code
+ .insn rre,0xb2200000,%r6,%r7 # servc
+ brc 1,.LendS2 # exit if not operational
+ brc 8,.LnotbusyS2 # go on if not busy
+ sr %r2,%r2 # wait until no longer busy
+ bras %r14,_sclp_wait_int
+ j .LretryS2 # retry
+.LnotbusyS2:
+ sr %r2,%r2 # wait until result
+ bras %r14,_sclp_wait_int
+ sr %r2,%r2
+ lh %r3,6(%r7)
+.LendS2:
+ lm %r6,%r15,120(%r15) # restore registers
+ br %r14
+
+#
+# Subroutine to set up the SCLP interface.
+#
+# Parameters:
+# R2 = 0 to activate, non-zero to deactivate
+#
+# Returns:
+# R2 = 0 on success, non-zero on failure
+#
+
+_sclp_setup:
+ stm %r6,%r15,24(%r15) # save registers
+ ahi %r15,-96 # create stack frame
+ basr %r13,0 # get base register
+.LbaseS3:
+ l %r6,.LsccbS0-.LbaseS3(%r13) # prepare init mask sccb
+ mvc 0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13)
+ ltr %r2,%r2 # initialization?
+ jz .LdoinitS3 # go ahead
+ # clear masks
+ xc .LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6)
+.LdoinitS3:
+ l %r2,.LwritemaskS3-.LbaseS3(%r13)# get command word
+ lr %r3,%r6 # get sccb address
+ bras %r14,_sclp_servc # issue service call
+ ltr %r2,%r2 # servc successful?
+ jnz .LerrorS3
+ chi %r3,0x20 # write mask successful?
+ jne .LerrorS3
+ # check masks
+ la %r2,.LinitmaskS3-.LinitsccbS3(%r6)
+ l %r1,0(%r2) # receive mask ok?
+ n %r1,12(%r2)
+ cl %r1,0(%r2)
+ jne .LerrorS3
+ l %r1,4(%r2) # send mask ok?
+ n %r1,8(%r2)
+ cl %r1,4(%r2)
+ sr %r2,%r2
+ je .LendS3
+.LerrorS3:
+ lhi %r2,1 # error return code
+.LendS3:
+ lm %r6,%r15,120(%r15) # restore registers
+ br %r14
+.LwritemaskS3:
+ .long 0x00780005 # SCLP command for write mask
+.LinitsccbS3:
+ .word .LinitendS3-.LinitsccbS3
+ .byte 0,0,0,0
+ .word 0
+ .word 0
+ .word 4
+.LinitmaskS3:
+ .long 0x80000000
+ .long 0x40000000
+ .long 0
+ .long 0
+.LinitendS3:
+
+#
+# Subroutine which prints a given text to the SCLP console.
+#
+# Parameters:
+# R2 = address of nil-terminated ASCII text
+#
+# Returns:
+# R2 = 0 on success, 1 on failure
+#
+
+_sclp_print:
+ stm %r6,%r15,24(%r15) # save registers
+ ahi %r15,-96 # create stack frame
+ basr %r13,0 # get base register
+.LbaseS4:
+ l %r8,.LsccbS0-.LbaseS4(%r13) # prepare write data sccb
+ mvc 0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13)
+ la %r7,.LmtoS4-.LwritesccbS4(%r8) # current mto addr
+ sr %r0,%r0
+ l %r10,.Lascebc-.LbaseS4(%r13) # address of translation table
+.LinitmtoS4:
+ # initialize mto
+ mvc 0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13)
+ lhi %r6,.LmtoendS4-.LmtoS4 # current mto length
+.LloopS4:
+ ic %r0,0(%r2) # get character
+ ahi %r2,1
+ ltr %r0,%r0 # end of string?
+ jz .LfinalizemtoS4
+ chi %r0,0x0a # end of line (NL)?
+ jz .LfinalizemtoS4
+ stc %r0,0(%r6,%r7) # copy to mto
+ la %r11,0(%r6,%r7)
+ tr 0(1,%r11),0(%r10) # translate to EBCDIC
+ ahi %r6,1
+ j .LloopS4
+.LfinalizemtoS4:
+ sth %r6,0(%r7) # update mto length
+ lh %r9,.LmdbS4-.LwritesccbS4(%r8) # update mdb length
+ ar %r9,%r6
+ sth %r9,.LmdbS4-.LwritesccbS4(%r8)
+ lh %r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length
+ ar %r9,%r6
+ sth %r9,.LevbufS4-.LwritesccbS4(%r8)
+ lh %r9,0(%r8) # update sccb length
+ ar %r9,%r6
+ sth %r9,0(%r8)
+ ar %r7,%r6 # update current mto address
+ ltr %r0,%r0 # more characters?
+ jnz .LinitmtoS4
+ l %r2,.LwritedataS4-.LbaseS4(%r13)# write data
+ lr %r3,%r8
+ bras %r14,_sclp_servc
+ ltr %r2,%r2 # servc successful?
+ jnz .LendS4
+ chi %r3,0x20 # write data successful?
+ je .LendS4
+ lhi %r2,1 # error return code
+.LendS4:
+ lm %r6,%r15,120(%r15) # restore registers
+ br %r14
+
+#
+# Function which prints a given text to the SCLP console.
+#
+# Parameters:
+# R2 = address of nil-terminated ASCII text
+#
+# Returns:
+# R2 = 0 on success, 1 on failure
+#
+
+ENTRY(_sclp_print_early)
+ stm %r6,%r15,24(%r15) # save registers
+ ahi %r15,-96 # create stack frame
+ tm LC_AR_MODE_ID,1
+ jno .Lesa2
+ ahi %r15,-80
+ stmh %r6,%r15,96(%r15) # store upper register halves
+.Lesa2:
+ lr %r10,%r2 # save string pointer
+ lhi %r2,0
+ bras %r14,_sclp_setup # enable console
+ ltr %r2,%r2
+ jnz .LendS5
+ lr %r2,%r10
+ bras %r14,_sclp_print # print string
+ ltr %r2,%r2
+ jnz .LendS5
+ lhi %r2,1
+ bras %r14,_sclp_setup # disable console
+.LendS5:
+ tm LC_AR_MODE_ID,1
+ jno .Lesa3
+ lgfr %r2,%r2 # sign extend return value
+ lmh %r6,%r15,96(%r15) # restore upper register halves
+ ahi %r15,80
+.Lesa3:
+ lm %r6,%r15,120(%r15) # restore registers
+ br %r14
+
+.LwritedataS4:
+ .long 0x00760005 # SCLP command for write data
+.LwritesccbS4:
+ # sccb
+ .word .LmtoS4-.LwritesccbS4
+ .byte 0
+ .byte 0,0,0
+ .word 0
+
+ # evbuf
+.LevbufS4:
+ .word .LmtoS4-.LevbufS4
+ .byte 0x02
+ .byte 0
+ .word 0
+
+.LmdbS4:
+ # mdb
+ .word .LmtoS4-.LmdbS4
+ .word 1
+ .long 0xd4c4c240
+ .long 1
+
+ # go
+.LgoS4:
+ .word .LmtoS4-.LgoS4
+ .word 1
+ .long 0
+ .byte 0,0,0,0,0,0,0,0
+ .byte 0,0,0
+ .byte 0
+ .byte 0,0,0,0,0,0,0
+ .byte 0
+ .word 0
+ .byte 0,0,0,0,0,0,0,0,0,0
+ .byte 0,0,0,0,0,0,0,0
+ .byte 0,0,0,0,0,0,0,0
+
+.LmtoS4:
+ .word .LmtoendS4-.LmtoS4
+ .word 4
+ .word 0x1000
+ .byte 0
+ .byte 0,0,0
+.LmtoendS4:
+
+ # Global constants
+.LsccbS0:
+ .long _sclp_work_area
+.Lascebc:
+ .long _ascebc
+
+.section .data,"aw",@progbits
+ .balign 4096
+_sclp_work_area:
+ .fill 4096
+.previous
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
new file mode 100644
index 000000000..7262fe438
--- /dev/null
+++ b/arch/s390/kernel/setup.c
@@ -0,0 +1,892 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2012
+ * Author(s): Hartmut Penner (hp@de.ibm.com),
+ * Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * Derived from "arch/i386/kernel/setup.c"
+ * Copyright (C) 1995, Linus Torvalds
+ */
+
+/*
+ * This file handles the architecture-dependent parts of initialization
+ */
+
+#define KMSG_COMPONENT "setup"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/random.h>
+#include <linux/user.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/bootmem.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+#include <linux/kernel_stat.h>
+#include <linux/device.h>
+#include <linux/notifier.h>
+#include <linux/pfn.h>
+#include <linux/ctype.h>
+#include <linux/reboot.h>
+#include <linux/topology.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+#include <linux/memory.h>
+#include <linux/compat.h>
+
+#include <asm/ipl.h>
+#include <asm/facility.h>
+#include <asm/smp.h>
+#include <asm/mmu_context.h>
+#include <asm/cpcmd.h>
+#include <asm/lowcore.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/sections.h>
+#include <asm/ebcdic.h>
+#include <asm/kvm_virtio.h>
+#include <asm/diag.h>
+#include <asm/os_info.h>
+#include <asm/sclp.h>
+#include <asm/sysinfo.h>
+#include "entry.h"
+
+/*
+ * Machine setup..
+ */
+unsigned int console_mode = 0;
+EXPORT_SYMBOL(console_mode);
+
+unsigned int console_devno = -1;
+EXPORT_SYMBOL(console_devno);
+
+unsigned int console_irq = -1;
+EXPORT_SYMBOL(console_irq);
+
+unsigned long elf_hwcap = 0;
+char elf_platform[ELF_PLATFORM_SIZE];
+
+int __initdata memory_end_set;
+unsigned long __initdata memory_end;
+unsigned long __initdata max_physmem_end;
+
+unsigned long VMALLOC_START;
+EXPORT_SYMBOL(VMALLOC_START);
+
+unsigned long VMALLOC_END;
+EXPORT_SYMBOL(VMALLOC_END);
+
+struct page *vmemmap;
+EXPORT_SYMBOL(vmemmap);
+
+unsigned long MODULES_VADDR;
+unsigned long MODULES_END;
+
+/* An array with a pointer to the lowcore of every CPU. */
+struct _lowcore *lowcore_ptr[NR_CPUS];
+EXPORT_SYMBOL(lowcore_ptr);
+
+/*
+ * This is set up by the setup-routine at boot-time
+ * for S390 need to find out, what we have to setup
+ * using address 0x10400 ...
+ */
+
+#include <asm/setup.h>
+
+/*
+ * condev= and conmode= setup parameter.
+ */
+
+static int __init condev_setup(char *str)
+{
+ int vdev;
+
+ vdev = simple_strtoul(str, &str, 0);
+ if (vdev >= 0 && vdev < 65536) {
+ console_devno = vdev;
+ console_irq = -1;
+ }
+ return 1;
+}
+
+__setup("condev=", condev_setup);
+
+static void __init set_preferred_console(void)
+{
+ if (MACHINE_IS_KVM) {
+ if (sclp_has_vt220())
+ add_preferred_console("ttyS", 1, NULL);
+ else if (sclp_has_linemode())
+ add_preferred_console("ttyS", 0, NULL);
+ else
+ add_preferred_console("hvc", 0, NULL);
+ } else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
+ add_preferred_console("ttyS", 0, NULL);
+ else if (CONSOLE_IS_3270)
+ add_preferred_console("tty3270", 0, NULL);
+}
+
+static int __init conmode_setup(char *str)
+{
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+ if (strncmp(str, "hwc", 4) == 0 || strncmp(str, "sclp", 5) == 0)
+ SET_CONSOLE_SCLP;
+#endif
+#if defined(CONFIG_TN3215_CONSOLE)
+ if (strncmp(str, "3215", 5) == 0)
+ SET_CONSOLE_3215;
+#endif
+#if defined(CONFIG_TN3270_CONSOLE)
+ if (strncmp(str, "3270", 5) == 0)
+ SET_CONSOLE_3270;
+#endif
+ set_preferred_console();
+ return 1;
+}
+
+__setup("conmode=", conmode_setup);
+
+static void __init conmode_default(void)
+{
+ char query_buffer[1024];
+ char *ptr;
+
+ if (MACHINE_IS_VM) {
+ cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
+ console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
+ ptr = strstr(query_buffer, "SUBCHANNEL =");
+ console_irq = simple_strtoul(ptr + 13, NULL, 16);
+ cpcmd("QUERY TERM", query_buffer, 1024, NULL);
+ ptr = strstr(query_buffer, "CONMODE");
+ /*
+ * Set the conmode to 3215 so that the device recognition
+ * will set the cu_type of the console to 3215. If the
+ * conmode is 3270 and we don't set it back then both
+ * 3215 and the 3270 driver will try to access the console
+ * device (3215 as console and 3270 as normal tty).
+ */
+ cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
+ if (ptr == NULL) {
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+ SET_CONSOLE_SCLP;
+#endif
+ return;
+ }
+ if (strncmp(ptr + 8, "3270", 4) == 0) {
+#if defined(CONFIG_TN3270_CONSOLE)
+ SET_CONSOLE_3270;
+#elif defined(CONFIG_TN3215_CONSOLE)
+ SET_CONSOLE_3215;
+#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+ SET_CONSOLE_SCLP;
+#endif
+ } else if (strncmp(ptr + 8, "3215", 4) == 0) {
+#if defined(CONFIG_TN3215_CONSOLE)
+ SET_CONSOLE_3215;
+#elif defined(CONFIG_TN3270_CONSOLE)
+ SET_CONSOLE_3270;
+#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+ SET_CONSOLE_SCLP;
+#endif
+ }
+ } else {
+#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
+ SET_CONSOLE_SCLP;
+#endif
+ }
+}
+
+#ifdef CONFIG_CRASH_DUMP
+static void __init setup_zfcpdump(void)
+{
+ if (ipl_info.type != IPL_TYPE_FCP_DUMP)
+ return;
+ if (OLDMEM_BASE)
+ return;
+ strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
+ console_loglevel = 2;
+}
+#else
+static inline void setup_zfcpdump(void) {}
+#endif /* CONFIG_CRASH_DUMP */
+
+ /*
+ * Reboot, halt and power_off stubs. They just call _machine_restart,
+ * _machine_halt or _machine_power_off.
+ */
+
+void machine_restart(char *command)
+{
+ if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
+ /*
+ * Only unblank the console if we are called in enabled
+ * context or a bust_spinlocks cleared the way for us.
+ */
+ console_unblank();
+ _machine_restart(command);
+}
+
+void machine_halt(void)
+{
+ if (!in_interrupt() || oops_in_progress)
+ /*
+ * Only unblank the console if we are called in enabled
+ * context or a bust_spinlocks cleared the way for us.
+ */
+ console_unblank();
+ _machine_halt();
+}
+
+void machine_power_off(void)
+{
+ if (!in_interrupt() || oops_in_progress)
+ /*
+ * Only unblank the console if we are called in enabled
+ * context or a bust_spinlocks cleared the way for us.
+ */
+ console_unblank();
+ _machine_power_off();
+}
+
+/*
+ * Dummy power off function.
+ */
+void (*pm_power_off)(void) = machine_power_off;
+EXPORT_SYMBOL_GPL(pm_power_off);
+
+static int __init early_parse_mem(char *p)
+{
+ memory_end = memparse(p, &p);
+ memory_end &= PAGE_MASK;
+ memory_end_set = 1;
+ return 0;
+}
+early_param("mem", early_parse_mem);
+
+static int __init parse_vmalloc(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+ VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK;
+ return 0;
+}
+early_param("vmalloc", parse_vmalloc);
+
+void *restart_stack __attribute__((__section__(".data")));
+
+static void __init setup_lowcore(void)
+{
+ struct _lowcore *lc;
+
+ /*
+ * Setup lowcore for boot cpu
+ */
+ BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096);
+ lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
+ lc->restart_psw.mask = PSW_KERNEL_BITS;
+ lc->restart_psw.addr =
+ PSW_ADDR_AMODE | (unsigned long) restart_int_handler;
+ lc->external_new_psw.mask = PSW_KERNEL_BITS |
+ PSW_MASK_DAT | PSW_MASK_MCHECK;
+ lc->external_new_psw.addr =
+ PSW_ADDR_AMODE | (unsigned long) ext_int_handler;
+ lc->svc_new_psw.mask = PSW_KERNEL_BITS |
+ PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call;
+ lc->program_new_psw.mask = PSW_KERNEL_BITS |
+ PSW_MASK_DAT | PSW_MASK_MCHECK;
+ lc->program_new_psw.addr =
+ PSW_ADDR_AMODE | (unsigned long) pgm_check_handler;
+ lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
+ lc->mcck_new_psw.addr =
+ PSW_ADDR_AMODE | (unsigned long) mcck_int_handler;
+ lc->io_new_psw.mask = PSW_KERNEL_BITS |
+ PSW_MASK_DAT | PSW_MASK_MCHECK;
+ lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
+ lc->clock_comparator = -1ULL;
+ lc->kernel_stack = ((unsigned long) &init_thread_union)
+ + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+ lc->async_stack = (unsigned long)
+ __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0)
+ + ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+ lc->panic_stack = (unsigned long)
+ __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0)
+ + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+ lc->current_task = (unsigned long) init_thread_union.thread_info.task;
+ lc->thread_info = (unsigned long) &init_thread_union;
+ lc->machine_flags = S390_lowcore.machine_flags;
+ lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
+ memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
+ MAX_FACILITY_BIT/8);
+ if (MACHINE_HAS_VX)
+ lc->vector_save_area_addr =
+ (unsigned long) &lc->vector_save_area;
+ lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
+ lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
+ lc->async_enter_timer = S390_lowcore.async_enter_timer;
+ lc->exit_timer = S390_lowcore.exit_timer;
+ lc->user_timer = S390_lowcore.user_timer;
+ lc->system_timer = S390_lowcore.system_timer;
+ lc->steal_timer = S390_lowcore.steal_timer;
+ lc->last_update_timer = S390_lowcore.last_update_timer;
+ lc->last_update_clock = S390_lowcore.last_update_clock;
+
+ restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
+ restart_stack += ASYNC_SIZE;
+
+ /*
+ * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
+ * restart data to the absolute zero lowcore. This is necessary if
+ * PSW restart is done on an offline CPU that has lowcore zero.
+ */
+ lc->restart_stack = (unsigned long) restart_stack;
+ lc->restart_fn = (unsigned long) do_restart;
+ lc->restart_data = 0;
+ lc->restart_source = -1UL;
+
+ /* Setup absolute zero lowcore */
+ mem_assign_absolute(S390_lowcore.restart_stack, lc->restart_stack);
+ mem_assign_absolute(S390_lowcore.restart_fn, lc->restart_fn);
+ mem_assign_absolute(S390_lowcore.restart_data, lc->restart_data);
+ mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
+ mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
+
+#ifdef CONFIG_SMP
+ lc->spinlock_lockval = arch_spin_lockval(0);
+#endif
+
+ set_prefix((u32)(unsigned long) lc);
+ lowcore_ptr[0] = lc;
+}
+
+static struct resource code_resource = {
+ .name = "Kernel code",
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+static struct resource data_resource = {
+ .name = "Kernel data",
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+static struct resource bss_resource = {
+ .name = "Kernel bss",
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM,
+};
+
+static struct resource __initdata *standard_resources[] = {
+ &code_resource,
+ &data_resource,
+ &bss_resource,
+};
+
+static void __init setup_resources(void)
+{
+ struct resource *res, *std_res, *sub_res;
+ struct memblock_region *reg;
+ int j;
+
+ code_resource.start = (unsigned long) &_text;
+ code_resource.end = (unsigned long) &_etext - 1;
+ data_resource.start = (unsigned long) &_etext;
+ data_resource.end = (unsigned long) &_edata - 1;
+ bss_resource.start = (unsigned long) &__bss_start;
+ bss_resource.end = (unsigned long) &__bss_stop - 1;
+
+ for_each_memblock(memory, reg) {
+ res = alloc_bootmem_low(sizeof(*res));
+ res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+
+ res->name = "System RAM";
+ res->start = reg->base;
+ res->end = reg->base + reg->size - 1;
+ request_resource(&iomem_resource, res);
+
+ for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
+ std_res = standard_resources[j];
+ if (std_res->start < res->start ||
+ std_res->start > res->end)
+ continue;
+ if (std_res->end > res->end) {
+ sub_res = alloc_bootmem_low(sizeof(*sub_res));
+ *sub_res = *std_res;
+ sub_res->end = res->end;
+ std_res->start = res->end + 1;
+ request_resource(res, sub_res);
+ } else {
+ request_resource(res, std_res);
+ }
+ }
+ }
+}
+
+static void __init setup_memory_end(void)
+{
+ unsigned long vmax, vmalloc_size, tmp;
+
+ /* Choose kernel address space layout: 2, 3, or 4 levels. */
+ vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
+ tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
+ tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
+ if (tmp + vmalloc_size + MODULES_LEN <= (1UL << 42))
+ vmax = 1UL << 42; /* 3-level kernel page table */
+ else
+ vmax = 1UL << 53; /* 4-level kernel page table */
+ /* module area is at the end of the kernel address space. */
+ MODULES_END = vmax;
+ MODULES_VADDR = MODULES_END - MODULES_LEN;
+ VMALLOC_END = MODULES_VADDR;
+ VMALLOC_START = vmax - vmalloc_size;
+
+ /* Split remaining virtual space between 1:1 mapping & vmemmap array */
+ tmp = VMALLOC_START / (PAGE_SIZE + sizeof(struct page));
+ /* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
+ tmp = SECTION_ALIGN_UP(tmp);
+ tmp = VMALLOC_START - tmp * sizeof(struct page);
+ tmp &= ~((vmax >> 11) - 1); /* align to page table level */
+ tmp = min(tmp, 1UL << MAX_PHYSMEM_BITS);
+ vmemmap = (struct page *) tmp;
+
+ /* Take care that memory_end is set and <= vmemmap */
+ memory_end = min(memory_end ?: max_physmem_end, tmp);
+ max_pfn = max_low_pfn = PFN_DOWN(memory_end);
+ memblock_remove(memory_end, ULONG_MAX);
+
+ pr_notice("Max memory size: %luMB\n", memory_end >> 20);
+}
+
+static void __init setup_vmcoreinfo(void)
+{
+ mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
+}
+
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * When kdump is enabled, we have to ensure that no memory from
+ * the area [0 - crashkernel memory size] and
+ * [crashk_res.start - crashk_res.end] is set offline.
+ */
+static int kdump_mem_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct memory_notify *arg = data;
+
+ if (action != MEM_GOING_OFFLINE)
+ return NOTIFY_OK;
+ if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
+ return NOTIFY_BAD;
+ if (arg->start_pfn > PFN_DOWN(crashk_res.end))
+ return NOTIFY_OK;
+ if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start))
+ return NOTIFY_OK;
+ return NOTIFY_BAD;
+}
+
+static struct notifier_block kdump_mem_nb = {
+ .notifier_call = kdump_mem_notifier,
+};
+
+#endif
+
+/*
+ * Make sure that the area behind memory_end is protected
+ */
+static void reserve_memory_end(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (ipl_info.type == IPL_TYPE_FCP_DUMP &&
+ !OLDMEM_BASE && sclp_get_hsa_size()) {
+ memory_end = sclp_get_hsa_size();
+ memory_end &= PAGE_MASK;
+ memory_end_set = 1;
+ }
+#endif
+ if (!memory_end_set)
+ return;
+ memblock_reserve(memory_end, ULONG_MAX);
+}
+
+/*
+ * Make sure that oldmem, where the dump is stored, is protected
+ */
+static void reserve_oldmem(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (OLDMEM_BASE)
+ /* Forget all memory above the running kdump system */
+ memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
+#endif
+}
+
+/*
+ * Make sure that oldmem, where the dump is stored, is protected
+ */
+static void remove_oldmem(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (OLDMEM_BASE)
+ /* Forget all memory above the running kdump system */
+ memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
+#endif
+}
+
+/*
+ * Reserve memory for kdump kernel to be loaded with kexec
+ */
+static void __init reserve_crashkernel(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ unsigned long long crash_base, crash_size;
+ phys_addr_t low, high;
+ int rc;
+
+ rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
+ &crash_base);
+
+ crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
+ crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
+ if (rc || crash_size == 0)
+ return;
+
+ if (memblock.memory.regions[0].size < crash_size) {
+ pr_info("crashkernel reservation failed: %s\n",
+ "first memory chunk must be at least crashkernel size");
+ return;
+ }
+
+ low = crash_base ?: OLDMEM_BASE;
+ high = low + crash_size;
+ if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) {
+ /* The crashkernel fits into OLDMEM, reuse OLDMEM */
+ crash_base = low;
+ } else {
+ /* Find suitable area in free memory */
+ low = max_t(unsigned long, crash_size, sclp_get_hsa_size());
+ high = crash_base ? crash_base + crash_size : ULONG_MAX;
+
+ if (crash_base && crash_base < low) {
+ pr_info("crashkernel reservation failed: %s\n",
+ "crash_base too low");
+ return;
+ }
+ low = crash_base ?: low;
+ crash_base = memblock_find_in_range(low, high, crash_size,
+ KEXEC_CRASH_MEM_ALIGN);
+ }
+
+ if (!crash_base) {
+ pr_info("crashkernel reservation failed: %s\n",
+ "no suitable area found");
+ return;
+ }
+
+ if (register_memory_notifier(&kdump_mem_nb))
+ return;
+
+ if (!OLDMEM_BASE && MACHINE_IS_VM)
+ diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+ insert_resource(&iomem_resource, &crashk_res);
+ memblock_remove(crash_base, crash_size);
+ pr_info("Reserving %lluMB of memory at %lluMB "
+ "for crashkernel (System RAM: %luMB)\n",
+ crash_size >> 20, crash_base >> 20,
+ (unsigned long)memblock.memory.total_size >> 20);
+ os_info_crashkernel_add(crash_base, crash_size);
+#endif
+}
+
+/*
+ * Reserve the initrd from being used by memblock
+ */
+static void __init reserve_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ initrd_start = INITRD_START;
+ initrd_end = initrd_start + INITRD_SIZE;
+ memblock_reserve(INITRD_START, INITRD_SIZE);
+#endif
+}
+
+/*
+ * Check for initrd being in usable memory
+ */
+static void __init check_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (INITRD_START && INITRD_SIZE &&
+ !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) {
+ pr_err("initrd does not fit memory.\n");
+ memblock_free(INITRD_START, INITRD_SIZE);
+ initrd_start = initrd_end = 0;
+ }
+#endif
+}
+
+/*
+ * Reserve all kernel text
+ */
+static void __init reserve_kernel(void)
+{
+ unsigned long start_pfn;
+ start_pfn = PFN_UP(__pa(&_end));
+
+ /*
+ * Reserve memory used for lowcore/command line/kernel image.
+ */
+ memblock_reserve(0, (unsigned long)_ehead);
+ memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
+ - (unsigned long)_stext);
+}
+
+static void __init reserve_elfcorehdr(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel())
+ memblock_reserve(elfcorehdr_addr - OLDMEM_BASE,
+ PAGE_ALIGN(elfcorehdr_size));
+#endif
+}
+
+static void __init setup_memory(void)
+{
+ struct memblock_region *reg;
+
+ /*
+ * Init storage key for present memory
+ */
+ for_each_memblock(memory, reg) {
+ storage_key_init_range(reg->base, reg->base + reg->size);
+ }
+ psw_set_key(PAGE_DEFAULT_KEY);
+
+ /* Only cosmetics */
+ memblock_enforce_memory_limit(memblock_end_of_DRAM());
+}
+
+/*
+ * Setup hardware capabilities.
+ */
+static void __init setup_hwcaps(void)
+{
+ static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
+ struct cpuid cpu_id;
+ int i;
+
+ /*
+ * The store facility list bits numbers as found in the principles
+ * of operation are numbered with bit 1UL<<31 as number 0 to
+ * bit 1UL<<0 as number 31.
+ * Bit 0: instructions named N3, "backported" to esa-mode
+ * Bit 2: z/Architecture mode is active
+ * Bit 7: the store-facility-list-extended facility is installed
+ * Bit 17: the message-security assist is installed
+ * Bit 19: the long-displacement facility is installed
+ * Bit 21: the extended-immediate facility is installed
+ * Bit 22: extended-translation facility 3 is installed
+ * Bit 30: extended-translation facility 3 enhancement facility
+ * These get translated to:
+ * HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
+ * HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
+ * HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
+ * HWCAP_S390_ETF3EH bit 8 (22 && 30).
+ */
+ for (i = 0; i < 6; i++)
+ if (test_facility(stfl_bits[i]))
+ elf_hwcap |= 1UL << i;
+
+ if (test_facility(22) && test_facility(30))
+ elf_hwcap |= HWCAP_S390_ETF3EH;
+
+ /*
+ * Check for additional facilities with store-facility-list-extended.
+ * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
+ * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
+ * as stored by stfl, bits 32-xxx contain additional facilities.
+ * How many facility words are stored depends on the number of
+ * doublewords passed to the instruction. The additional facilities
+ * are:
+ * Bit 42: decimal floating point facility is installed
+ * Bit 44: perform floating point operation facility is installed
+ * translated to:
+ * HWCAP_S390_DFP bit 6 (42 && 44).
+ */
+ if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
+ elf_hwcap |= HWCAP_S390_DFP;
+
+ /*
+ * Huge page support HWCAP_S390_HPAGE is bit 7.
+ */
+ if (MACHINE_HAS_HPAGE)
+ elf_hwcap |= HWCAP_S390_HPAGE;
+
+ /*
+ * 64-bit register support for 31-bit processes
+ * HWCAP_S390_HIGH_GPRS is bit 9.
+ */
+ elf_hwcap |= HWCAP_S390_HIGH_GPRS;
+
+ /*
+ * Transactional execution support HWCAP_S390_TE is bit 10.
+ */
+ if (test_facility(50) && test_facility(73))
+ elf_hwcap |= HWCAP_S390_TE;
+
+ /*
+ * Vector extension HWCAP_S390_VXRS is bit 11.
+ */
+ if (test_facility(129))
+ elf_hwcap |= HWCAP_S390_VXRS;
+ get_cpu_id(&cpu_id);
+ add_device_randomness(&cpu_id, sizeof(cpu_id));
+ switch (cpu_id.machine) {
+ case 0x9672:
+ strcpy(elf_platform, "g5");
+ break;
+ case 0x2064:
+ case 0x2066:
+ default: /* Use "z900" as default for 64 bit kernels. */
+ strcpy(elf_platform, "z900");
+ break;
+ case 0x2084:
+ case 0x2086:
+ strcpy(elf_platform, "z990");
+ break;
+ case 0x2094:
+ case 0x2096:
+ strcpy(elf_platform, "z9-109");
+ break;
+ case 0x2097:
+ case 0x2098:
+ strcpy(elf_platform, "z10");
+ break;
+ case 0x2817:
+ case 0x2818:
+ strcpy(elf_platform, "z196");
+ break;
+ case 0x2827:
+ case 0x2828:
+ strcpy(elf_platform, "zEC12");
+ break;
+ case 0x2964:
+ strcpy(elf_platform, "z13");
+ break;
+ }
+}
+
+/*
+ * Add system information as device randomness
+ */
+static void __init setup_randomness(void)
+{
+ struct sysinfo_3_2_2 *vmms;
+
+ vmms = (struct sysinfo_3_2_2 *) alloc_page(GFP_KERNEL);
+ if (vmms && stsi(vmms, 3, 2, 2) == 0 && vmms->count)
+ add_device_randomness(&vmms, vmms->count);
+ free_page((unsigned long) vmms);
+}
+
+/*
+ * Setup function called from init/main.c just after the banner
+ * was printed.
+ */
+
+void __init setup_arch(char **cmdline_p)
+{
+ /*
+ * print what head.S has found out about the machine
+ */
+ if (MACHINE_IS_VM)
+ pr_info("Linux is running as a z/VM "
+ "guest operating system in 64-bit mode\n");
+ else if (MACHINE_IS_KVM)
+ pr_info("Linux is running under KVM in 64-bit mode\n");
+ else if (MACHINE_IS_LPAR)
+ pr_info("Linux is running natively in 64-bit mode\n");
+
+ /* Have one command line that is parsed and saved in /proc/cmdline */
+ /* boot_command_line has been already set up in early.c */
+ *cmdline_p = boot_command_line;
+
+ ROOT_DEV = Root_RAM0;
+
+ /* Is init_mm really needed? */
+ init_mm.start_code = PAGE_OFFSET;
+ init_mm.end_code = (unsigned long) &_etext;
+ init_mm.end_data = (unsigned long) &_edata;
+ init_mm.brk = (unsigned long) &_end;
+
+ parse_early_param();
+ os_info_init();
+ setup_ipl();
+
+ /* Do some memory reservations *before* memory is added to memblock */
+ reserve_memory_end();
+ reserve_oldmem();
+ reserve_kernel();
+ reserve_initrd();
+ reserve_elfcorehdr();
+ memblock_allow_resize();
+
+ /* Get information about *all* installed memory */
+ detect_memory_memblock();
+
+ remove_oldmem();
+
+ /*
+ * Make sure all chunks are MAX_ORDER aligned so we don't need the
+ * extra checks that HOLES_IN_ZONE would require.
+ *
+ * Is this still required?
+ */
+ memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
+
+ setup_memory_end();
+ setup_memory();
+
+ check_initrd();
+ reserve_crashkernel();
+
+ setup_resources();
+ setup_vmcoreinfo();
+ setup_lowcore();
+ smp_fill_possible_mask();
+ cpu_init();
+
+ /*
+ * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
+ */
+ setup_hwcaps();
+
+ /*
+ * Create kernel page tables and switch to virtual addressing.
+ */
+ paging_init();
+
+ /* Setup default console */
+ conmode_default();
+ set_preferred_console();
+
+ /* Setup zfcpdump support */
+ setup_zfcpdump();
+
+ /* Add system specific data to the random pool */
+ setup_randomness();
+}
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
new file mode 100644
index 000000000..c551f22ce
--- /dev/null
+++ b/arch/s390/kernel/signal.c
@@ -0,0 +1,559 @@
+/*
+ * Copyright IBM Corp. 1999, 2006
+ * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ *
+ * Based on Intel version
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/tracehook.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/lowcore.h>
+#include <asm/switch_to.h>
+#include "entry.h"
+
+/*
+ * Layout of an old-style signal-frame:
+ * -----------------------------------------
+ * | save area (_SIGNAL_FRAMESIZE) |
+ * -----------------------------------------
+ * | struct sigcontext |
+ * | oldmask |
+ * | _sigregs * |
+ * -----------------------------------------
+ * | _sigregs with |
+ * | _s390_regs_common |
+ * | _s390_fp_regs |
+ * -----------------------------------------
+ * | int signo |
+ * -----------------------------------------
+ * | _sigregs_ext with |
+ * | gprs_high 64 byte (opt) |
+ * | vxrs_low 128 byte (opt) |
+ * | vxrs_high 256 byte (opt) |
+ * | reserved 128 byte (opt) |
+ * -----------------------------------------
+ * | __u16 svc_insn |
+ * -----------------------------------------
+ * The svc_insn entry with the sigreturn system call opcode does not
+ * have a fixed position and moves if gprs_high or vxrs exist.
+ * Future extensions will be added to _sigregs_ext.
+ */
+struct sigframe
+{
+ __u8 callee_used_stack[__SIGNAL_FRAMESIZE];
+ struct sigcontext sc;
+ _sigregs sregs;
+ int signo;
+ _sigregs_ext sregs_ext;
+ __u16 svc_insn; /* Offset of svc_insn is NOT fixed! */
+};
+
+/*
+ * Layout of an rt signal-frame:
+ * -----------------------------------------
+ * | save area (_SIGNAL_FRAMESIZE) |
+ * -----------------------------------------
+ * | svc __NR_rt_sigreturn 2 byte |
+ * -----------------------------------------
+ * | struct siginfo |
+ * -----------------------------------------
+ * | struct ucontext_extended with |
+ * | unsigned long uc_flags |
+ * | struct ucontext *uc_link |
+ * | stack_t uc_stack |
+ * | _sigregs uc_mcontext with |
+ * | _s390_regs_common |
+ * | _s390_fp_regs |
+ * | sigset_t uc_sigmask |
+ * | _sigregs_ext uc_mcontext_ext |
+ * | gprs_high 64 byte (opt) |
+ * | vxrs_low 128 byte (opt) |
+ * | vxrs_high 256 byte (opt)|
+ * | reserved 128 byte (opt) |
+ * -----------------------------------------
+ * Future extensions will be added to _sigregs_ext.
+ */
+struct rt_sigframe
+{
+ __u8 callee_used_stack[__SIGNAL_FRAMESIZE];
+ __u16 svc_insn;
+ struct siginfo info;
+ struct ucontext_extended uc;
+};
+
+/* Store registers needed to create the signal frame */
+static void store_sigregs(void)
+{
+ save_access_regs(current->thread.acrs);
+ save_fp_ctl(&current->thread.fp_regs.fpc);
+ if (current->thread.vxrs) {
+ int i;
+
+ save_vx_regs(current->thread.vxrs);
+ for (i = 0; i < __NUM_FPRS; i++)
+ current->thread.fp_regs.fprs[i] =
+ *(freg_t *)(current->thread.vxrs + i);
+ } else
+ save_fp_regs(current->thread.fp_regs.fprs);
+}
+
+/* Load registers after signal return */
+static void load_sigregs(void)
+{
+ restore_access_regs(current->thread.acrs);
+ /* restore_fp_ctl is done in restore_sigregs */
+ if (current->thread.vxrs) {
+ int i;
+
+ for (i = 0; i < __NUM_FPRS; i++)
+ *(freg_t *)(current->thread.vxrs + i) =
+ current->thread.fp_regs.fprs[i];
+ restore_vx_regs(current->thread.vxrs);
+ } else
+ restore_fp_regs(current->thread.fp_regs.fprs);
+}
+
+/* Returns non-zero on fault. */
+static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
+{
+ _sigregs user_sregs;
+
+ /* Copy a 'clean' PSW mask to the user to avoid leaking
+ information about whether PER is currently on. */
+ user_sregs.regs.psw.mask = PSW_USER_BITS |
+ (regs->psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
+ user_sregs.regs.psw.addr = regs->psw.addr;
+ memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
+ memcpy(&user_sregs.regs.acrs, current->thread.acrs,
+ sizeof(user_sregs.regs.acrs));
+ memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
+ sizeof(user_sregs.fpregs));
+ if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
+ return -EFAULT;
+ return 0;
+}
+
+static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
+{
+ _sigregs user_sregs;
+
+ /* Alwys make any pending restarted system call return -EINTR */
+ current->restart_block.fn = do_no_restart_syscall;
+
+ if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs)))
+ return -EFAULT;
+
+ if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI))
+ return -EINVAL;
+
+ /* Loading the floating-point-control word can fail. Do that first. */
+ if (restore_fp_ctl(&user_sregs.fpregs.fpc))
+ return -EINVAL;
+
+ /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */
+ regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) |
+ (user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI));
+ /* Check for invalid user address space control. */
+ if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME)
+ regs->psw.mask = PSW_ASC_PRIMARY |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ /* Check for invalid amode */
+ if (regs->psw.mask & PSW_MASK_EA)
+ regs->psw.mask |= PSW_MASK_BA;
+ regs->psw.addr = user_sregs.regs.psw.addr;
+ memcpy(&regs->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs));
+ memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
+ sizeof(current->thread.acrs));
+
+ memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
+ sizeof(current->thread.fp_regs));
+
+ clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
+ return 0;
+}
+
+/* Returns non-zero on fault. */
+static int save_sigregs_ext(struct pt_regs *regs,
+ _sigregs_ext __user *sregs_ext)
+{
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ /* Save vector registers to signal stack */
+ if (current->thread.vxrs) {
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+ if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
+ sizeof(sregs_ext->vxrs_low)) ||
+ __copy_to_user(&sregs_ext->vxrs_high,
+ current->thread.vxrs + __NUM_VXRS_LOW,
+ sizeof(sregs_ext->vxrs_high)))
+ return -EFAULT;
+ }
+ return 0;
+}
+
+static int restore_sigregs_ext(struct pt_regs *regs,
+ _sigregs_ext __user *sregs_ext)
+{
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ /* Restore vector registers from signal stack */
+ if (current->thread.vxrs) {
+ if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
+ sizeof(sregs_ext->vxrs_low)) ||
+ __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+ &sregs_ext->vxrs_high,
+ sizeof(sregs_ext->vxrs_high)))
+ return -EFAULT;
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+ }
+ return 0;
+}
+
+SYSCALL_DEFINE0(sigreturn)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+ struct sigframe __user *frame =
+ (struct sigframe __user *) regs->gprs[15];
+ sigset_t set;
+
+ if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
+ goto badframe;
+ set_current_blocked(&set);
+ if (restore_sigregs(regs, &frame->sregs))
+ goto badframe;
+ if (restore_sigregs_ext(regs, &frame->sregs_ext))
+ goto badframe;
+ load_sigregs();
+ return regs->gprs[2];
+badframe:
+ force_sig(SIGSEGV, current);
+ return 0;
+}
+
+SYSCALL_DEFINE0(rt_sigreturn)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+ struct rt_sigframe __user *frame =
+ (struct rt_sigframe __user *)regs->gprs[15];
+ sigset_t set;
+
+ if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set)))
+ goto badframe;
+ set_current_blocked(&set);
+ if (restore_altstack(&frame->uc.uc_stack))
+ goto badframe;
+ if (restore_sigregs(regs, &frame->uc.uc_mcontext))
+ goto badframe;
+ if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
+ goto badframe;
+ load_sigregs();
+ return regs->gprs[2];
+badframe:
+ force_sig(SIGSEGV, current);
+ return 0;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static inline void __user *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+ unsigned long sp;
+
+ /* Default to using normal stack */
+ sp = regs->gprs[15];
+
+ /* Overflow on alternate signal stack gives SIGSEGV. */
+ if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
+ return (void __user *) -1UL;
+
+ /* This is the X/Open sanctioned signal stack switching. */
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ if (! sas_ss_flags(sp))
+ sp = current->sas_ss_sp + current->sas_ss_size;
+ }
+
+ return (void __user *)((sp - frame_size) & -8ul);
+}
+
+static int setup_frame(int sig, struct k_sigaction *ka,
+ sigset_t *set, struct pt_regs * regs)
+{
+ struct sigframe __user *frame;
+ struct sigcontext sc;
+ unsigned long restorer;
+ size_t frame_size;
+
+ /*
+ * gprs_high are only present for a 31-bit task running on
+ * a 64-bit kernel (see compat_signal.c) but the space for
+ * gprs_high need to be allocated if vector registers are
+ * included in the signal frame on a 31-bit system.
+ */
+ frame_size = sizeof(*frame) - sizeof(frame->sregs_ext);
+ if (MACHINE_HAS_VX)
+ frame_size += sizeof(frame->sregs_ext);
+ frame = get_sigframe(ka, regs, frame_size);
+ if (frame == (void __user *) -1UL)
+ return -EFAULT;
+
+ /* Set up backchain. */
+ if (__put_user(regs->gprs[15], (addr_t __user *) frame))
+ return -EFAULT;
+
+ /* Create struct sigcontext on the signal stack */
+ memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE);
+ sc.sregs = (_sigregs __user __force *) &frame->sregs;
+ if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc)))
+ return -EFAULT;
+
+ /* Store registers needed to create the signal frame */
+ store_sigregs();
+
+ /* Create _sigregs on the signal stack */
+ if (save_sigregs(regs, &frame->sregs))
+ return -EFAULT;
+
+ /* Place signal number on stack to allow backtrace from handler. */
+ if (__put_user(regs->gprs[2], (int __user *) &frame->signo))
+ return -EFAULT;
+
+ /* Create _sigregs_ext on the signal stack */
+ if (save_sigregs_ext(regs, &frame->sregs_ext))
+ return -EFAULT;
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ if (ka->sa.sa_flags & SA_RESTORER) {
+ restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
+ } else {
+ /* Signal frame without vector registers are short ! */
+ __u16 __user *svc = (void __user *) frame + frame_size - 2;
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
+ return -EFAULT;
+ restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+ }
+
+ /* Set up registers for signal handler */
+ regs->gprs[14] = restorer;
+ regs->gprs[15] = (unsigned long) frame;
+ /* Force default amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
+
+ regs->gprs[2] = sig;
+ regs->gprs[3] = (unsigned long) &frame->sc;
+
+ /* We forgot to include these in the sigcontext.
+ To avoid breaking binary compatibility, they are passed as args. */
+ if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL ||
+ sig == SIGTRAP || sig == SIGFPE) {
+ /* set extra registers only for synchronous signals */
+ regs->gprs[4] = regs->int_code & 127;
+ regs->gprs[5] = regs->int_parm_long;
+ regs->gprs[6] = task_thread_info(current)->last_break;
+ }
+ return 0;
+}
+
+static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs)
+{
+ struct rt_sigframe __user *frame;
+ unsigned long uc_flags, restorer;
+ size_t frame_size;
+
+ frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext);
+ /*
+ * gprs_high are only present for a 31-bit task running on
+ * a 64-bit kernel (see compat_signal.c) but the space for
+ * gprs_high need to be allocated if vector registers are
+ * included in the signal frame on a 31-bit system.
+ */
+ uc_flags = 0;
+ if (MACHINE_HAS_VX) {
+ frame_size += sizeof(_sigregs_ext);
+ if (current->thread.vxrs)
+ uc_flags |= UC_VXRS;
+ }
+ frame = get_sigframe(&ksig->ka, regs, frame_size);
+ if (frame == (void __user *) -1UL)
+ return -EFAULT;
+
+ /* Set up backchain. */
+ if (__put_user(regs->gprs[15], (addr_t __user *) frame))
+ return -EFAULT;
+
+ /* Set up to return from userspace. If provided, use a stub
+ already in userspace. */
+ if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+ restorer = (unsigned long)
+ ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE;
+ } else {
+ __u16 __user *svc = &frame->svc_insn;
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
+ return -EFAULT;
+ restorer = (unsigned long) svc | PSW_ADDR_AMODE;
+ }
+
+ /* Create siginfo on the signal stack */
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
+ return -EFAULT;
+
+ /* Store registers needed to create the signal frame */
+ store_sigregs();
+
+ /* Create ucontext on the signal stack. */
+ if (__put_user(uc_flags, &frame->uc.uc_flags) ||
+ __put_user(NULL, &frame->uc.uc_link) ||
+ __save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
+ save_sigregs(regs, &frame->uc.uc_mcontext) ||
+ __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) ||
+ save_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
+ return -EFAULT;
+
+ /* Set up registers for signal handler */
+ regs->gprs[14] = restorer;
+ regs->gprs[15] = (unsigned long) frame;
+ /* Force default amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
+ (PSW_USER_BITS & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
+ regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler | PSW_ADDR_AMODE;
+
+ regs->gprs[2] = ksig->sig;
+ regs->gprs[3] = (unsigned long) &frame->info;
+ regs->gprs[4] = (unsigned long) &frame->uc;
+ regs->gprs[5] = task_thread_info(current)->last_break;
+ return 0;
+}
+
+static void handle_signal(struct ksignal *ksig, sigset_t *oldset,
+ struct pt_regs *regs)
+{
+ int ret;
+
+ /* Set up the stack frame */
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+ ret = setup_rt_frame(ksig, oldset, regs);
+ else
+ ret = setup_frame(ksig->sig, &ksig->ka, oldset, regs);
+
+ signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP));
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ *
+ * Note that we go through the signals twice: once to check the signals that
+ * the kernel can handle, and then we build all the user-level signal handling
+ * stack-frames in one go after that.
+ */
+void do_signal(struct pt_regs *regs)
+{
+ struct ksignal ksig;
+ sigset_t *oldset = sigmask_to_save();
+
+ /*
+ * Get signal to deliver. When running under ptrace, at this point
+ * the debugger may change all our registers, including the system
+ * call information.
+ */
+ current_thread_info()->system_call =
+ test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0;
+
+ if (get_signal(&ksig)) {
+ /* Whee! Actually deliver the signal. */
+ if (current_thread_info()->system_call) {
+ regs->int_code = current_thread_info()->system_call;
+ /* Check for system call restarting. */
+ switch (regs->gprs[2]) {
+ case -ERESTART_RESTARTBLOCK:
+ case -ERESTARTNOHAND:
+ regs->gprs[2] = -EINTR;
+ break;
+ case -ERESTARTSYS:
+ if (!(ksig.ka.sa.sa_flags & SA_RESTART)) {
+ regs->gprs[2] = -EINTR;
+ break;
+ }
+ /* fallthrough */
+ case -ERESTARTNOINTR:
+ regs->gprs[2] = regs->orig_gpr2;
+ regs->psw.addr =
+ __rewind_psw(regs->psw,
+ regs->int_code >> 16);
+ break;
+ }
+ }
+ /* No longer in a system call */
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
+
+ if (is_compat_task())
+ handle_signal32(&ksig, oldset, regs);
+ else
+ handle_signal(&ksig, oldset, regs);
+ return;
+ }
+
+ /* No handlers present - check for system call restart */
+ clear_pt_regs_flag(regs, PIF_SYSCALL);
+ if (current_thread_info()->system_call) {
+ regs->int_code = current_thread_info()->system_call;
+ switch (regs->gprs[2]) {
+ case -ERESTART_RESTARTBLOCK:
+ /* Restart with sys_restart_syscall */
+ regs->int_code = __NR_restart_syscall;
+ /* fallthrough */
+ case -ERESTARTNOHAND:
+ case -ERESTARTSYS:
+ case -ERESTARTNOINTR:
+ /* Restart system call with magic TIF bit. */
+ regs->gprs[2] = regs->orig_gpr2;
+ set_pt_regs_flag(regs, PIF_SYSCALL);
+ if (test_thread_flag(TIF_SINGLE_STEP))
+ clear_pt_regs_flag(regs, PIF_PER_TRAP);
+ break;
+ }
+ }
+
+ /*
+ * If there's no signal to deliver, we just put the saved sigmask back.
+ */
+ restore_saved_sigmask();
+}
+
+void do_notify_resume(struct pt_regs *regs)
+{
+ clear_thread_flag(TIF_NOTIFY_RESUME);
+ tracehook_notify_resume(regs);
+}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
new file mode 100644
index 000000000..efd2c1968
--- /dev/null
+++ b/arch/s390/kernel/smp.c
@@ -0,0 +1,1161 @@
+/*
+ * SMP related functions
+ *
+ * Copyright IBM Corp. 1999, 2012
+ * Author(s): Denis Joseph Barrow,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ * Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ * based on other smp stuff by
+ * (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net>
+ * (c) 1998 Ingo Molnar
+ *
+ * The code outside of smp.c uses logical cpu numbers, only smp.c does
+ * the translation of logical to physical cpu ids. All new code that
+ * operates on physical cpu numbers needs to go into smp.c.
+ */
+
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/workqueue.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irqflags.h>
+#include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/crash_dump.h>
+#include <asm/asm-offsets.h>
+#include <asm/switch_to.h>
+#include <asm/facility.h>
+#include <asm/ipl.h>
+#include <asm/setup.h>
+#include <asm/irq.h>
+#include <asm/tlbflush.h>
+#include <asm/vtimer.h>
+#include <asm/lowcore.h>
+#include <asm/sclp.h>
+#include <asm/vdso.h>
+#include <asm/debug.h>
+#include <asm/os_info.h>
+#include <asm/sigp.h>
+#include <asm/idle.h>
+#include "entry.h"
+
+enum {
+ ec_schedule = 0,
+ ec_call_function_single,
+ ec_stop_cpu,
+};
+
+enum {
+ CPU_STATE_STANDBY,
+ CPU_STATE_CONFIGURED,
+};
+
+static DEFINE_PER_CPU(struct cpu *, cpu_device);
+
+struct pcpu {
+ struct _lowcore *lowcore; /* lowcore page(s) for the cpu */
+ unsigned long ec_mask; /* bit mask for ec_xxx functions */
+ signed char state; /* physical cpu state */
+ signed char polarization; /* physical polarization */
+ u16 address; /* physical cpu address */
+};
+
+static u8 boot_cpu_type;
+static struct pcpu pcpu_devices[NR_CPUS];
+
+unsigned int smp_cpu_mt_shift;
+EXPORT_SYMBOL(smp_cpu_mt_shift);
+
+unsigned int smp_cpu_mtid;
+EXPORT_SYMBOL(smp_cpu_mtid);
+
+static unsigned int smp_max_threads __initdata = -1U;
+
+static int __init early_nosmt(char *s)
+{
+ smp_max_threads = 1;
+ return 0;
+}
+early_param("nosmt", early_nosmt);
+
+static int __init early_smt(char *s)
+{
+ get_option(&s, &smp_max_threads);
+ return 0;
+}
+early_param("smt", early_smt);
+
+/*
+ * The smp_cpu_state_mutex must be held when changing the state or polarization
+ * member of a pcpu data structure within the pcpu_devices arreay.
+ */
+DEFINE_MUTEX(smp_cpu_state_mutex);
+
+/*
+ * Signal processor helper functions.
+ */
+static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm,
+ u32 *status)
+{
+ int cc;
+
+ while (1) {
+ cc = __pcpu_sigp(addr, order, parm, NULL);
+ if (cc != SIGP_CC_BUSY)
+ return cc;
+ cpu_relax();
+ }
+}
+
+static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
+{
+ int cc, retry;
+
+ for (retry = 0; ; retry++) {
+ cc = __pcpu_sigp(pcpu->address, order, parm, NULL);
+ if (cc != SIGP_CC_BUSY)
+ break;
+ if (retry >= 3)
+ udelay(10);
+ }
+ return cc;
+}
+
+static inline int pcpu_stopped(struct pcpu *pcpu)
+{
+ u32 uninitialized_var(status);
+
+ if (__pcpu_sigp(pcpu->address, SIGP_SENSE,
+ 0, &status) != SIGP_CC_STATUS_STORED)
+ return 0;
+ return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
+}
+
+static inline int pcpu_running(struct pcpu *pcpu)
+{
+ if (__pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING,
+ 0, NULL) != SIGP_CC_STATUS_STORED)
+ return 1;
+ /* Status stored condition code is equivalent to cpu not running. */
+ return 0;
+}
+
+/*
+ * Find struct pcpu by cpu address.
+ */
+static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
+{
+ int cpu;
+
+ for_each_cpu(cpu, mask)
+ if (pcpu_devices[cpu].address == address)
+ return pcpu_devices + cpu;
+ return NULL;
+}
+
+static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
+{
+ int order;
+
+ if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
+ return;
+ order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
+ pcpu_sigp_retry(pcpu, order, 0);
+}
+
+#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+
+static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
+{
+ unsigned long async_stack, panic_stack;
+ struct _lowcore *lc;
+
+ if (pcpu != &pcpu_devices[0]) {
+ pcpu->lowcore = (struct _lowcore *)
+ __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
+ async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
+ panic_stack = __get_free_page(GFP_KERNEL);
+ if (!pcpu->lowcore || !panic_stack || !async_stack)
+ goto out;
+ } else {
+ async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
+ panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
+ }
+ lc = pcpu->lowcore;
+ memcpy(lc, &S390_lowcore, 512);
+ memset((char *) lc + 512, 0, sizeof(*lc) - 512);
+ lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
+ lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
+ lc->cpu_nr = cpu;
+ lc->spinlock_lockval = arch_spin_lockval(cpu);
+ if (MACHINE_HAS_VX)
+ lc->vector_save_area_addr =
+ (unsigned long) &lc->vector_save_area;
+ if (vdso_alloc_per_cpu(lc))
+ goto out;
+ lowcore_ptr[cpu] = lc;
+ pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc);
+ return 0;
+out:
+ if (pcpu != &pcpu_devices[0]) {
+ free_page(panic_stack);
+ free_pages(async_stack, ASYNC_ORDER);
+ free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
+ }
+ return -ENOMEM;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static void pcpu_free_lowcore(struct pcpu *pcpu)
+{
+ pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
+ lowcore_ptr[pcpu - pcpu_devices] = NULL;
+ vdso_free_per_cpu(pcpu->lowcore);
+ if (pcpu == &pcpu_devices[0])
+ return;
+ free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
+ free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
+ free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
+{
+ struct _lowcore *lc = pcpu->lowcore;
+
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
+ atomic_inc(&init_mm.context.attach_count);
+ lc->cpu_nr = cpu;
+ lc->spinlock_lockval = arch_spin_lockval(cpu);
+ lc->percpu_offset = __per_cpu_offset[cpu];
+ lc->kernel_asce = S390_lowcore.kernel_asce;
+ lc->machine_flags = S390_lowcore.machine_flags;
+ lc->user_timer = lc->system_timer = lc->steal_timer = 0;
+ __ctl_store(lc->cregs_save_area, 0, 15);
+ save_access_regs((unsigned int *) lc->access_regs_save_area);
+ memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
+ MAX_FACILITY_BIT/8);
+}
+
+static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
+{
+ struct _lowcore *lc = pcpu->lowcore;
+ struct thread_info *ti = task_thread_info(tsk);
+
+ lc->kernel_stack = (unsigned long) task_stack_page(tsk)
+ + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+ lc->thread_info = (unsigned long) task_thread_info(tsk);
+ lc->current_task = (unsigned long) tsk;
+ lc->user_timer = ti->user_timer;
+ lc->system_timer = ti->system_timer;
+ lc->steal_timer = 0;
+}
+
+static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
+{
+ struct _lowcore *lc = pcpu->lowcore;
+
+ lc->restart_stack = lc->kernel_stack;
+ lc->restart_fn = (unsigned long) func;
+ lc->restart_data = (unsigned long) data;
+ lc->restart_source = -1UL;
+ pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
+}
+
+/*
+ * Call function via PSW restart on pcpu and stop the current cpu.
+ */
+static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
+ void *data, unsigned long stack)
+{
+ struct _lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
+ unsigned long source_cpu = stap();
+
+ __load_psw_mask(PSW_KERNEL_BITS);
+ if (pcpu->address == source_cpu)
+ func(data); /* should not return */
+ /* Stop target cpu (if func returns this stops the current cpu). */
+ pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+ /* Restart func on the target cpu and stop the current cpu. */
+ mem_assign_absolute(lc->restart_stack, stack);
+ mem_assign_absolute(lc->restart_fn, (unsigned long) func);
+ mem_assign_absolute(lc->restart_data, (unsigned long) data);
+ mem_assign_absolute(lc->restart_source, source_cpu);
+ asm volatile(
+ "0: sigp 0,%0,%2 # sigp restart to target cpu\n"
+ " brc 2,0b # busy, try again\n"
+ "1: sigp 0,%1,%3 # sigp stop to current cpu\n"
+ " brc 2,1b # busy, try again\n"
+ : : "d" (pcpu->address), "d" (source_cpu),
+ "K" (SIGP_RESTART), "K" (SIGP_STOP)
+ : "0", "1", "cc");
+ for (;;) ;
+}
+
+/*
+ * Enable additional logical cpus for multi-threading.
+ */
+static int pcpu_set_smt(unsigned int mtid)
+{
+ register unsigned long reg1 asm ("1") = (unsigned long) mtid;
+ int cc;
+
+ if (smp_cpu_mtid == mtid)
+ return 0;
+ asm volatile(
+ " sigp %1,0,%2 # sigp set multi-threading\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc) : "d" (reg1), "K" (SIGP_SET_MULTI_THREADING)
+ : "cc");
+ if (cc == 0) {
+ smp_cpu_mtid = mtid;
+ smp_cpu_mt_shift = 0;
+ while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
+ smp_cpu_mt_shift++;
+ pcpu_devices[0].address = stap();
+ }
+ return cc;
+}
+
+/*
+ * Call function on an online CPU.
+ */
+void smp_call_online_cpu(void (*func)(void *), void *data)
+{
+ struct pcpu *pcpu;
+
+ /* Use the current cpu if it is online. */
+ pcpu = pcpu_find_address(cpu_online_mask, stap());
+ if (!pcpu)
+ /* Use the first online cpu. */
+ pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
+ pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
+}
+
+/*
+ * Call function on the ipl CPU.
+ */
+void smp_call_ipl_cpu(void (*func)(void *), void *data)
+{
+ pcpu_delegate(&pcpu_devices[0], func, data,
+ pcpu_devices->lowcore->panic_stack -
+ PANIC_FRAME_OFFSET + PAGE_SIZE);
+}
+
+int smp_find_processor_id(u16 address)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu)
+ if (pcpu_devices[cpu].address == address)
+ return cpu;
+ return -1;
+}
+
+int smp_vcpu_scheduled(int cpu)
+{
+ return pcpu_running(pcpu_devices + cpu);
+}
+
+void smp_yield_cpu(int cpu)
+{
+ if (MACHINE_HAS_DIAG9C)
+ asm volatile("diag %0,0,0x9c"
+ : : "d" (pcpu_devices[cpu].address));
+ else if (MACHINE_HAS_DIAG44)
+ asm volatile("diag 0,0,0x44");
+}
+
+/*
+ * Send cpus emergency shutdown signal. This gives the cpus the
+ * opportunity to complete outstanding interrupts.
+ */
+static void smp_emergency_stop(cpumask_t *cpumask)
+{
+ u64 end;
+ int cpu;
+
+ end = get_tod_clock() + (1000000UL << 12);
+ for_each_cpu(cpu, cpumask) {
+ struct pcpu *pcpu = pcpu_devices + cpu;
+ set_bit(ec_stop_cpu, &pcpu->ec_mask);
+ while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
+ 0, NULL) == SIGP_CC_BUSY &&
+ get_tod_clock() < end)
+ cpu_relax();
+ }
+ while (get_tod_clock() < end) {
+ for_each_cpu(cpu, cpumask)
+ if (pcpu_stopped(pcpu_devices + cpu))
+ cpumask_clear_cpu(cpu, cpumask);
+ if (cpumask_empty(cpumask))
+ break;
+ cpu_relax();
+ }
+}
+
+/*
+ * Stop all cpus but the current one.
+ */
+void smp_send_stop(void)
+{
+ cpumask_t cpumask;
+ int cpu;
+
+ /* Disable all interrupts/machine checks */
+ __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+ trace_hardirqs_off();
+
+ debug_set_critical();
+ cpumask_copy(&cpumask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &cpumask);
+
+ if (oops_in_progress)
+ smp_emergency_stop(&cpumask);
+
+ /* stop all processors */
+ for_each_cpu(cpu, &cpumask) {
+ struct pcpu *pcpu = pcpu_devices + cpu;
+ pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+ while (!pcpu_stopped(pcpu))
+ cpu_relax();
+ }
+}
+
+/*
+ * This is the main routine where commands issued by other
+ * cpus are handled.
+ */
+static void smp_handle_ext_call(void)
+{
+ unsigned long bits;
+
+ /* handle bit signal external calls */
+ bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
+ if (test_bit(ec_stop_cpu, &bits))
+ smp_stop_cpu();
+ if (test_bit(ec_schedule, &bits))
+ scheduler_ipi();
+ if (test_bit(ec_call_function_single, &bits))
+ generic_smp_call_function_single_interrupt();
+}
+
+static void do_ext_call_interrupt(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS);
+ smp_handle_ext_call();
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+ int cpu;
+
+ for_each_cpu(cpu, mask)
+ pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+ pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+}
+
+/*
+ * this function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+void smp_send_reschedule(int cpu)
+{
+ pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
+}
+
+/*
+ * parameter area for the set/clear control bit callbacks
+ */
+struct ec_creg_mask_parms {
+ unsigned long orval;
+ unsigned long andval;
+ int cr;
+};
+
+/*
+ * callback for setting/clearing control bits
+ */
+static void smp_ctl_bit_callback(void *info)
+{
+ struct ec_creg_mask_parms *pp = info;
+ unsigned long cregs[16];
+
+ __ctl_store(cregs, 0, 15);
+ cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
+ __ctl_load(cregs, 0, 15);
+}
+
+/*
+ * Set a bit in a control register of all cpus
+ */
+void smp_ctl_set_bit(int cr, int bit)
+{
+ struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr };
+
+ on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+}
+EXPORT_SYMBOL(smp_ctl_set_bit);
+
+/*
+ * Clear a bit in a control register of all cpus
+ */
+void smp_ctl_clear_bit(int cr, int bit)
+{
+ struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr };
+
+ on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+}
+EXPORT_SYMBOL(smp_ctl_clear_bit);
+
+#ifdef CONFIG_CRASH_DUMP
+
+static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu)
+{
+ void *lc = pcpu_devices[0].lowcore;
+ struct save_area_ext *sa_ext;
+ unsigned long vx_sa;
+
+ sa_ext = dump_save_area_create(cpu);
+ if (!sa_ext)
+ panic("could not allocate memory for save area\n");
+ if (is_boot_cpu) {
+ /* Copy the registers of the boot CPU. */
+ copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa),
+ SAVE_AREA_BASE - PAGE_SIZE, 0);
+ if (MACHINE_HAS_VX)
+ save_vx_regs_safe(sa_ext->vx_regs);
+ return;
+ }
+ /* Get the registers of a non-boot cpu. */
+ __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL);
+ memcpy_real(&sa_ext->sa, lc + SAVE_AREA_BASE, sizeof(sa_ext->sa));
+ if (!MACHINE_HAS_VX)
+ return;
+ /* Get the VX registers */
+ vx_sa = __get_free_page(GFP_KERNEL);
+ if (!vx_sa)
+ panic("could not allocate memory for VX save area\n");
+ __pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL);
+ memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs));
+ free_page(vx_sa);
+}
+
+/*
+ * Collect CPU state of the previous, crashed system.
+ * There are four cases:
+ * 1) standard zfcp dump
+ * condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ * The state for all CPUs except the boot CPU needs to be collected
+ * with sigp stop-and-store-status. The boot CPU state is located in
+ * the absolute lowcore of the memory stored in the HSA. The zcore code
+ * will allocate the save area and copy the boot CPU state from the HSA.
+ * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory)
+ * condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ * The state for all CPUs except the boot CPU needs to be collected
+ * with sigp stop-and-store-status. The firmware or the boot-loader
+ * stored the registers of the boot CPU in the absolute lowcore in the
+ * memory of the old system.
+ * 3) kdump and the old kernel did not store the CPU state,
+ * or stand-alone kdump for DASD
+ * condition: OLDMEM_BASE != NULL && !is_kdump_kernel()
+ * The state for all CPUs except the boot CPU needs to be collected
+ * with sigp stop-and-store-status. The kexec code or the boot-loader
+ * stored the registers of the boot CPU in the memory of the old system.
+ * 4) kdump and the old kernel stored the CPU state
+ * condition: OLDMEM_BASE != NULL && is_kdump_kernel()
+ * The state of all CPUs is stored in ELF sections in the memory of the
+ * old system. The ELF sections are picked up by the crash_dump code
+ * via elfcorehdr_addr.
+ */
+static void __init smp_store_cpu_states(struct sclp_cpu_info *info)
+{
+ unsigned int cpu, address, i, j;
+ int is_boot_cpu;
+
+ if (is_kdump_kernel())
+ /* Previous system stored the CPU states. Nothing to do. */
+ return;
+ if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP))
+ /* No previous system present, normal boot. */
+ return;
+ /* Set multi-threading state to the previous system. */
+ pcpu_set_smt(sclp_get_mtid_prev());
+ /* Collect CPU states. */
+ cpu = 0;
+ for (i = 0; i < info->configured; i++) {
+ /* Skip CPUs with different CPU type. */
+ if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+ continue;
+ for (j = 0; j <= smp_cpu_mtid; j++, cpu++) {
+ address = (info->cpu[i].core_id << smp_cpu_mt_shift) + j;
+ is_boot_cpu = (address == pcpu_devices[0].address);
+ if (is_boot_cpu && !OLDMEM_BASE)
+ /* Skip boot CPU for standard zfcp dump. */
+ continue;
+ /* Get state for this CPu. */
+ __smp_store_cpu_state(cpu, address, is_boot_cpu);
+ }
+ }
+}
+
+int smp_store_status(int cpu)
+{
+ unsigned long vx_sa;
+ struct pcpu *pcpu;
+
+ pcpu = pcpu_devices + cpu;
+ if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
+ 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
+ return -EIO;
+ if (!MACHINE_HAS_VX)
+ return 0;
+ vx_sa = __pa(pcpu->lowcore->vector_save_area_addr);
+ __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
+ vx_sa, NULL);
+ return 0;
+}
+
+#endif /* CONFIG_CRASH_DUMP */
+
+void smp_cpu_set_polarization(int cpu, int val)
+{
+ pcpu_devices[cpu].polarization = val;
+}
+
+int smp_cpu_get_polarization(int cpu)
+{
+ return pcpu_devices[cpu].polarization;
+}
+
+static struct sclp_cpu_info *smp_get_cpu_info(void)
+{
+ static int use_sigp_detection;
+ struct sclp_cpu_info *info;
+ int address;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (info && (use_sigp_detection || sclp_get_cpu_info(info))) {
+ use_sigp_detection = 1;
+ for (address = 0; address <= MAX_CPU_ADDRESS;
+ address += (1U << smp_cpu_mt_shift)) {
+ if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) ==
+ SIGP_CC_NOT_OPERATIONAL)
+ continue;
+ info->cpu[info->configured].core_id =
+ address >> smp_cpu_mt_shift;
+ info->configured++;
+ }
+ info->combined = info->configured;
+ }
+ return info;
+}
+
+static int smp_add_present_cpu(int cpu);
+
+static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
+{
+ struct pcpu *pcpu;
+ cpumask_t avail;
+ int cpu, nr, i, j;
+ u16 address;
+
+ nr = 0;
+ cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
+ cpu = cpumask_first(&avail);
+ for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
+ if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+ continue;
+ address = info->cpu[i].core_id << smp_cpu_mt_shift;
+ for (j = 0; j <= smp_cpu_mtid; j++) {
+ if (pcpu_find_address(cpu_present_mask, address + j))
+ continue;
+ pcpu = pcpu_devices + cpu;
+ pcpu->address = address + j;
+ pcpu->state =
+ (cpu >= info->configured*(smp_cpu_mtid + 1)) ?
+ CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ set_cpu_present(cpu, true);
+ if (sysfs_add && smp_add_present_cpu(cpu) != 0)
+ set_cpu_present(cpu, false);
+ else
+ nr++;
+ cpu = cpumask_next(cpu, &avail);
+ if (cpu >= nr_cpu_ids)
+ break;
+ }
+ }
+ return nr;
+}
+
+static void __init smp_detect_cpus(void)
+{
+ unsigned int cpu, mtid, c_cpus, s_cpus;
+ struct sclp_cpu_info *info;
+ u16 address;
+
+ /* Get CPU information */
+ info = smp_get_cpu_info();
+ if (!info)
+ panic("smp_detect_cpus failed to allocate memory\n");
+
+ /* Find boot CPU type */
+ if (info->has_cpu_type) {
+ address = stap();
+ for (cpu = 0; cpu < info->combined; cpu++)
+ if (info->cpu[cpu].core_id == address) {
+ /* The boot cpu dictates the cpu type. */
+ boot_cpu_type = info->cpu[cpu].type;
+ break;
+ }
+ if (cpu >= info->combined)
+ panic("Could not find boot CPU type");
+ }
+
+#ifdef CONFIG_CRASH_DUMP
+ /* Collect CPU state of previous system */
+ smp_store_cpu_states(info);
+#endif
+
+ /* Set multi-threading state for the current system */
+ mtid = sclp_get_mtid(boot_cpu_type);
+ mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
+ pcpu_set_smt(mtid);
+
+ /* Print number of CPUs */
+ c_cpus = s_cpus = 0;
+ for (cpu = 0; cpu < info->combined; cpu++) {
+ if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type)
+ continue;
+ if (cpu < info->configured)
+ c_cpus += smp_cpu_mtid + 1;
+ else
+ s_cpus += smp_cpu_mtid + 1;
+ }
+ pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
+
+ /* Add CPUs present at boot */
+ get_online_cpus();
+ __smp_rescan_cpus(info, 0);
+ put_online_cpus();
+ kfree(info);
+}
+
+/*
+ * Activate a secondary processor.
+ */
+static void smp_start_secondary(void *cpuvoid)
+{
+ S390_lowcore.last_update_clock = get_tod_clock();
+ S390_lowcore.restart_stack = (unsigned long) restart_stack;
+ S390_lowcore.restart_fn = (unsigned long) do_restart;
+ S390_lowcore.restart_data = 0;
+ S390_lowcore.restart_source = -1UL;
+ restore_access_regs(S390_lowcore.access_regs_save_area);
+ __ctl_load(S390_lowcore.cregs_save_area, 0, 15);
+ __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+ cpu_init();
+ preempt_disable();
+ init_cpu_timer();
+ vtime_init();
+ pfault_init();
+ notify_cpu_starting(smp_processor_id());
+ set_cpu_online(smp_processor_id(), true);
+ inc_irq_stat(CPU_RST);
+ local_irq_enable();
+ cpu_startup_entry(CPUHP_ONLINE);
+}
+
+/* Upping and downing of CPUs */
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+ struct pcpu *pcpu;
+ int base, i, rc;
+
+ pcpu = pcpu_devices + cpu;
+ if (pcpu->state != CPU_STATE_CONFIGURED)
+ return -EIO;
+ base = cpu - (cpu % (smp_cpu_mtid + 1));
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ if (base + i < nr_cpu_ids)
+ if (cpu_online(base + i))
+ break;
+ }
+ /*
+ * If this is the first CPU of the core to get online
+ * do an initial CPU reset.
+ */
+ if (i > smp_cpu_mtid &&
+ pcpu_sigp_retry(pcpu_devices + base, SIGP_INITIAL_CPU_RESET, 0) !=
+ SIGP_CC_ORDER_CODE_ACCEPTED)
+ return -EIO;
+
+ rc = pcpu_alloc_lowcore(pcpu, cpu);
+ if (rc)
+ return rc;
+ pcpu_prepare_secondary(pcpu, cpu);
+ pcpu_attach_task(pcpu, tidle);
+ pcpu_start_fn(pcpu, smp_start_secondary, NULL);
+ /* Wait until cpu puts itself in the online & active maps */
+ while (!cpu_online(cpu) || !cpu_active(cpu))
+ cpu_relax();
+ return 0;
+}
+
+static unsigned int setup_possible_cpus __initdata;
+
+static int __init _setup_possible_cpus(char *s)
+{
+ get_option(&s, &setup_possible_cpus);
+ return 0;
+}
+early_param("possible_cpus", _setup_possible_cpus);
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+int __cpu_disable(void)
+{
+ unsigned long cregs[16];
+
+ /* Handle possible pending IPIs */
+ smp_handle_ext_call();
+ set_cpu_online(smp_processor_id(), false);
+ /* Disable pseudo page faults on this cpu. */
+ pfault_fini();
+ /* Disable interrupt sources via control register. */
+ __ctl_store(cregs, 0, 15);
+ cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */
+ cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */
+ cregs[14] &= ~0x1f000000UL; /* disable most machine checks */
+ __ctl_load(cregs, 0, 15);
+ clear_cpu_flag(CIF_NOHZ_DELAY);
+ return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+ struct pcpu *pcpu;
+
+ /* Wait until target cpu is down */
+ pcpu = pcpu_devices + cpu;
+ while (!pcpu_stopped(pcpu))
+ cpu_relax();
+ pcpu_free_lowcore(pcpu);
+ atomic_dec(&init_mm.context.attach_count);
+ cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+ if (MACHINE_HAS_TLB_LC)
+ cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
+}
+
+void __noreturn cpu_die(void)
+{
+ idle_task_exit();
+ pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
+ for (;;) ;
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+void __init smp_fill_possible_mask(void)
+{
+ unsigned int possible, sclp, cpu;
+
+ sclp = min(smp_max_threads, sclp_get_mtid_max() + 1);
+ sclp = sclp_get_max_cpu()*sclp ?: nr_cpu_ids;
+ possible = setup_possible_cpus ?: nr_cpu_ids;
+ possible = min(possible, sclp);
+ for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
+ set_cpu_possible(cpu, true);
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+ /* request the 0x1201 emergency signal external interrupt */
+ if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
+ panic("Couldn't request external interrupt 0x1201");
+ /* request the 0x1202 external call external interrupt */
+ if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
+ panic("Couldn't request external interrupt 0x1202");
+ smp_detect_cpus();
+}
+
+void __init smp_prepare_boot_cpu(void)
+{
+ struct pcpu *pcpu = pcpu_devices;
+
+ pcpu->state = CPU_STATE_CONFIGURED;
+ pcpu->address = stap();
+ pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
+ S390_lowcore.percpu_offset = __per_cpu_offset[0];
+ smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
+ set_cpu_present(0, true);
+ set_cpu_online(0, true);
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+}
+
+void __init smp_setup_processor_id(void)
+{
+ S390_lowcore.cpu_nr = 0;
+ S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
+}
+
+/*
+ * the frequency of the profiling timer can be changed
+ * by writing a multiplier value into /proc/profile.
+ *
+ * usually you want to run this on all CPUs ;)
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+ return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static ssize_t cpu_configure_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ ssize_t count;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
+ mutex_unlock(&smp_cpu_state_mutex);
+ return count;
+}
+
+static ssize_t cpu_configure_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct pcpu *pcpu;
+ int cpu, val, rc, i;
+ char delim;
+
+ if (sscanf(buf, "%d %c", &val, &delim) != 1)
+ return -EINVAL;
+ if (val != 0 && val != 1)
+ return -EINVAL;
+ get_online_cpus();
+ mutex_lock(&smp_cpu_state_mutex);
+ rc = -EBUSY;
+ /* disallow configuration changes of online cpus and cpu 0 */
+ cpu = dev->id;
+ cpu -= cpu % (smp_cpu_mtid + 1);
+ if (cpu == 0)
+ goto out;
+ for (i = 0; i <= smp_cpu_mtid; i++)
+ if (cpu_online(cpu + i))
+ goto out;
+ pcpu = pcpu_devices + cpu;
+ rc = 0;
+ switch (val) {
+ case 0:
+ if (pcpu->state != CPU_STATE_CONFIGURED)
+ break;
+ rc = sclp_cpu_deconfigure(pcpu->address >> smp_cpu_mt_shift);
+ if (rc)
+ break;
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
+ continue;
+ pcpu[i].state = CPU_STATE_STANDBY;
+ smp_cpu_set_polarization(cpu + i,
+ POLARIZATION_UNKNOWN);
+ }
+ topology_expect_change();
+ break;
+ case 1:
+ if (pcpu->state != CPU_STATE_STANDBY)
+ break;
+ rc = sclp_cpu_configure(pcpu->address >> smp_cpu_mt_shift);
+ if (rc)
+ break;
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
+ continue;
+ pcpu[i].state = CPU_STATE_CONFIGURED;
+ smp_cpu_set_polarization(cpu + i,
+ POLARIZATION_UNKNOWN);
+ }
+ topology_expect_change();
+ break;
+ default:
+ break;
+ }
+out:
+ mutex_unlock(&smp_cpu_state_mutex);
+ put_online_cpus();
+ return rc ? rc : count;
+}
+static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static ssize_t show_cpu_address(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
+}
+static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
+
+static struct attribute *cpu_common_attrs[] = {
+#ifdef CONFIG_HOTPLUG_CPU
+ &dev_attr_configure.attr,
+#endif
+ &dev_attr_address.attr,
+ NULL,
+};
+
+static struct attribute_group cpu_common_attr_group = {
+ .attrs = cpu_common_attrs,
+};
+
+static struct attribute *cpu_online_attrs[] = {
+ &dev_attr_idle_count.attr,
+ &dev_attr_idle_time_us.attr,
+ NULL,
+};
+
+static struct attribute_group cpu_online_attr_group = {
+ .attrs = cpu_online_attrs,
+};
+
+static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
+{
+ unsigned int cpu = (unsigned int)(long)hcpu;
+ struct device *s = &per_cpu(cpu_device, cpu)->dev;
+ int err = 0;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_ONLINE:
+ err = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
+ break;
+ case CPU_DEAD:
+ sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
+ break;
+ }
+ return notifier_from_errno(err);
+}
+
+static int smp_add_present_cpu(int cpu)
+{
+ struct device *s;
+ struct cpu *c;
+ int rc;
+
+ c = kzalloc(sizeof(*c), GFP_KERNEL);
+ if (!c)
+ return -ENOMEM;
+ per_cpu(cpu_device, cpu) = c;
+ s = &c->dev;
+ c->hotpluggable = 1;
+ rc = register_cpu(c, cpu);
+ if (rc)
+ goto out;
+ rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
+ if (rc)
+ goto out_cpu;
+ if (cpu_online(cpu)) {
+ rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
+ if (rc)
+ goto out_online;
+ }
+ rc = topology_cpu_init(c);
+ if (rc)
+ goto out_topology;
+ return 0;
+
+out_topology:
+ if (cpu_online(cpu))
+ sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
+out_online:
+ sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
+out_cpu:
+#ifdef CONFIG_HOTPLUG_CPU
+ unregister_cpu(c);
+#endif
+out:
+ return rc;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+int __ref smp_rescan_cpus(void)
+{
+ struct sclp_cpu_info *info;
+ int nr;
+
+ info = smp_get_cpu_info();
+ if (!info)
+ return -ENOMEM;
+ get_online_cpus();
+ mutex_lock(&smp_cpu_state_mutex);
+ nr = __smp_rescan_cpus(info, 1);
+ mutex_unlock(&smp_cpu_state_mutex);
+ put_online_cpus();
+ kfree(info);
+ if (nr)
+ topology_schedule_update();
+ return 0;
+}
+
+static ssize_t __ref rescan_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ int rc;
+
+ rc = smp_rescan_cpus();
+ return rc ? rc : count;
+}
+static DEVICE_ATTR(rescan, 0200, NULL, rescan_store);
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static int __init s390_smp_init(void)
+{
+ int cpu, rc = 0;
+
+#ifdef CONFIG_HOTPLUG_CPU
+ rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
+ if (rc)
+ return rc;
+#endif
+ cpu_notifier_register_begin();
+ for_each_present_cpu(cpu) {
+ rc = smp_add_present_cpu(cpu);
+ if (rc)
+ goto out;
+ }
+
+ __hotcpu_notifier(smp_cpu_notify, 0);
+
+out:
+ cpu_notifier_register_done();
+ return rc;
+}
+subsys_initcall(s390_smp_init);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
new file mode 100644
index 000000000..1785cd822
--- /dev/null
+++ b/arch/s390/kernel/stacktrace.c
@@ -0,0 +1,96 @@
+/*
+ * Stack trace management functions
+ *
+ * Copyright IBM Corp. 2006
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+
+static unsigned long save_context_stack(struct stack_trace *trace,
+ unsigned long sp,
+ unsigned long low,
+ unsigned long high,
+ int savesched)
+{
+ struct stack_frame *sf;
+ struct pt_regs *regs;
+ unsigned long addr;
+
+ while(1) {
+ sp &= PSW_ADDR_INSN;
+ if (sp < low || sp > high)
+ return sp;
+ sf = (struct stack_frame *)sp;
+ while(1) {
+ addr = sf->gprs[8] & PSW_ADDR_INSN;
+ if (!trace->skip)
+ trace->entries[trace->nr_entries++] = addr;
+ else
+ trace->skip--;
+ if (trace->nr_entries >= trace->max_entries)
+ return sp;
+ low = sp;
+ sp = sf->back_chain & PSW_ADDR_INSN;
+ if (!sp)
+ break;
+ if (sp <= low || sp > high - sizeof(*sf))
+ return sp;
+ sf = (struct stack_frame *)sp;
+ }
+ /* Zero backchain detected, check for interrupt frame. */
+ sp = (unsigned long)(sf + 1);
+ if (sp <= low || sp > high - sizeof(*regs))
+ return sp;
+ regs = (struct pt_regs *)sp;
+ addr = regs->psw.addr & PSW_ADDR_INSN;
+ if (savesched || !in_sched_functions(addr)) {
+ if (!trace->skip)
+ trace->entries[trace->nr_entries++] = addr;
+ else
+ trace->skip--;
+ }
+ if (trace->nr_entries >= trace->max_entries)
+ return sp;
+ low = sp;
+ sp = regs->gprs[15];
+ }
+}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+ register unsigned long sp asm ("15");
+ unsigned long orig_sp, new_sp;
+
+ orig_sp = sp & PSW_ADDR_INSN;
+ new_sp = save_context_stack(trace, orig_sp,
+ S390_lowcore.panic_stack - PAGE_SIZE,
+ S390_lowcore.panic_stack, 1);
+ if (new_sp != orig_sp)
+ return;
+ new_sp = save_context_stack(trace, new_sp,
+ S390_lowcore.async_stack - ASYNC_SIZE,
+ S390_lowcore.async_stack, 1);
+ if (new_sp != orig_sp)
+ return;
+ save_context_stack(trace, new_sp,
+ S390_lowcore.thread_info,
+ S390_lowcore.thread_info + THREAD_SIZE, 1);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+ unsigned long sp, low, high;
+
+ sp = tsk->thread.ksp & PSW_ADDR_INSN;
+ low = (unsigned long) task_stack_page(tsk);
+ high = (unsigned long) task_pt_regs(tsk);
+ save_context_stack(trace, sp, low, high, 0);
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
new file mode 100644
index 000000000..d3236c9e2
--- /dev/null
+++ b/arch/s390/kernel/suspend.c
@@ -0,0 +1,225 @@
+/*
+ * Suspend support specific for s390.
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
+ */
+
+#include <linux/pfn.h>
+#include <linux/suspend.h>
+#include <linux/mm.h>
+#include <asm/ctl_reg.h>
+#include <asm/ipl.h>
+#include <asm/cio.h>
+#include <asm/pci.h>
+#include <asm/sections.h>
+#include "entry.h"
+
+/*
+ * The restore of the saved pages in an hibernation image will set
+ * the change and referenced bits in the storage key for each page.
+ * Overindication of the referenced bits after an hibernation cycle
+ * does not cause any harm but the overindication of the change bits
+ * would cause trouble.
+ * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each
+ * page to the most significant byte of the associated page frame
+ * number in the hibernation image.
+ */
+
+/*
+ * Key storage is allocated as a linked list of pages.
+ * The size of the keys array is (PAGE_SIZE - sizeof(long))
+ */
+struct page_key_data {
+ struct page_key_data *next;
+ unsigned char data[];
+};
+
+#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *))
+
+static struct page_key_data *page_key_data;
+static struct page_key_data *page_key_rp, *page_key_wp;
+static unsigned long page_key_rx, page_key_wx;
+unsigned long suspend_zero_pages;
+
+/*
+ * For each page in the hibernation image one additional byte is
+ * stored in the most significant byte of the page frame number.
+ * On suspend no additional memory is required but on resume the
+ * keys need to be memorized until the page data has been restored.
+ * Only then can the storage keys be set to their old state.
+ */
+unsigned long page_key_additional_pages(unsigned long pages)
+{
+ return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
+}
+
+/*
+ * Free page_key_data list of arrays.
+ */
+void page_key_free(void)
+{
+ struct page_key_data *pkd;
+
+ while (page_key_data) {
+ pkd = page_key_data;
+ page_key_data = pkd->next;
+ free_page((unsigned long) pkd);
+ }
+}
+
+/*
+ * Allocate page_key_data list of arrays with enough room to store
+ * one byte for each page in the hibernation image.
+ */
+int page_key_alloc(unsigned long pages)
+{
+ struct page_key_data *pk;
+ unsigned long size;
+
+ size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
+ while (size--) {
+ pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL);
+ if (!pk) {
+ page_key_free();
+ return -ENOMEM;
+ }
+ pk->next = page_key_data;
+ page_key_data = pk;
+ }
+ page_key_rp = page_key_wp = page_key_data;
+ page_key_rx = page_key_wx = 0;
+ return 0;
+}
+
+/*
+ * Save the storage key into the upper 8 bits of the page frame number.
+ */
+void page_key_read(unsigned long *pfn)
+{
+ unsigned long addr;
+
+ addr = (unsigned long) page_address(pfn_to_page(*pfn));
+ *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr);
+}
+
+/*
+ * Extract the storage key from the upper 8 bits of the page frame number
+ * and store it in the page_key_data list of arrays.
+ */
+void page_key_memorize(unsigned long *pfn)
+{
+ page_key_wp->data[page_key_wx] = *(unsigned char *) pfn;
+ *(unsigned char *) pfn = 0;
+ if (++page_key_wx < PAGE_KEY_DATA_SIZE)
+ return;
+ page_key_wp = page_key_wp->next;
+ page_key_wx = 0;
+}
+
+/*
+ * Get the next key from the page_key_data list of arrays and set the
+ * storage key of the page referred by @address. If @address refers to
+ * a "safe" page the swsusp_arch_resume code will transfer the storage
+ * key from the buffer page to the original page.
+ */
+void page_key_write(void *address)
+{
+ page_set_storage_key((unsigned long) address,
+ page_key_rp->data[page_key_rx], 0);
+ if (++page_key_rx >= PAGE_KEY_DATA_SIZE)
+ return;
+ page_key_rp = page_key_rp->next;
+ page_key_rx = 0;
+}
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
+ unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
+ unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1;
+ unsigned long stext_pfn = PFN_DOWN(__pa(&_stext));
+
+ /* Always save lowcore pages (LC protection might be enabled). */
+ if (pfn <= LC_PAGES)
+ return 0;
+ if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
+ return 1;
+ /* Skip memory holes and read-only pages (NSS, DCSS, ...). */
+ if (pfn >= stext_pfn && pfn <= eshared_pfn)
+ return ipl_info.type == IPL_TYPE_NSS ? 1 : 0;
+ if (tprot(PFN_PHYS(pfn)))
+ return 1;
+ return 0;
+}
+
+/*
+ * PM notifier callback for suspend
+ */
+static int suspend_pm_cb(struct notifier_block *nb, unsigned long action,
+ void *ptr)
+{
+ switch (action) {
+ case PM_SUSPEND_PREPARE:
+ case PM_HIBERNATION_PREPARE:
+ suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER);
+ if (!suspend_zero_pages)
+ return NOTIFY_BAD;
+ break;
+ case PM_POST_SUSPEND:
+ case PM_POST_HIBERNATION:
+ free_pages(suspend_zero_pages, LC_ORDER);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+ return NOTIFY_OK;
+}
+
+static int __init suspend_pm_init(void)
+{
+ pm_notifier(suspend_pm_cb, 0);
+ return 0;
+}
+arch_initcall(suspend_pm_init);
+
+void save_processor_state(void)
+{
+ /* swsusp_arch_suspend() actually saves all cpu register contents.
+ * Machine checks must be disabled since swsusp_arch_suspend() stores
+ * register contents to their lowcore save areas. That's the same
+ * place where register contents on machine checks would be saved.
+ * To avoid register corruption disable machine checks.
+ * We must also disable machine checks in the new psw mask for
+ * program checks, since swsusp_arch_suspend() may generate program
+ * checks. Disabling machine checks for all other new psw masks is
+ * just paranoia.
+ */
+ local_mcck_disable();
+ /* Disable lowcore protection */
+ __ctl_clear_bit(0,28);
+ S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK;
+ S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK;
+ S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK;
+ S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK;
+}
+
+void restore_processor_state(void)
+{
+ S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK;
+ S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK;
+ S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK;
+ S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK;
+ /* Enable lowcore protection */
+ __ctl_set_bit(0,28);
+ local_mcck_enable();
+}
+
+/* Called at the end of swsusp_arch_resume */
+void s390_early_resume(void)
+{
+ lgr_info_log();
+ channel_subsystem_reinit();
+ zpci_rescan();
+}
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
new file mode 100644
index 000000000..ca6294645
--- /dev/null
+++ b/arch/s390/kernel/swsusp.S
@@ -0,0 +1,317 @@
+/*
+ * S390 64-bit swsusp implementation
+ *
+ * Copyright IBM Corp. 2009
+ *
+ * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
+ * Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/sigp.h>
+
+/*
+ * Save register context in absolute 0 lowcore and call swsusp_save() to
+ * create in-memory kernel image. The context is saved in the designated
+ * "store status" memory locations (see POP).
+ * We return from this function twice. The first time during the suspend to
+ * disk process. The second time via the swsusp_arch_resume() function
+ * (see below) in the resume process.
+ * This function runs with disabled interrupts.
+ */
+ .section .text
+ENTRY(swsusp_arch_suspend)
+ stmg %r6,%r15,__SF_GPRS(%r15)
+ lgr %r1,%r15
+ aghi %r15,-STACK_FRAME_OVERHEAD
+ stg %r1,__SF_BACKCHAIN(%r15)
+
+ /* Deactivate DAT */
+ stnsm __SF_EMPTY(%r15),0xfb
+
+ /* Store prefix register on stack */
+ stpx __SF_EMPTY(%r15)
+
+ /* Save prefix register contents for lowcore copy */
+ llgf %r10,__SF_EMPTY(%r15)
+
+ /* Get pointer to save area */
+ lghi %r1,0x1000
+
+ /* Save CPU address */
+ stap __LC_EXT_CPU_ADDR(%r0)
+
+ /* Store registers */
+ mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */
+ stfpc 0x31c(%r1) /* store fpu control */
+ std 0,0x200(%r1) /* store f0 */
+ std 1,0x208(%r1) /* store f1 */
+ std 2,0x210(%r1) /* store f2 */
+ std 3,0x218(%r1) /* store f3 */
+ std 4,0x220(%r1) /* store f4 */
+ std 5,0x228(%r1) /* store f5 */
+ std 6,0x230(%r1) /* store f6 */
+ std 7,0x238(%r1) /* store f7 */
+ std 8,0x240(%r1) /* store f8 */
+ std 9,0x248(%r1) /* store f9 */
+ std 10,0x250(%r1) /* store f10 */
+ std 11,0x258(%r1) /* store f11 */
+ std 12,0x260(%r1) /* store f12 */
+ std 13,0x268(%r1) /* store f13 */
+ std 14,0x270(%r1) /* store f14 */
+ std 15,0x278(%r1) /* store f15 */
+ stam %a0,%a15,0x340(%r1) /* store access registers */
+ stctg %c0,%c15,0x380(%r1) /* store control registers */
+ stmg %r0,%r15,0x280(%r1) /* store general registers */
+
+ stpt 0x328(%r1) /* store timer */
+ stck __SF_EMPTY(%r15) /* store clock */
+ stckc 0x330(%r1) /* store clock comparator */
+
+ /* Update cputime accounting before going to sleep */
+ lg %r0,__LC_LAST_UPDATE_TIMER
+ slg %r0,0x328(%r1)
+ alg %r0,__LC_SYSTEM_TIMER
+ stg %r0,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),0x328(%r1)
+ lg %r0,__LC_LAST_UPDATE_CLOCK
+ slg %r0,__SF_EMPTY(%r15)
+ alg %r0,__LC_STEAL_TIMER
+ stg %r0,__LC_STEAL_TIMER
+ mvc __LC_LAST_UPDATE_CLOCK(8),__SF_EMPTY(%r15)
+
+ /* Activate DAT */
+ stosm __SF_EMPTY(%r15),0x04
+
+ /* Set prefix page to zero */
+ xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
+ spx __SF_EMPTY(%r15)
+
+ /* Save absolute zero pages */
+ larl %r2,suspend_zero_pages
+ lg %r2,0(%r2)
+ lghi %r4,0
+ lghi %r3,2*PAGE_SIZE
+ lghi %r5,2*PAGE_SIZE
+1: mvcle %r2,%r4,0
+ jo 1b
+
+ /* Copy lowcore to absolute zero lowcore */
+ lghi %r2,0
+ lgr %r4,%r10
+ lghi %r3,2*PAGE_SIZE
+ lghi %r5,2*PAGE_SIZE
+1: mvcle %r2,%r4,0
+ jo 1b
+
+ /* Save image */
+ brasl %r14,swsusp_save
+
+ /* Restore prefix register and return */
+ lghi %r1,0x1000
+ spx 0x318(%r1)
+ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
+ lghi %r2,0
+ br %r14
+
+/*
+ * Restore saved memory image to correct place and restore register context.
+ * Then we return to the function that called swsusp_arch_suspend().
+ * swsusp_arch_resume() runs with disabled interrupts.
+ */
+ENTRY(swsusp_arch_resume)
+ stmg %r6,%r15,__SF_GPRS(%r15)
+ lgr %r1,%r15
+ aghi %r15,-STACK_FRAME_OVERHEAD
+ stg %r1,__SF_BACKCHAIN(%r15)
+
+ /* Make all free pages stable */
+ lghi %r2,1
+ brasl %r14,arch_set_page_states
+
+ /* Deactivate DAT */
+ stnsm __SF_EMPTY(%r15),0xfb
+
+ /* Set prefix page to zero */
+ xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
+ spx __SF_EMPTY(%r15)
+
+ /* Restore saved image */
+ larl %r1,restore_pblist
+ lg %r1,0(%r1)
+ ltgr %r1,%r1
+ jz 2f
+0:
+ lg %r2,8(%r1)
+ lg %r4,0(%r1)
+ iske %r0,%r4
+ lghi %r3,PAGE_SIZE
+ lghi %r5,PAGE_SIZE
+1:
+ mvcle %r2,%r4,0
+ jo 1b
+ lg %r2,8(%r1)
+ sske %r0,%r2
+ lg %r1,16(%r1)
+ ltgr %r1,%r1
+ jnz 0b
+2:
+ ptlb /* flush tlb */
+
+ /* Reset System */
+ larl %r1,restart_entry
+ larl %r2,.Lrestart_diag308_psw
+ og %r1,0(%r2)
+ stg %r1,0(%r0)
+ larl %r1,.Lnew_pgm_check_psw
+ epsw %r2,%r3
+ stm %r2,%r3,0(%r1)
+ mvc __LC_PGM_NEW_PSW(16,%r0),0(%r1)
+ lghi %r0,0
+ diag %r0,%r0,0x308
+restart_entry:
+ lhi %r1,1
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE
+ sam64
+#ifdef CONFIG_SMP
+ larl %r1,smp_cpu_mt_shift
+ icm %r1,15,0(%r1)
+ jz smt_done
+ llgfr %r1,%r1
+smt_loop:
+ sigp %r1,%r0,SIGP_SET_MULTI_THREADING
+ brc 8,smt_done /* accepted */
+ brc 2,smt_loop /* busy, try again */
+smt_done:
+#endif
+ larl %r1,.Lnew_pgm_check_psw
+ lpswe 0(%r1)
+pgm_check_entry:
+
+ /* Switch to original suspend CPU */
+ larl %r1,.Lresume_cpu /* Resume CPU address: r2 */
+ stap 0(%r1)
+ llgh %r2,0(%r1)
+ llgh %r1,__LC_EXT_CPU_ADDR(%r0) /* Suspend CPU address: r1 */
+ cgr %r1,%r2
+ je restore_registers /* r1 = r2 -> nothing to do */
+ larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */
+ mvc __LC_RST_NEW_PSW(16,%r0),0(%r4)
+3:
+ sigp %r9,%r1,SIGP_INITIAL_CPU_RESET /* sigp initial cpu reset */
+ brc 8,4f /* accepted */
+ brc 2,3b /* busy, try again */
+
+ /* Suspend CPU not available -> panic */
+ larl %r15,init_thread_union
+ ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER)
+ larl %r2,.Lpanic_string
+ larl %r3,_sclp_print_early
+ lghi %r1,0
+ sam31
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE
+ basr %r14,%r3
+ larl %r3,.Ldisabled_wait_31
+ lpsw 0(%r3)
+4:
+ /* Switch to suspend CPU */
+ sigp %r9,%r1,SIGP_RESTART /* sigp restart to suspend CPU */
+ brc 2,4b /* busy, try again */
+5:
+ sigp %r9,%r2,SIGP_STOP /* sigp stop to current resume CPU */
+ brc 2,5b /* busy, try again */
+6: j 6b
+
+restart_suspend:
+ larl %r1,.Lresume_cpu
+ llgh %r2,0(%r1)
+7:
+ sigp %r9,%r2,SIGP_SENSE /* sigp sense, wait for resume CPU */
+ brc 8,7b /* accepted, status 0, still running */
+ brc 2,7b /* busy, try again */
+ tmll %r9,0x40 /* Test if resume CPU is stopped */
+ jz 7b
+
+restore_registers:
+ /* Restore registers */
+ lghi %r13,0x1000 /* %r1 = pointer to save area */
+
+ /* Ignore time spent in suspended state. */
+ llgf %r1,0x318(%r13)
+ stck __LC_LAST_UPDATE_CLOCK(%r1)
+ spt 0x328(%r13) /* reprogram timer */
+ //sckc 0x330(%r13) /* set clock comparator */
+
+ lctlg %c0,%c15,0x380(%r13) /* load control registers */
+ lam %a0,%a15,0x340(%r13) /* load access registers */
+
+ lfpc 0x31c(%r13) /* load fpu control */
+ ld 0,0x200(%r13) /* load f0 */
+ ld 1,0x208(%r13) /* load f1 */
+ ld 2,0x210(%r13) /* load f2 */
+ ld 3,0x218(%r13) /* load f3 */
+ ld 4,0x220(%r13) /* load f4 */
+ ld 5,0x228(%r13) /* load f5 */
+ ld 6,0x230(%r13) /* load f6 */
+ ld 7,0x238(%r13) /* load f7 */
+ ld 8,0x240(%r13) /* load f8 */
+ ld 9,0x248(%r13) /* load f9 */
+ ld 10,0x250(%r13) /* load f10 */
+ ld 11,0x258(%r13) /* load f11 */
+ ld 12,0x260(%r13) /* load f12 */
+ ld 13,0x268(%r13) /* load f13 */
+ ld 14,0x270(%r13) /* load f14 */
+ ld 15,0x278(%r13) /* load f15 */
+
+ /* Load old stack */
+ lg %r15,0x2f8(%r13)
+
+ /* Save prefix register */
+ mvc __SF_EMPTY(4,%r15),0x318(%r13)
+
+ /* Restore absolute zero pages */
+ lghi %r2,0
+ larl %r4,suspend_zero_pages
+ lg %r4,0(%r4)
+ lghi %r3,2*PAGE_SIZE
+ lghi %r5,2*PAGE_SIZE
+1: mvcle %r2,%r4,0
+ jo 1b
+
+ /* Restore prefix register */
+ spx __SF_EMPTY(%r15)
+
+ /* Activate DAT */
+ stosm __SF_EMPTY(%r15),0x04
+
+ /* Make all free pages unstable */
+ lghi %r2,0
+ brasl %r14,arch_set_page_states
+
+ /* Call arch specific early resume code */
+ brasl %r14,s390_early_resume
+
+ /* Return 0 */
+ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
+ lghi %r2,0
+ br %r14
+
+ .section .data..nosave,"aw",@progbits
+ .align 8
+.Ldisabled_wait_31:
+ .long 0x000a0000,0x00000000
+.Lpanic_string:
+ .asciz "Resume not possible because suspend CPU is no longer available"
+ .align 8
+.Lrestart_diag308_psw:
+ .long 0x00080000,0x80000000
+.Lrestart_suspend_psw:
+ .quad 0x0000000180000000,restart_suspend
+.Lnew_pgm_check_psw:
+ .quad 0,pgm_check_entry
+.Lresume_cpu:
+ .byte 0,0
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
new file mode 100644
index 000000000..f145490cc
--- /dev/null
+++ b/arch/s390/kernel/sys_s390.c
@@ -0,0 +1,91 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Thomas Spatzier (tspat@de.ibm.com)
+ *
+ * Derived from "arch/i386/kernel/sys_i386.c"
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/s390
+ * platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <linux/personality.h>
+#include <linux/unistd.h>
+#include <linux/ipc.h>
+#include <asm/uaccess.h>
+#include "entry.h"
+
+/*
+ * Perform the mmap() system call. Linux for S/390 isn't able to handle more
+ * than 5 system call parameters, so this system call uses a memory block
+ * for parameter passing.
+ */
+
+struct s390_mmap_arg_struct {
+ unsigned long addr;
+ unsigned long len;
+ unsigned long prot;
+ unsigned long flags;
+ unsigned long fd;
+ unsigned long offset;
+};
+
+SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
+{
+ struct s390_mmap_arg_struct a;
+ int error = -EFAULT;
+
+ if (copy_from_user(&a, arg, sizeof(a)))
+ goto out;
+ error = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
+out:
+ return error;
+}
+
+/*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls.
+ */
+SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
+ unsigned long, third, void __user *, ptr)
+{
+ if (call >> 16)
+ return -EINVAL;
+ /* The s390 sys_ipc variant has only five parameters instead of six
+ * like the generic variant. The only difference is the handling of
+ * the SEMTIMEDOP subcall where on s390 the third parameter is used
+ * as a pointer to a struct timespec where the generic variant uses
+ * the fifth parameter.
+ * Therefore we can call the generic variant by simply passing the
+ * third parameter also as fifth parameter.
+ */
+ return sys_ipc(call, first, second, third, ptr, third);
+}
+
+SYSCALL_DEFINE1(s390_personality, unsigned int, personality)
+{
+ unsigned int ret;
+
+ if (personality(current->personality) == PER_LINUX32 &&
+ personality(personality) == PER_LINUX)
+ personality |= PER_LINUX32;
+ ret = sys_personality(personality);
+ if (personality(ret) == PER_LINUX32)
+ ret &= ~PER_LINUX32;
+
+ return ret;
+}
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
new file mode 100644
index 000000000..1acad0268
--- /dev/null
+++ b/arch/s390/kernel/syscalls.S
@@ -0,0 +1,365 @@
+/*
+ * definitions for sys_call_table, each line represents an
+ * entry in the table in the form
+ * SYSCALL(64 bit syscall, 31 bit emulated syscall)
+ *
+ * this file is meant to be included from entry.S
+ */
+
+#define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall)
+
+NI_SYSCALL /* 0 */
+SYSCALL(sys_exit,compat_sys_exit)
+SYSCALL(sys_fork,sys_fork)
+SYSCALL(sys_read,compat_sys_s390_read)
+SYSCALL(sys_write,compat_sys_s390_write)
+SYSCALL(sys_open,compat_sys_open) /* 5 */
+SYSCALL(sys_close,compat_sys_close)
+SYSCALL(sys_restart_syscall,sys_restart_syscall)
+SYSCALL(sys_creat,compat_sys_creat)
+SYSCALL(sys_link,compat_sys_link)
+SYSCALL(sys_unlink,compat_sys_unlink) /* 10 */
+SYSCALL(sys_execve,compat_sys_execve)
+SYSCALL(sys_chdir,compat_sys_chdir)
+SYSCALL(sys_ni_syscall,compat_sys_time) /* old time syscall */
+SYSCALL(sys_mknod,compat_sys_mknod)
+SYSCALL(sys_chmod,compat_sys_chmod) /* 15 */
+SYSCALL(sys_ni_syscall,compat_sys_s390_lchown16) /* old lchown16 syscall*/
+NI_SYSCALL /* old break syscall holder */
+NI_SYSCALL /* old stat syscall holder */
+SYSCALL(sys_lseek,compat_sys_lseek)
+SYSCALL(sys_getpid,sys_getpid) /* 20 */
+SYSCALL(sys_mount,compat_sys_mount)
+SYSCALL(sys_oldumount,compat_sys_oldumount)
+SYSCALL(sys_ni_syscall,compat_sys_s390_setuid16) /* old setuid16 syscall*/
+SYSCALL(sys_ni_syscall,compat_sys_s390_getuid16) /* old getuid16 syscall*/
+SYSCALL(sys_ni_syscall,compat_sys_stime) /* 25 old stime syscall */
+SYSCALL(sys_ptrace,compat_sys_ptrace)
+SYSCALL(sys_alarm,compat_sys_alarm)
+NI_SYSCALL /* old fstat syscall */
+SYSCALL(sys_pause,sys_pause)
+SYSCALL(sys_utime,compat_sys_utime) /* 30 */
+NI_SYSCALL /* old stty syscall */
+NI_SYSCALL /* old gtty syscall */
+SYSCALL(sys_access,compat_sys_access)
+SYSCALL(sys_nice,compat_sys_nice)
+NI_SYSCALL /* 35 old ftime syscall */
+SYSCALL(sys_sync,sys_sync)
+SYSCALL(sys_kill,compat_sys_kill)
+SYSCALL(sys_rename,compat_sys_rename)
+SYSCALL(sys_mkdir,compat_sys_mkdir)
+SYSCALL(sys_rmdir,compat_sys_rmdir) /* 40 */
+SYSCALL(sys_dup,compat_sys_dup)
+SYSCALL(sys_pipe,compat_sys_pipe)
+SYSCALL(sys_times,compat_sys_times)
+NI_SYSCALL /* old prof syscall */
+SYSCALL(sys_brk,compat_sys_brk) /* 45 */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setgid16) /* old setgid16 syscall*/
+SYSCALL(sys_ni_syscall,compat_sys_s390_getgid16) /* old getgid16 syscall*/
+SYSCALL(sys_signal,compat_sys_signal)
+SYSCALL(sys_ni_syscall,compat_sys_s390_geteuid16) /* old geteuid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_getegid16) /* 50 old getegid16 syscall */
+SYSCALL(sys_acct,compat_sys_acct)
+SYSCALL(sys_umount,compat_sys_umount)
+NI_SYSCALL /* old lock syscall */
+SYSCALL(sys_ioctl,compat_sys_ioctl)
+SYSCALL(sys_fcntl,compat_sys_fcntl) /* 55 */
+NI_SYSCALL /* intel mpx syscall */
+SYSCALL(sys_setpgid,compat_sys_setpgid)
+NI_SYSCALL /* old ulimit syscall */
+NI_SYSCALL /* old uname syscall */
+SYSCALL(sys_umask,compat_sys_umask) /* 60 */
+SYSCALL(sys_chroot,compat_sys_chroot)
+SYSCALL(sys_ustat,compat_sys_ustat)
+SYSCALL(sys_dup2,compat_sys_dup2)
+SYSCALL(sys_getppid,sys_getppid)
+SYSCALL(sys_getpgrp,sys_getpgrp) /* 65 */
+SYSCALL(sys_setsid,sys_setsid)
+SYSCALL(sys_sigaction,compat_sys_sigaction)
+NI_SYSCALL /* old sgetmask syscall*/
+NI_SYSCALL /* old ssetmask syscall*/
+SYSCALL(sys_ni_syscall,compat_sys_s390_setreuid16) /* old setreuid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setregid16) /* old setregid16 syscall */
+SYSCALL(sys_sigsuspend,compat_sys_sigsuspend)
+SYSCALL(sys_sigpending,compat_sys_sigpending)
+SYSCALL(sys_sethostname,compat_sys_sethostname)
+SYSCALL(sys_setrlimit,compat_sys_setrlimit) /* 75 */
+SYSCALL(sys_getrlimit,compat_sys_old_getrlimit)
+SYSCALL(sys_getrusage,compat_sys_getrusage)
+SYSCALL(sys_gettimeofday,compat_sys_gettimeofday)
+SYSCALL(sys_settimeofday,compat_sys_settimeofday)
+SYSCALL(sys_ni_syscall,compat_sys_s390_getgroups16) /* 80 old getgroups16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setgroups16) /* old setgroups16 syscall */
+NI_SYSCALL /* old select syscall */
+SYSCALL(sys_symlink,compat_sys_symlink)
+NI_SYSCALL /* old lstat syscall */
+SYSCALL(sys_readlink,compat_sys_readlink) /* 85 */
+SYSCALL(sys_uselib,compat_sys_uselib)
+SYSCALL(sys_swapon,compat_sys_swapon)
+SYSCALL(sys_reboot,compat_sys_reboot)
+SYSCALL(sys_ni_syscall,compat_sys_old_readdir) /* old readdir syscall */
+SYSCALL(sys_old_mmap,compat_sys_s390_old_mmap) /* 90 */
+SYSCALL(sys_munmap,compat_sys_munmap)
+SYSCALL(sys_truncate,compat_sys_truncate)
+SYSCALL(sys_ftruncate,compat_sys_ftruncate)
+SYSCALL(sys_fchmod,compat_sys_fchmod)
+SYSCALL(sys_ni_syscall,compat_sys_s390_fchown16) /* 95 old fchown16 syscall*/
+SYSCALL(sys_getpriority,compat_sys_getpriority)
+SYSCALL(sys_setpriority,compat_sys_setpriority)
+NI_SYSCALL /* old profil syscall */
+SYSCALL(sys_statfs,compat_sys_statfs)
+SYSCALL(sys_fstatfs,compat_sys_fstatfs) /* 100 */
+NI_SYSCALL /* ioperm for i386 */
+SYSCALL(sys_socketcall,compat_sys_socketcall)
+SYSCALL(sys_syslog,compat_sys_syslog)
+SYSCALL(sys_setitimer,compat_sys_setitimer)
+SYSCALL(sys_getitimer,compat_sys_getitimer) /* 105 */
+SYSCALL(sys_newstat,compat_sys_newstat)
+SYSCALL(sys_newlstat,compat_sys_newlstat)
+SYSCALL(sys_newfstat,compat_sys_newfstat)
+NI_SYSCALL /* old uname syscall */
+SYSCALL(sys_lookup_dcookie,compat_sys_lookup_dcookie) /* 110 */
+SYSCALL(sys_vhangup,sys_vhangup)
+NI_SYSCALL /* old "idle" system call */
+NI_SYSCALL /* vm86old for i386 */
+SYSCALL(sys_wait4,compat_sys_wait4)
+SYSCALL(sys_swapoff,compat_sys_swapoff) /* 115 */
+SYSCALL(sys_sysinfo,compat_sys_sysinfo)
+SYSCALL(sys_s390_ipc,compat_sys_s390_ipc)
+SYSCALL(sys_fsync,compat_sys_fsync)
+SYSCALL(sys_sigreturn,compat_sys_sigreturn)
+SYSCALL(sys_clone,compat_sys_clone) /* 120 */
+SYSCALL(sys_setdomainname,compat_sys_setdomainname)
+SYSCALL(sys_newuname,compat_sys_newuname)
+NI_SYSCALL /* modify_ldt for i386 */
+SYSCALL(sys_adjtimex,compat_sys_adjtimex)
+SYSCALL(sys_mprotect,compat_sys_mprotect) /* 125 */
+SYSCALL(sys_sigprocmask,compat_sys_sigprocmask)
+NI_SYSCALL /* old "create module" */
+SYSCALL(sys_init_module,compat_sys_init_module)
+SYSCALL(sys_delete_module,compat_sys_delete_module)
+NI_SYSCALL /* 130: old get_kernel_syms */
+SYSCALL(sys_quotactl,compat_sys_quotactl)
+SYSCALL(sys_getpgid,compat_sys_getpgid)
+SYSCALL(sys_fchdir,compat_sys_fchdir)
+SYSCALL(sys_bdflush,compat_sys_bdflush)
+SYSCALL(sys_sysfs,compat_sys_sysfs) /* 135 */
+SYSCALL(sys_s390_personality,compat_sys_s390_personality)
+NI_SYSCALL /* for afs_syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setfsuid16) /* old setfsuid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setfsgid16) /* old setfsgid16 syscall */
+SYSCALL(sys_llseek,compat_sys_llseek) /* 140 */
+SYSCALL(sys_getdents,compat_sys_getdents)
+SYSCALL(sys_select,compat_sys_select)
+SYSCALL(sys_flock,compat_sys_flock)
+SYSCALL(sys_msync,compat_sys_msync)
+SYSCALL(sys_readv,compat_sys_readv) /* 145 */
+SYSCALL(sys_writev,compat_sys_writev)
+SYSCALL(sys_getsid,compat_sys_getsid)
+SYSCALL(sys_fdatasync,compat_sys_fdatasync)
+SYSCALL(sys_sysctl,compat_sys_sysctl)
+SYSCALL(sys_mlock,compat_sys_mlock) /* 150 */
+SYSCALL(sys_munlock,compat_sys_munlock)
+SYSCALL(sys_mlockall,compat_sys_mlockall)
+SYSCALL(sys_munlockall,sys_munlockall)
+SYSCALL(sys_sched_setparam,compat_sys_sched_setparam)
+SYSCALL(sys_sched_getparam,compat_sys_sched_getparam) /* 155 */
+SYSCALL(sys_sched_setscheduler,compat_sys_sched_setscheduler)
+SYSCALL(sys_sched_getscheduler,compat_sys_sched_getscheduler)
+SYSCALL(sys_sched_yield,sys_sched_yield)
+SYSCALL(sys_sched_get_priority_max,compat_sys_sched_get_priority_max)
+SYSCALL(sys_sched_get_priority_min,compat_sys_sched_get_priority_min) /* 160 */
+SYSCALL(sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval)
+SYSCALL(sys_nanosleep,compat_sys_nanosleep)
+SYSCALL(sys_mremap,compat_sys_mremap)
+SYSCALL(sys_ni_syscall,compat_sys_s390_setresuid16) /* old setresuid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_getresuid16) /* 165 old getresuid16 syscall */
+NI_SYSCALL /* for vm86 */
+NI_SYSCALL /* old sys_query_module */
+SYSCALL(sys_poll,compat_sys_poll)
+NI_SYSCALL /* old nfsservctl */
+SYSCALL(sys_ni_syscall,compat_sys_s390_setresgid16) /* 170 old setresgid16 syscall */
+SYSCALL(sys_ni_syscall,compat_sys_s390_getresgid16) /* old getresgid16 syscall */
+SYSCALL(sys_prctl,compat_sys_prctl)
+SYSCALL(sys_rt_sigreturn,compat_sys_rt_sigreturn)
+SYSCALL(sys_rt_sigaction,compat_sys_rt_sigaction)
+SYSCALL(sys_rt_sigprocmask,compat_sys_rt_sigprocmask) /* 175 */
+SYSCALL(sys_rt_sigpending,compat_sys_rt_sigpending)
+SYSCALL(sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait)
+SYSCALL(sys_rt_sigqueueinfo,compat_sys_rt_sigqueueinfo)
+SYSCALL(sys_rt_sigsuspend,compat_sys_rt_sigsuspend)
+SYSCALL(sys_pread64,compat_sys_s390_pread64) /* 180 */
+SYSCALL(sys_pwrite64,compat_sys_s390_pwrite64)
+SYSCALL(sys_ni_syscall,compat_sys_s390_chown16) /* old chown16 syscall */
+SYSCALL(sys_getcwd,compat_sys_getcwd)
+SYSCALL(sys_capget,compat_sys_capget)
+SYSCALL(sys_capset,compat_sys_capset) /* 185 */
+SYSCALL(sys_sigaltstack,compat_sys_sigaltstack)
+SYSCALL(sys_sendfile64,compat_sys_sendfile)
+NI_SYSCALL /* streams1 */
+NI_SYSCALL /* streams2 */
+SYSCALL(sys_vfork,sys_vfork) /* 190 */
+SYSCALL(sys_getrlimit,compat_sys_getrlimit)
+SYSCALL(sys_mmap2,compat_sys_s390_mmap2)
+SYSCALL(sys_ni_syscall,compat_sys_s390_truncate64)
+SYSCALL(sys_ni_syscall,compat_sys_s390_ftruncate64)
+SYSCALL(sys_ni_syscall,compat_sys_s390_stat64) /* 195 */
+SYSCALL(sys_ni_syscall,compat_sys_s390_lstat64)
+SYSCALL(sys_ni_syscall,compat_sys_s390_fstat64)
+SYSCALL(sys_lchown,compat_sys_lchown)
+SYSCALL(sys_getuid,sys_getuid)
+SYSCALL(sys_getgid,sys_getgid) /* 200 */
+SYSCALL(sys_geteuid,sys_geteuid)
+SYSCALL(sys_getegid,sys_getegid)
+SYSCALL(sys_setreuid,compat_sys_setreuid)
+SYSCALL(sys_setregid,compat_sys_setregid)
+SYSCALL(sys_getgroups,compat_sys_getgroups) /* 205 */
+SYSCALL(sys_setgroups,compat_sys_setgroups)
+SYSCALL(sys_fchown,compat_sys_fchown)
+SYSCALL(sys_setresuid,compat_sys_setresuid)
+SYSCALL(sys_getresuid,compat_sys_getresuid)
+SYSCALL(sys_setresgid,compat_sys_setresgid) /* 210 */
+SYSCALL(sys_getresgid,compat_sys_getresgid)
+SYSCALL(sys_chown,compat_sys_chown)
+SYSCALL(sys_setuid,compat_sys_setuid)
+SYSCALL(sys_setgid,compat_sys_setgid)
+SYSCALL(sys_setfsuid,compat_sys_setfsuid) /* 215 */
+SYSCALL(sys_setfsgid,compat_sys_setfsgid)
+SYSCALL(sys_pivot_root,compat_sys_pivot_root)
+SYSCALL(sys_mincore,compat_sys_mincore)
+SYSCALL(sys_madvise,compat_sys_madvise)
+SYSCALL(sys_getdents64,compat_sys_getdents64) /* 220 */
+SYSCALL(sys_ni_syscall,compat_sys_fcntl64)
+SYSCALL(sys_readahead,compat_sys_s390_readahead)
+SYSCALL(sys_ni_syscall,compat_sys_sendfile64)
+SYSCALL(sys_setxattr,compat_sys_setxattr)
+SYSCALL(sys_lsetxattr,compat_sys_lsetxattr) /* 225 */
+SYSCALL(sys_fsetxattr,compat_sys_fsetxattr)
+SYSCALL(sys_getxattr,compat_sys_getxattr)
+SYSCALL(sys_lgetxattr,compat_sys_lgetxattr)
+SYSCALL(sys_fgetxattr,compat_sys_fgetxattr)
+SYSCALL(sys_listxattr,compat_sys_listxattr) /* 230 */
+SYSCALL(sys_llistxattr,compat_sys_llistxattr)
+SYSCALL(sys_flistxattr,compat_sys_flistxattr)
+SYSCALL(sys_removexattr,compat_sys_removexattr)
+SYSCALL(sys_lremovexattr,compat_sys_lremovexattr)
+SYSCALL(sys_fremovexattr,compat_sys_fremovexattr) /* 235 */
+SYSCALL(sys_gettid,sys_gettid)
+SYSCALL(sys_tkill,compat_sys_tkill)
+SYSCALL(sys_futex,compat_sys_futex)
+SYSCALL(sys_sched_setaffinity,compat_sys_sched_setaffinity)
+SYSCALL(sys_sched_getaffinity,compat_sys_sched_getaffinity) /* 240 */
+SYSCALL(sys_tgkill,compat_sys_tgkill)
+NI_SYSCALL /* reserved for TUX */
+SYSCALL(sys_io_setup,compat_sys_io_setup)
+SYSCALL(sys_io_destroy,compat_sys_io_destroy)
+SYSCALL(sys_io_getevents,compat_sys_io_getevents) /* 245 */
+SYSCALL(sys_io_submit,compat_sys_io_submit)
+SYSCALL(sys_io_cancel,compat_sys_io_cancel)
+SYSCALL(sys_exit_group,compat_sys_exit_group)
+SYSCALL(sys_epoll_create,compat_sys_epoll_create)
+SYSCALL(sys_epoll_ctl,compat_sys_epoll_ctl) /* 250 */
+SYSCALL(sys_epoll_wait,compat_sys_epoll_wait)
+SYSCALL(sys_set_tid_address,compat_sys_set_tid_address)
+SYSCALL(sys_fadvise64_64,compat_sys_s390_fadvise64)
+SYSCALL(sys_timer_create,compat_sys_timer_create)
+SYSCALL(sys_timer_settime,compat_sys_timer_settime) /* 255 */
+SYSCALL(sys_timer_gettime,compat_sys_timer_gettime)
+SYSCALL(sys_timer_getoverrun,compat_sys_timer_getoverrun)
+SYSCALL(sys_timer_delete,compat_sys_timer_delete)
+SYSCALL(sys_clock_settime,compat_sys_clock_settime)
+SYSCALL(sys_clock_gettime,compat_sys_clock_gettime) /* 260 */
+SYSCALL(sys_clock_getres,compat_sys_clock_getres)
+SYSCALL(sys_clock_nanosleep,compat_sys_clock_nanosleep)
+NI_SYSCALL /* reserved for vserver */
+SYSCALL(sys_ni_syscall,compat_sys_s390_fadvise64_64)
+SYSCALL(sys_statfs64,compat_sys_statfs64)
+SYSCALL(sys_fstatfs64,compat_sys_fstatfs64)
+SYSCALL(sys_remap_file_pages,compat_sys_remap_file_pages)
+NI_SYSCALL /* 268 sys_mbind */
+NI_SYSCALL /* 269 sys_get_mempolicy */
+NI_SYSCALL /* 270 sys_set_mempolicy */
+SYSCALL(sys_mq_open,compat_sys_mq_open)
+SYSCALL(sys_mq_unlink,compat_sys_mq_unlink)
+SYSCALL(sys_mq_timedsend,compat_sys_mq_timedsend)
+SYSCALL(sys_mq_timedreceive,compat_sys_mq_timedreceive)
+SYSCALL(sys_mq_notify,compat_sys_mq_notify) /* 275 */
+SYSCALL(sys_mq_getsetattr,compat_sys_mq_getsetattr)
+SYSCALL(sys_kexec_load,compat_sys_kexec_load)
+SYSCALL(sys_add_key,compat_sys_add_key)
+SYSCALL(sys_request_key,compat_sys_request_key)
+SYSCALL(sys_keyctl,compat_sys_keyctl) /* 280 */
+SYSCALL(sys_waitid,compat_sys_waitid)
+SYSCALL(sys_ioprio_set,compat_sys_ioprio_set)
+SYSCALL(sys_ioprio_get,compat_sys_ioprio_get)
+SYSCALL(sys_inotify_init,sys_inotify_init)
+SYSCALL(sys_inotify_add_watch,compat_sys_inotify_add_watch) /* 285 */
+SYSCALL(sys_inotify_rm_watch,compat_sys_inotify_rm_watch)
+NI_SYSCALL /* 287 sys_migrate_pages */
+SYSCALL(sys_openat,compat_sys_openat)
+SYSCALL(sys_mkdirat,compat_sys_mkdirat)
+SYSCALL(sys_mknodat,compat_sys_mknodat) /* 290 */
+SYSCALL(sys_fchownat,compat_sys_fchownat)
+SYSCALL(sys_futimesat,compat_sys_futimesat)
+SYSCALL(sys_newfstatat,compat_sys_s390_fstatat64)
+SYSCALL(sys_unlinkat,compat_sys_unlinkat)
+SYSCALL(sys_renameat,compat_sys_renameat) /* 295 */
+SYSCALL(sys_linkat,compat_sys_linkat)
+SYSCALL(sys_symlinkat,compat_sys_symlinkat)
+SYSCALL(sys_readlinkat,compat_sys_readlinkat)
+SYSCALL(sys_fchmodat,compat_sys_fchmodat)
+SYSCALL(sys_faccessat,compat_sys_faccessat) /* 300 */
+SYSCALL(sys_pselect6,compat_sys_pselect6)
+SYSCALL(sys_ppoll,compat_sys_ppoll)
+SYSCALL(sys_unshare,compat_sys_unshare)
+SYSCALL(sys_set_robust_list,compat_sys_set_robust_list)
+SYSCALL(sys_get_robust_list,compat_sys_get_robust_list)
+SYSCALL(sys_splice,compat_sys_splice)
+SYSCALL(sys_sync_file_range,compat_sys_s390_sync_file_range)
+SYSCALL(sys_tee,compat_sys_tee)
+SYSCALL(sys_vmsplice,compat_sys_vmsplice)
+NI_SYSCALL /* 310 sys_move_pages */
+SYSCALL(sys_getcpu,compat_sys_getcpu)
+SYSCALL(sys_epoll_pwait,compat_sys_epoll_pwait)
+SYSCALL(sys_utimes,compat_sys_utimes)
+SYSCALL(sys_fallocate,compat_sys_s390_fallocate)
+SYSCALL(sys_utimensat,compat_sys_utimensat) /* 315 */
+SYSCALL(sys_signalfd,compat_sys_signalfd)
+NI_SYSCALL /* 317 old sys_timer_fd */
+SYSCALL(sys_eventfd,compat_sys_eventfd)
+SYSCALL(sys_timerfd_create,compat_sys_timerfd_create)
+SYSCALL(sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */
+SYSCALL(sys_timerfd_gettime,compat_sys_timerfd_gettime)
+SYSCALL(sys_signalfd4,compat_sys_signalfd4)
+SYSCALL(sys_eventfd2,compat_sys_eventfd2)
+SYSCALL(sys_inotify_init1,compat_sys_inotify_init1)
+SYSCALL(sys_pipe2,compat_sys_pipe2) /* 325 */
+SYSCALL(sys_dup3,compat_sys_dup3)
+SYSCALL(sys_epoll_create1,compat_sys_epoll_create1)
+SYSCALL(sys_preadv,compat_sys_preadv)
+SYSCALL(sys_pwritev,compat_sys_pwritev)
+SYSCALL(sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */
+SYSCALL(sys_perf_event_open,compat_sys_perf_event_open)
+SYSCALL(sys_fanotify_init,compat_sys_fanotify_init)
+SYSCALL(sys_fanotify_mark,compat_sys_fanotify_mark)
+SYSCALL(sys_prlimit64,compat_sys_prlimit64)
+SYSCALL(sys_name_to_handle_at,compat_sys_name_to_handle_at) /* 335 */
+SYSCALL(sys_open_by_handle_at,compat_sys_open_by_handle_at)
+SYSCALL(sys_clock_adjtime,compat_sys_clock_adjtime)
+SYSCALL(sys_syncfs,compat_sys_syncfs)
+SYSCALL(sys_setns,compat_sys_setns)
+SYSCALL(sys_process_vm_readv,compat_sys_process_vm_readv) /* 340 */
+SYSCALL(sys_process_vm_writev,compat_sys_process_vm_writev)
+SYSCALL(sys_s390_runtime_instr,compat_sys_s390_runtime_instr)
+SYSCALL(sys_kcmp,compat_sys_kcmp)
+SYSCALL(sys_finit_module,compat_sys_finit_module)
+SYSCALL(sys_sched_setattr,compat_sys_sched_setattr) /* 345 */
+SYSCALL(sys_sched_getattr,compat_sys_sched_getattr)
+SYSCALL(sys_renameat2,compat_sys_renameat2)
+SYSCALL(sys_seccomp,compat_sys_seccomp)
+SYSCALL(sys_getrandom,compat_sys_getrandom)
+SYSCALL(sys_memfd_create,compat_sys_memfd_create) /* 350 */
+SYSCALL(sys_bpf,compat_sys_bpf)
+SYSCALL(sys_s390_pci_mmio_write,compat_sys_s390_pci_mmio_write)
+SYSCALL(sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read)
+SYSCALL(sys_execveat,compat_sys_execveat)
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
new file mode 100644
index 000000000..99babea02
--- /dev/null
+++ b/arch/s390/kernel/sysinfo.c
@@ -0,0 +1,465 @@
+/*
+ * Copyright IBM Corp. 2001, 2009
+ * Author(s): Ulrich Weigand <Ulrich.Weigand@de.ibm.com>,
+ * Martin Schwidefsky <schwidefsky@de.ibm.com>,
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
+#include <asm/cpcmd.h>
+#include <asm/topology.h>
+
+/* Sigh, math-emu. Don't ask. */
+#include <asm/sfp-util.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+
+int topology_max_mnest;
+
+/*
+ * stsi - store system information
+ *
+ * Returns the current configuration level if function code 0 was specified.
+ * Otherwise returns 0 on success or a negative value on error.
+ */
+int stsi(void *sysinfo, int fc, int sel1, int sel2)
+{
+ register int r0 asm("0") = (fc << 28) | sel1;
+ register int r1 asm("1") = sel2;
+ int rc = 0;
+
+ asm volatile(
+ " stsi 0(%3)\n"
+ "0: jz 2f\n"
+ "1: lhi %1,%4\n"
+ "2:\n"
+ EX_TABLE(0b, 1b)
+ : "+d" (r0), "+d" (rc)
+ : "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP)
+ : "cc", "memory");
+ if (rc)
+ return rc;
+ return fc ? 0 : ((unsigned int) r0) >> 28;
+}
+EXPORT_SYMBOL(stsi);
+
+static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
+{
+ int i;
+
+ if (stsi(info, 1, 1, 1))
+ return;
+ EBCASC(info->manufacturer, sizeof(info->manufacturer));
+ EBCASC(info->type, sizeof(info->type));
+ EBCASC(info->model, sizeof(info->model));
+ EBCASC(info->sequence, sizeof(info->sequence));
+ EBCASC(info->plant, sizeof(info->plant));
+ EBCASC(info->model_capacity, sizeof(info->model_capacity));
+ EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap));
+ EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap));
+ seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer);
+ seq_printf(m, "Type: %-4.4s\n", info->type);
+ /*
+ * Sigh: the model field has been renamed with System z9
+ * to model_capacity and a new model field has been added
+ * after the plant field. To avoid confusing older programs
+ * the "Model:" prints "model_capacity model" or just
+ * "model_capacity" if the model string is empty .
+ */
+ seq_printf(m, "Model: %-16.16s", info->model_capacity);
+ if (info->model[0] != '\0')
+ seq_printf(m, " %-16.16s", info->model);
+ seq_putc(m, '\n');
+ seq_printf(m, "Sequence Code: %-16.16s\n", info->sequence);
+ seq_printf(m, "Plant: %-4.4s\n", info->plant);
+ seq_printf(m, "Model Capacity: %-16.16s %08u\n",
+ info->model_capacity, info->model_cap_rating);
+ if (info->model_perm_cap_rating)
+ seq_printf(m, "Model Perm. Capacity: %-16.16s %08u\n",
+ info->model_perm_cap,
+ info->model_perm_cap_rating);
+ if (info->model_temp_cap_rating)
+ seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n",
+ info->model_temp_cap,
+ info->model_temp_cap_rating);
+ if (info->ncr)
+ seq_printf(m, "Nominal Cap. Rating: %08u\n", info->ncr);
+ if (info->npr)
+ seq_printf(m, "Nominal Perm. Rating: %08u\n", info->npr);
+ if (info->ntr)
+ seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr);
+ if (info->cai) {
+ seq_printf(m, "Capacity Adj. Ind.: %d\n", info->cai);
+ seq_printf(m, "Capacity Ch. Reason: %d\n", info->ccr);
+ seq_printf(m, "Capacity Transient: %d\n", info->t);
+ }
+ if (info->p) {
+ for (i = 1; i <= ARRAY_SIZE(info->typepct); i++) {
+ seq_printf(m, "Type %d Percentage: %d\n",
+ i, info->typepct[i - 1]);
+ }
+ }
+}
+
+static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info)
+{
+ static int max_mnest;
+ int i, rc;
+
+ seq_putc(m, '\n');
+ if (!MACHINE_HAS_TOPOLOGY)
+ return;
+ if (stsi(info, 15, 1, topology_max_mnest))
+ return;
+ seq_printf(m, "CPU Topology HW: ");
+ for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+ seq_printf(m, " %d", info->mag[i]);
+ seq_putc(m, '\n');
+#ifdef CONFIG_SCHED_MC
+ store_topology(info);
+ seq_printf(m, "CPU Topology SW: ");
+ for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+ seq_printf(m, " %d", info->mag[i]);
+ seq_putc(m, '\n');
+#endif
+}
+
+static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info)
+{
+ struct sysinfo_1_2_2_extension *ext;
+ int i;
+
+ if (stsi(info, 1, 2, 2))
+ return;
+ ext = (struct sysinfo_1_2_2_extension *)
+ ((unsigned long) info + info->acc_offset);
+ seq_printf(m, "CPUs Total: %d\n", info->cpus_total);
+ seq_printf(m, "CPUs Configured: %d\n", info->cpus_configured);
+ seq_printf(m, "CPUs Standby: %d\n", info->cpus_standby);
+ seq_printf(m, "CPUs Reserved: %d\n", info->cpus_reserved);
+ /*
+ * Sigh 2. According to the specification the alternate
+ * capability field is a 32 bit floating point number
+ * if the higher order 8 bits are not zero. Printing
+ * a floating point number in the kernel is a no-no,
+ * always print the number as 32 bit unsigned integer.
+ * The user-space needs to know about the strange
+ * encoding of the alternate cpu capability.
+ */
+ seq_printf(m, "Capability: %u", info->capability);
+ if (info->format == 1)
+ seq_printf(m, " %u", ext->alt_capability);
+ seq_putc(m, '\n');
+ if (info->nominal_cap)
+ seq_printf(m, "Nominal Capability: %d\n", info->nominal_cap);
+ if (info->secondary_cap)
+ seq_printf(m, "Secondary Capability: %d\n", info->secondary_cap);
+ for (i = 2; i <= info->cpus_total; i++) {
+ seq_printf(m, "Adjustment %02d-way: %u",
+ i, info->adjustment[i-2]);
+ if (info->format == 1)
+ seq_printf(m, " %u", ext->alt_adjustment[i-2]);
+ seq_putc(m, '\n');
+ }
+}
+
+static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info)
+{
+ if (stsi(info, 2, 2, 2))
+ return;
+ EBCASC(info->name, sizeof(info->name));
+ seq_putc(m, '\n');
+ seq_printf(m, "LPAR Number: %d\n", info->lpar_number);
+ seq_printf(m, "LPAR Characteristics: ");
+ if (info->characteristics & LPAR_CHAR_DEDICATED)
+ seq_printf(m, "Dedicated ");
+ if (info->characteristics & LPAR_CHAR_SHARED)
+ seq_printf(m, "Shared ");
+ if (info->characteristics & LPAR_CHAR_LIMITED)
+ seq_printf(m, "Limited ");
+ seq_putc(m, '\n');
+ seq_printf(m, "LPAR Name: %-8.8s\n", info->name);
+ seq_printf(m, "LPAR Adjustment: %d\n", info->caf);
+ seq_printf(m, "LPAR CPUs Total: %d\n", info->cpus_total);
+ seq_printf(m, "LPAR CPUs Configured: %d\n", info->cpus_configured);
+ seq_printf(m, "LPAR CPUs Standby: %d\n", info->cpus_standby);
+ seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved);
+ seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated);
+ seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared);
+ if (info->mt_installed & 0x80) {
+ seq_printf(m, "LPAR CPUs G-MTID: %d\n",
+ info->mt_general & 0x1f);
+ seq_printf(m, "LPAR CPUs S-MTID: %d\n",
+ info->mt_installed & 0x1f);
+ seq_printf(m, "LPAR CPUs PS-MTID: %d\n",
+ info->mt_psmtid & 0x1f);
+ }
+}
+
+static void print_ext_name(struct seq_file *m, int lvl,
+ struct sysinfo_3_2_2 *info)
+{
+ if (info->vm[lvl].ext_name_encoding == 0)
+ return;
+ if (info->ext_names[lvl][0] == 0)
+ return;
+ switch (info->vm[lvl].ext_name_encoding) {
+ case 1: /* EBCDIC */
+ EBCASC(info->ext_names[lvl], sizeof(info->ext_names[lvl]));
+ break;
+ case 2: /* UTF-8 */
+ break;
+ default:
+ return;
+ }
+ seq_printf(m, "VM%02d Extended Name: %-.256s\n", lvl,
+ info->ext_names[lvl]);
+}
+
+static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info)
+{
+ if (!memcmp(&info->vm[i].uuid, &NULL_UUID_BE, sizeof(uuid_be)))
+ return;
+ seq_printf(m, "VM%02d UUID: %pUb\n", i, &info->vm[i].uuid);
+}
+
+static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info)
+{
+ int i;
+
+ if (stsi(info, 3, 2, 2))
+ return;
+ for (i = 0; i < info->count; i++) {
+ EBCASC(info->vm[i].name, sizeof(info->vm[i].name));
+ EBCASC(info->vm[i].cpi, sizeof(info->vm[i].cpi));
+ seq_putc(m, '\n');
+ seq_printf(m, "VM%02d Name: %-8.8s\n", i, info->vm[i].name);
+ seq_printf(m, "VM%02d Control Program: %-16.16s\n", i, info->vm[i].cpi);
+ seq_printf(m, "VM%02d Adjustment: %d\n", i, info->vm[i].caf);
+ seq_printf(m, "VM%02d CPUs Total: %d\n", i, info->vm[i].cpus_total);
+ seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured);
+ seq_printf(m, "VM%02d CPUs Standby: %d\n", i, info->vm[i].cpus_standby);
+ seq_printf(m, "VM%02d CPUs Reserved: %d\n", i, info->vm[i].cpus_reserved);
+ print_ext_name(m, i, info);
+ print_uuid(m, i, info);
+ }
+}
+
+static int sysinfo_show(struct seq_file *m, void *v)
+{
+ void *info = (void *)get_zeroed_page(GFP_KERNEL);
+ int level;
+
+ if (!info)
+ return 0;
+ level = stsi(NULL, 0, 0, 0);
+ if (level >= 1)
+ stsi_1_1_1(m, info);
+ if (level >= 1)
+ stsi_15_1_x(m, info);
+ if (level >= 1)
+ stsi_1_2_2(m, info);
+ if (level >= 2)
+ stsi_2_2_2(m, info);
+ if (level >= 3)
+ stsi_3_2_2(m, info);
+ free_page((unsigned long)info);
+ return 0;
+}
+
+static int sysinfo_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, sysinfo_show, NULL);
+}
+
+static const struct file_operations sysinfo_fops = {
+ .open = sysinfo_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init sysinfo_create_proc(void)
+{
+ proc_create("sysinfo", 0444, NULL, &sysinfo_fops);
+ return 0;
+}
+device_initcall(sysinfo_create_proc);
+
+/*
+ * Service levels interface.
+ */
+
+static DECLARE_RWSEM(service_level_sem);
+static LIST_HEAD(service_level_list);
+
+int register_service_level(struct service_level *slr)
+{
+ struct service_level *ptr;
+
+ down_write(&service_level_sem);
+ list_for_each_entry(ptr, &service_level_list, list)
+ if (ptr == slr) {
+ up_write(&service_level_sem);
+ return -EEXIST;
+ }
+ list_add_tail(&slr->list, &service_level_list);
+ up_write(&service_level_sem);
+ return 0;
+}
+EXPORT_SYMBOL(register_service_level);
+
+int unregister_service_level(struct service_level *slr)
+{
+ struct service_level *ptr, *next;
+ int rc = -ENOENT;
+
+ down_write(&service_level_sem);
+ list_for_each_entry_safe(ptr, next, &service_level_list, list) {
+ if (ptr != slr)
+ continue;
+ list_del(&ptr->list);
+ rc = 0;
+ break;
+ }
+ up_write(&service_level_sem);
+ return rc;
+}
+EXPORT_SYMBOL(unregister_service_level);
+
+static void *service_level_start(struct seq_file *m, loff_t *pos)
+{
+ down_read(&service_level_sem);
+ return seq_list_start(&service_level_list, *pos);
+}
+
+static void *service_level_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ return seq_list_next(p, &service_level_list, pos);
+}
+
+static void service_level_stop(struct seq_file *m, void *p)
+{
+ up_read(&service_level_sem);
+}
+
+static int service_level_show(struct seq_file *m, void *p)
+{
+ struct service_level *slr;
+
+ slr = list_entry(p, struct service_level, list);
+ slr->seq_print(m, slr);
+ return 0;
+}
+
+static const struct seq_operations service_level_seq_ops = {
+ .start = service_level_start,
+ .next = service_level_next,
+ .stop = service_level_stop,
+ .show = service_level_show
+};
+
+static int service_level_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &service_level_seq_ops);
+}
+
+static const struct file_operations service_level_ops = {
+ .open = service_level_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+static void service_level_vm_print(struct seq_file *m,
+ struct service_level *slr)
+{
+ char *query_buffer, *str;
+
+ query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA);
+ if (!query_buffer)
+ return;
+ cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL);
+ str = strchr(query_buffer, '\n');
+ if (str)
+ *str = 0;
+ seq_printf(m, "VM: %s\n", query_buffer);
+ kfree(query_buffer);
+}
+
+static struct service_level service_level_vm = {
+ .seq_print = service_level_vm_print
+};
+
+static __init int create_proc_service_level(void)
+{
+ proc_create("service_levels", 0, NULL, &service_level_ops);
+ if (MACHINE_IS_VM)
+ register_service_level(&service_level_vm);
+ return 0;
+}
+subsys_initcall(create_proc_service_level);
+
+/*
+ * CPU capability might have changed. Therefore recalculate loops_per_jiffy.
+ */
+void s390_adjust_jiffies(void)
+{
+ struct sysinfo_1_2_2 *info;
+ const unsigned int fmil = 0x4b189680; /* 1e7 as 32-bit float. */
+ FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
+ FP_DECL_EX;
+ unsigned int capability;
+
+ info = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!info)
+ return;
+
+ if (stsi(info, 1, 2, 2) == 0) {
+ /*
+ * Major sigh. The cpu capability encoding is "special".
+ * If the first 9 bits of info->capability are 0 then it
+ * is a 32 bit unsigned integer in the range 0 .. 2^23.
+ * If the first 9 bits are != 0 then it is a 32 bit float.
+ * In addition a lower value indicates a proportionally
+ * higher cpu capacity. Bogomips are the other way round.
+ * To get to a halfway suitable number we divide 1e7
+ * by the cpu capability number. Yes, that means a floating
+ * point division .. math-emu here we come :-)
+ */
+ FP_UNPACK_SP(SA, &fmil);
+ if ((info->capability >> 23) == 0)
+ FP_FROM_INT_S(SB, (long) info->capability, 64, long);
+ else
+ FP_UNPACK_SP(SB, &info->capability);
+ FP_DIV_S(SR, SA, SB);
+ FP_TO_INT_S(capability, SR, 32, 0);
+ } else
+ /*
+ * Really old machine without stsi block for basic
+ * cpu information. Report 42.0 bogomips.
+ */
+ capability = 42;
+ loops_per_jiffy = capability * (500000/HZ);
+ free_page((unsigned long) info);
+}
+
+/*
+ * calibrate the delay loop
+ */
+void calibrate_delay(void)
+{
+ s390_adjust_jiffies();
+ /* Print the good old Bogomips line .. */
+ printk(KERN_DEBUG "Calibrating delay loop (skipped)... "
+ "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ)) % 100);
+}
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
new file mode 100644
index 000000000..170ddd201
--- /dev/null
+++ b/arch/s390/kernel/time.c
@@ -0,0 +1,1802 @@
+/*
+ * Time of day based timer functions.
+ *
+ * S390 version
+ * Copyright IBM Corp. 1999, 2008
+ * Author(s): Hartmut Penner (hp@de.ibm.com),
+ * Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ *
+ * Derived from "arch/i386/kernel/time.c"
+ * Copyright (C) 1991, 1992, 1995 Linus Torvalds
+ */
+
+#define KMSG_COMPONENT "time"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel_stat.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/stop_machine.h>
+#include <linux/time.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <linux/profile.h>
+#include <linux/timex.h>
+#include <linux/notifier.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/clockchips.h>
+#include <linux/gfp.h>
+#include <linux/kprobes.h>
+#include <asm/uaccess.h>
+#include <asm/delay.h>
+#include <asm/div64.h>
+#include <asm/vdso.h>
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/vtimer.h>
+#include <asm/etr.h>
+#include <asm/cio.h>
+#include "entry.h"
+
+/* change this if you have some constant time drift */
+#define USECS_PER_JIFFY ((unsigned long) 1000000/HZ)
+#define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12)
+
+u64 sched_clock_base_cc = -1; /* Force to data section. */
+EXPORT_SYMBOL_GPL(sched_clock_base_cc);
+
+static DEFINE_PER_CPU(struct clock_event_device, comparators);
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+unsigned long long notrace sched_clock(void)
+{
+ return tod_to_ns(get_tod_clock_monotonic());
+}
+NOKPROBE_SYMBOL(sched_clock);
+
+/*
+ * Monotonic_clock - returns # of nanoseconds passed since time_init()
+ */
+unsigned long long monotonic_clock(void)
+{
+ return sched_clock();
+}
+EXPORT_SYMBOL(monotonic_clock);
+
+void tod_to_timeval(__u64 todval, struct timespec *xt)
+{
+ unsigned long long sec;
+
+ sec = todval >> 12;
+ do_div(sec, 1000000);
+ xt->tv_sec = sec;
+ todval -= (sec * 1000000) << 12;
+ xt->tv_nsec = ((todval * 1000) >> 12);
+}
+EXPORT_SYMBOL(tod_to_timeval);
+
+void clock_comparator_work(void)
+{
+ struct clock_event_device *cd;
+
+ S390_lowcore.clock_comparator = -1ULL;
+ cd = this_cpu_ptr(&comparators);
+ cd->event_handler(cd);
+}
+
+/*
+ * Fixup the clock comparator.
+ */
+static void fixup_clock_comparator(unsigned long long delta)
+{
+ /* If nobody is waiting there's nothing to fix. */
+ if (S390_lowcore.clock_comparator == -1ULL)
+ return;
+ S390_lowcore.clock_comparator += delta;
+ set_clock_comparator(S390_lowcore.clock_comparator);
+}
+
+static int s390_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ S390_lowcore.clock_comparator = get_tod_clock() + delta;
+ set_clock_comparator(S390_lowcore.clock_comparator);
+ return 0;
+}
+
+static void s390_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+{
+}
+
+/*
+ * Set up lowcore and control register of the current cpu to
+ * enable TOD clock and clock comparator interrupts.
+ */
+void init_cpu_timer(void)
+{
+ struct clock_event_device *cd;
+ int cpu;
+
+ S390_lowcore.clock_comparator = -1ULL;
+ set_clock_comparator(S390_lowcore.clock_comparator);
+
+ cpu = smp_processor_id();
+ cd = &per_cpu(comparators, cpu);
+ cd->name = "comparator";
+ cd->features = CLOCK_EVT_FEAT_ONESHOT;
+ cd->mult = 16777;
+ cd->shift = 12;
+ cd->min_delta_ns = 1;
+ cd->max_delta_ns = LONG_MAX;
+ cd->rating = 400;
+ cd->cpumask = cpumask_of(cpu);
+ cd->set_next_event = s390_next_event;
+ cd->set_mode = s390_set_mode;
+
+ clockevents_register_device(cd);
+
+ /* Enable clock comparator timer interrupt. */
+ __ctl_set_bit(0,11);
+
+ /* Always allow the timing alert external interrupt. */
+ __ctl_set_bit(0, 4);
+}
+
+static void clock_comparator_interrupt(struct ext_code ext_code,
+ unsigned int param32,
+ unsigned long param64)
+{
+ inc_irq_stat(IRQEXT_CLK);
+ if (S390_lowcore.clock_comparator == -1ULL)
+ set_clock_comparator(S390_lowcore.clock_comparator);
+}
+
+static void etr_timing_alert(struct etr_irq_parm *);
+static void stp_timing_alert(struct stp_irq_parm *);
+
+static void timing_alert_interrupt(struct ext_code ext_code,
+ unsigned int param32, unsigned long param64)
+{
+ inc_irq_stat(IRQEXT_TLA);
+ if (param32 & 0x00c40000)
+ etr_timing_alert((struct etr_irq_parm *) &param32);
+ if (param32 & 0x00038000)
+ stp_timing_alert((struct stp_irq_parm *) &param32);
+}
+
+static void etr_reset(void);
+static void stp_reset(void);
+
+void read_persistent_clock(struct timespec *ts)
+{
+ tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts);
+}
+
+void read_boot_clock(struct timespec *ts)
+{
+ tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts);
+}
+
+static cycle_t read_tod_clock(struct clocksource *cs)
+{
+ return get_tod_clock();
+}
+
+static struct clocksource clocksource_tod = {
+ .name = "tod",
+ .rating = 400,
+ .read = read_tod_clock,
+ .mask = -1ULL,
+ .mult = 1000,
+ .shift = 12,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+struct clocksource * __init clocksource_default_clock(void)
+{
+ return &clocksource_tod;
+}
+
+void update_vsyscall(struct timekeeper *tk)
+{
+ u64 nsecps;
+
+ if (tk->tkr_mono.clock != &clocksource_tod)
+ return;
+
+ /* Make userspace gettimeofday spin until we're done. */
+ ++vdso_data->tb_update_count;
+ smp_wmb();
+ vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
+ vdso_data->xtime_clock_sec = tk->xtime_sec;
+ vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
+ vdso_data->wtom_clock_sec =
+ tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
+ + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+ nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
+ while (vdso_data->wtom_clock_nsec >= nsecps) {
+ vdso_data->wtom_clock_nsec -= nsecps;
+ vdso_data->wtom_clock_sec++;
+ }
+
+ vdso_data->xtime_coarse_sec = tk->xtime_sec;
+ vdso_data->xtime_coarse_nsec =
+ (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
+ vdso_data->wtom_coarse_sec =
+ vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
+ vdso_data->wtom_coarse_nsec =
+ vdso_data->xtime_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
+ while (vdso_data->wtom_coarse_nsec >= NSEC_PER_SEC) {
+ vdso_data->wtom_coarse_nsec -= NSEC_PER_SEC;
+ vdso_data->wtom_coarse_sec++;
+ }
+
+ vdso_data->tk_mult = tk->tkr_mono.mult;
+ vdso_data->tk_shift = tk->tkr_mono.shift;
+ smp_wmb();
+ ++vdso_data->tb_update_count;
+}
+
+extern struct timezone sys_tz;
+
+void update_vsyscall_tz(void)
+{
+ /* Make userspace gettimeofday spin until we're done. */
+ ++vdso_data->tb_update_count;
+ smp_wmb();
+ vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+ vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+ smp_wmb();
+ ++vdso_data->tb_update_count;
+}
+
+/*
+ * Initialize the TOD clock and the CPU timer of
+ * the boot cpu.
+ */
+void __init time_init(void)
+{
+ /* Reset time synchronization interfaces. */
+ etr_reset();
+ stp_reset();
+
+ /* request the clock comparator external interrupt */
+ if (register_external_irq(EXT_IRQ_CLK_COMP, clock_comparator_interrupt))
+ panic("Couldn't request external interrupt 0x1004");
+
+ /* request the timing alert external interrupt */
+ if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
+ panic("Couldn't request external interrupt 0x1406");
+
+ if (__clocksource_register(&clocksource_tod) != 0)
+ panic("Could not register TOD clock source");
+
+ /* Enable TOD clock interrupts on the boot cpu. */
+ init_cpu_timer();
+
+ /* Enable cpu timer interrupts on the boot cpu. */
+ vtime_init();
+}
+
+/*
+ * The time is "clock". old is what we think the time is.
+ * Adjust the value by a multiple of jiffies and add the delta to ntp.
+ * "delay" is an approximation how long the synchronization took. If
+ * the time correction is positive, then "delay" is subtracted from
+ * the time difference and only the remaining part is passed to ntp.
+ */
+static unsigned long long adjust_time(unsigned long long old,
+ unsigned long long clock,
+ unsigned long long delay)
+{
+ unsigned long long delta, ticks;
+ struct timex adjust;
+
+ if (clock > old) {
+ /* It is later than we thought. */
+ delta = ticks = clock - old;
+ delta = ticks = (delta < delay) ? 0 : delta - delay;
+ delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
+ adjust.offset = ticks * (1000000 / HZ);
+ } else {
+ /* It is earlier than we thought. */
+ delta = ticks = old - clock;
+ delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
+ delta = -delta;
+ adjust.offset = -ticks * (1000000 / HZ);
+ }
+ sched_clock_base_cc += delta;
+ if (adjust.offset != 0) {
+ pr_notice("The ETR interface has adjusted the clock "
+ "by %li microseconds\n", adjust.offset);
+ adjust.modes = ADJ_OFFSET_SINGLESHOT;
+ do_adjtimex(&adjust);
+ }
+ return delta;
+}
+
+static DEFINE_PER_CPU(atomic_t, clock_sync_word);
+static DEFINE_MUTEX(clock_sync_mutex);
+static unsigned long clock_sync_flags;
+
+#define CLOCK_SYNC_HAS_ETR 0
+#define CLOCK_SYNC_HAS_STP 1
+#define CLOCK_SYNC_ETR 2
+#define CLOCK_SYNC_STP 3
+
+/*
+ * The synchronous get_clock function. It will write the current clock
+ * value to the clock pointer and return 0 if the clock is in sync with
+ * the external time source. If the clock mode is local it will return
+ * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external
+ * reference.
+ */
+int get_sync_clock(unsigned long long *clock)
+{
+ atomic_t *sw_ptr;
+ unsigned int sw0, sw1;
+
+ sw_ptr = &get_cpu_var(clock_sync_word);
+ sw0 = atomic_read(sw_ptr);
+ *clock = get_tod_clock();
+ sw1 = atomic_read(sw_ptr);
+ put_cpu_var(clock_sync_word);
+ if (sw0 == sw1 && (sw0 & 0x80000000U))
+ /* Success: time is in sync. */
+ return 0;
+ if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) &&
+ !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+ return -EOPNOTSUPP;
+ if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) &&
+ !test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+ return -EACCES;
+ return -EAGAIN;
+}
+EXPORT_SYMBOL(get_sync_clock);
+
+/*
+ * Make get_sync_clock return -EAGAIN.
+ */
+static void disable_sync_clock(void *dummy)
+{
+ atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
+ /*
+ * Clear the in-sync bit 2^31. All get_sync_clock calls will
+ * fail until the sync bit is turned back on. In addition
+ * increase the "sequence" counter to avoid the race of an
+ * etr event and the complete recovery against get_sync_clock.
+ */
+ atomic_clear_mask(0x80000000, sw_ptr);
+ atomic_inc(sw_ptr);
+}
+
+/*
+ * Make get_sync_clock return 0 again.
+ * Needs to be called from a context disabled for preemption.
+ */
+static void enable_sync_clock(void)
+{
+ atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
+ atomic_set_mask(0x80000000, sw_ptr);
+}
+
+/*
+ * Function to check if the clock is in sync.
+ */
+static inline int check_sync_clock(void)
+{
+ atomic_t *sw_ptr;
+ int rc;
+
+ sw_ptr = &get_cpu_var(clock_sync_word);
+ rc = (atomic_read(sw_ptr) & 0x80000000U) != 0;
+ put_cpu_var(clock_sync_word);
+ return rc;
+}
+
+/* Single threaded workqueue used for etr and stp sync events */
+static struct workqueue_struct *time_sync_wq;
+
+static void __init time_init_wq(void)
+{
+ if (time_sync_wq)
+ return;
+ time_sync_wq = create_singlethread_workqueue("timesync");
+}
+
+/*
+ * External Time Reference (ETR) code.
+ */
+static int etr_port0_online;
+static int etr_port1_online;
+static int etr_steai_available;
+
+static int __init early_parse_etr(char *p)
+{
+ if (strncmp(p, "off", 3) == 0)
+ etr_port0_online = etr_port1_online = 0;
+ else if (strncmp(p, "port0", 5) == 0)
+ etr_port0_online = 1;
+ else if (strncmp(p, "port1", 5) == 0)
+ etr_port1_online = 1;
+ else if (strncmp(p, "on", 2) == 0)
+ etr_port0_online = etr_port1_online = 1;
+ return 0;
+}
+early_param("etr", early_parse_etr);
+
+enum etr_event {
+ ETR_EVENT_PORT0_CHANGE,
+ ETR_EVENT_PORT1_CHANGE,
+ ETR_EVENT_PORT_ALERT,
+ ETR_EVENT_SYNC_CHECK,
+ ETR_EVENT_SWITCH_LOCAL,
+ ETR_EVENT_UPDATE,
+};
+
+/*
+ * Valid bit combinations of the eacr register are (x = don't care):
+ * e0 e1 dp p0 p1 ea es sl
+ * 0 0 x 0 0 0 0 0 initial, disabled state
+ * 0 0 x 0 1 1 0 0 port 1 online
+ * 0 0 x 1 0 1 0 0 port 0 online
+ * 0 0 x 1 1 1 0 0 both ports online
+ * 0 1 x 0 1 1 0 0 port 1 online and usable, ETR or PPS mode
+ * 0 1 x 0 1 1 0 1 port 1 online, usable and ETR mode
+ * 0 1 x 0 1 1 1 0 port 1 online, usable, PPS mode, in-sync
+ * 0 1 x 0 1 1 1 1 port 1 online, usable, ETR mode, in-sync
+ * 0 1 x 1 1 1 0 0 both ports online, port 1 usable
+ * 0 1 x 1 1 1 1 0 both ports online, port 1 usable, PPS mode, in-sync
+ * 0 1 x 1 1 1 1 1 both ports online, port 1 usable, ETR mode, in-sync
+ * 1 0 x 1 0 1 0 0 port 0 online and usable, ETR or PPS mode
+ * 1 0 x 1 0 1 0 1 port 0 online, usable and ETR mode
+ * 1 0 x 1 0 1 1 0 port 0 online, usable, PPS mode, in-sync
+ * 1 0 x 1 0 1 1 1 port 0 online, usable, ETR mode, in-sync
+ * 1 0 x 1 1 1 0 0 both ports online, port 0 usable
+ * 1 0 x 1 1 1 1 0 both ports online, port 0 usable, PPS mode, in-sync
+ * 1 0 x 1 1 1 1 1 both ports online, port 0 usable, ETR mode, in-sync
+ * 1 1 x 1 1 1 1 0 both ports online & usable, ETR, in-sync
+ * 1 1 x 1 1 1 1 1 both ports online & usable, ETR, in-sync
+ */
+static struct etr_eacr etr_eacr;
+static u64 etr_tolec; /* time of last eacr update */
+static struct etr_aib etr_port0;
+static int etr_port0_uptodate;
+static struct etr_aib etr_port1;
+static int etr_port1_uptodate;
+static unsigned long etr_events;
+static struct timer_list etr_timer;
+
+static void etr_timeout(unsigned long dummy);
+static void etr_work_fn(struct work_struct *work);
+static DEFINE_MUTEX(etr_work_mutex);
+static DECLARE_WORK(etr_work, etr_work_fn);
+
+/*
+ * Reset ETR attachment.
+ */
+static void etr_reset(void)
+{
+ etr_eacr = (struct etr_eacr) {
+ .e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0,
+ .p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0,
+ .es = 0, .sl = 0 };
+ if (etr_setr(&etr_eacr) == 0) {
+ etr_tolec = get_tod_clock();
+ set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags);
+ if (etr_port0_online && etr_port1_online)
+ set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+ } else if (etr_port0_online || etr_port1_online) {
+ pr_warning("The real or virtual hardware system does "
+ "not provide an ETR interface\n");
+ etr_port0_online = etr_port1_online = 0;
+ }
+}
+
+static int __init etr_init(void)
+{
+ struct etr_aib aib;
+
+ if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
+ return 0;
+ time_init_wq();
+ /* Check if this machine has the steai instruction. */
+ if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0)
+ etr_steai_available = 1;
+ setup_timer(&etr_timer, etr_timeout, 0UL);
+ if (etr_port0_online) {
+ set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
+ queue_work(time_sync_wq, &etr_work);
+ }
+ if (etr_port1_online) {
+ set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
+ queue_work(time_sync_wq, &etr_work);
+ }
+ return 0;
+}
+
+arch_initcall(etr_init);
+
+/*
+ * Two sorts of ETR machine checks. The architecture reads:
+ * "When a machine-check niterruption occurs and if a switch-to-local or
+ * ETR-sync-check interrupt request is pending but disabled, this pending
+ * disabled interruption request is indicated and is cleared".
+ * Which means that we can get etr_switch_to_local events from the machine
+ * check handler although the interruption condition is disabled. Lovely..
+ */
+
+/*
+ * Switch to local machine check. This is called when the last usable
+ * ETR port goes inactive. After switch to local the clock is not in sync.
+ */
+void etr_switch_to_local(void)
+{
+ if (!etr_eacr.sl)
+ return;
+ disable_sync_clock(NULL);
+ if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) {
+ etr_eacr.es = etr_eacr.sl = 0;
+ etr_setr(&etr_eacr);
+ queue_work(time_sync_wq, &etr_work);
+ }
+}
+
+/*
+ * ETR sync check machine check. This is called when the ETR OTE and the
+ * local clock OTE are farther apart than the ETR sync check tolerance.
+ * After a ETR sync check the clock is not in sync. The machine check
+ * is broadcasted to all cpus at the same time.
+ */
+void etr_sync_check(void)
+{
+ if (!etr_eacr.es)
+ return;
+ disable_sync_clock(NULL);
+ if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) {
+ etr_eacr.es = 0;
+ etr_setr(&etr_eacr);
+ queue_work(time_sync_wq, &etr_work);
+ }
+}
+
+/*
+ * ETR timing alert. There are two causes:
+ * 1) port state change, check the usability of the port
+ * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the
+ * sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3)
+ * or ETR-data word 4 (edf4) has changed.
+ */
+static void etr_timing_alert(struct etr_irq_parm *intparm)
+{
+ if (intparm->pc0)
+ /* ETR port 0 state change. */
+ set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
+ if (intparm->pc1)
+ /* ETR port 1 state change. */
+ set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
+ if (intparm->eai)
+ /*
+ * ETR port alert on either port 0, 1 or both.
+ * Both ports are not up-to-date now.
+ */
+ set_bit(ETR_EVENT_PORT_ALERT, &etr_events);
+ queue_work(time_sync_wq, &etr_work);
+}
+
+static void etr_timeout(unsigned long dummy)
+{
+ set_bit(ETR_EVENT_UPDATE, &etr_events);
+ queue_work(time_sync_wq, &etr_work);
+}
+
+/*
+ * Check if the etr mode is pss.
+ */
+static inline int etr_mode_is_pps(struct etr_eacr eacr)
+{
+ return eacr.es && !eacr.sl;
+}
+
+/*
+ * Check if the etr mode is etr.
+ */
+static inline int etr_mode_is_etr(struct etr_eacr eacr)
+{
+ return eacr.es && eacr.sl;
+}
+
+/*
+ * Check if the port can be used for TOD synchronization.
+ * For PPS mode the port has to receive OTEs. For ETR mode
+ * the port has to receive OTEs, the ETR stepping bit has to
+ * be zero and the validity bits for data frame 1, 2, and 3
+ * have to be 1.
+ */
+static int etr_port_valid(struct etr_aib *aib, int port)
+{
+ unsigned int psc;
+
+ /* Check that this port is receiving OTEs. */
+ if (aib->tsp == 0)
+ return 0;
+
+ psc = port ? aib->esw.psc1 : aib->esw.psc0;
+ if (psc == etr_lpsc_pps_mode)
+ return 1;
+ if (psc == etr_lpsc_operational_step)
+ return !aib->esw.y && aib->slsw.v1 &&
+ aib->slsw.v2 && aib->slsw.v3;
+ return 0;
+}
+
+/*
+ * Check if two ports are on the same network.
+ */
+static int etr_compare_network(struct etr_aib *aib1, struct etr_aib *aib2)
+{
+ // FIXME: any other fields we have to compare?
+ return aib1->edf1.net_id == aib2->edf1.net_id;
+}
+
+/*
+ * Wrapper for etr_stei that converts physical port states
+ * to logical port states to be consistent with the output
+ * of stetr (see etr_psc vs. etr_lpsc).
+ */
+static void etr_steai_cv(struct etr_aib *aib, unsigned int func)
+{
+ BUG_ON(etr_steai(aib, func) != 0);
+ /* Convert port state to logical port state. */
+ if (aib->esw.psc0 == 1)
+ aib->esw.psc0 = 2;
+ else if (aib->esw.psc0 == 0 && aib->esw.p == 0)
+ aib->esw.psc0 = 1;
+ if (aib->esw.psc1 == 1)
+ aib->esw.psc1 = 2;
+ else if (aib->esw.psc1 == 0 && aib->esw.p == 1)
+ aib->esw.psc1 = 1;
+}
+
+/*
+ * Check if the aib a2 is still connected to the same attachment as
+ * aib a1, the etv values differ by one and a2 is valid.
+ */
+static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p)
+{
+ int state_a1, state_a2;
+
+ /* Paranoia check: e0/e1 should better be the same. */
+ if (a1->esw.eacr.e0 != a2->esw.eacr.e0 ||
+ a1->esw.eacr.e1 != a2->esw.eacr.e1)
+ return 0;
+
+ /* Still connected to the same etr ? */
+ state_a1 = p ? a1->esw.psc1 : a1->esw.psc0;
+ state_a2 = p ? a2->esw.psc1 : a2->esw.psc0;
+ if (state_a1 == etr_lpsc_operational_step) {
+ if (state_a2 != etr_lpsc_operational_step ||
+ a1->edf1.net_id != a2->edf1.net_id ||
+ a1->edf1.etr_id != a2->edf1.etr_id ||
+ a1->edf1.etr_pn != a2->edf1.etr_pn)
+ return 0;
+ } else if (state_a2 != etr_lpsc_pps_mode)
+ return 0;
+
+ /* The ETV value of a2 needs to be ETV of a1 + 1. */
+ if (a1->edf2.etv + 1 != a2->edf2.etv)
+ return 0;
+
+ if (!etr_port_valid(a2, p))
+ return 0;
+
+ return 1;
+}
+
+struct clock_sync_data {
+ atomic_t cpus;
+ int in_sync;
+ unsigned long long fixup_cc;
+ int etr_port;
+ struct etr_aib *etr_aib;
+};
+
+static void clock_sync_cpu(struct clock_sync_data *sync)
+{
+ atomic_dec(&sync->cpus);
+ enable_sync_clock();
+ /*
+ * This looks like a busy wait loop but it isn't. etr_sync_cpus
+ * is called on all other cpus while the TOD clocks is stopped.
+ * __udelay will stop the cpu on an enabled wait psw until the
+ * TOD is running again.
+ */
+ while (sync->in_sync == 0) {
+ __udelay(1);
+ /*
+ * A different cpu changes *in_sync. Therefore use
+ * barrier() to force memory access.
+ */
+ barrier();
+ }
+ if (sync->in_sync != 1)
+ /* Didn't work. Clear per-cpu in sync bit again. */
+ disable_sync_clock(NULL);
+ /*
+ * This round of TOD syncing is done. Set the clock comparator
+ * to the next tick and let the processor continue.
+ */
+ fixup_clock_comparator(sync->fixup_cc);
+}
+
+/*
+ * Sync the TOD clock using the port referred to by aibp. This port
+ * has to be enabled and the other port has to be disabled. The
+ * last eacr update has to be more than 1.6 seconds in the past.
+ */
+static int etr_sync_clock(void *data)
+{
+ static int first;
+ unsigned long long clock, old_clock, delay, delta;
+ struct clock_sync_data *etr_sync;
+ struct etr_aib *sync_port, *aib;
+ int port;
+ int rc;
+
+ etr_sync = data;
+
+ if (xchg(&first, 1) == 1) {
+ /* Slave */
+ clock_sync_cpu(etr_sync);
+ return 0;
+ }
+
+ /* Wait until all other cpus entered the sync function. */
+ while (atomic_read(&etr_sync->cpus) != 0)
+ cpu_relax();
+
+ port = etr_sync->etr_port;
+ aib = etr_sync->etr_aib;
+ sync_port = (port == 0) ? &etr_port0 : &etr_port1;
+ enable_sync_clock();
+
+ /* Set clock to next OTE. */
+ __ctl_set_bit(14, 21);
+ __ctl_set_bit(0, 29);
+ clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32;
+ old_clock = get_tod_clock();
+ if (set_tod_clock(clock) == 0) {
+ __udelay(1); /* Wait for the clock to start. */
+ __ctl_clear_bit(0, 29);
+ __ctl_clear_bit(14, 21);
+ etr_stetr(aib);
+ /* Adjust Linux timing variables. */
+ delay = (unsigned long long)
+ (aib->edf2.etv - sync_port->edf2.etv) << 32;
+ delta = adjust_time(old_clock, clock, delay);
+ etr_sync->fixup_cc = delta;
+ fixup_clock_comparator(delta);
+ /* Verify that the clock is properly set. */
+ if (!etr_aib_follows(sync_port, aib, port)) {
+ /* Didn't work. */
+ disable_sync_clock(NULL);
+ etr_sync->in_sync = -EAGAIN;
+ rc = -EAGAIN;
+ } else {
+ etr_sync->in_sync = 1;
+ rc = 0;
+ }
+ } else {
+ /* Could not set the clock ?!? */
+ __ctl_clear_bit(0, 29);
+ __ctl_clear_bit(14, 21);
+ disable_sync_clock(NULL);
+ etr_sync->in_sync = -EAGAIN;
+ rc = -EAGAIN;
+ }
+ xchg(&first, 0);
+ return rc;
+}
+
+static int etr_sync_clock_stop(struct etr_aib *aib, int port)
+{
+ struct clock_sync_data etr_sync;
+ struct etr_aib *sync_port;
+ int follows;
+ int rc;
+
+ /* Check if the current aib is adjacent to the sync port aib. */
+ sync_port = (port == 0) ? &etr_port0 : &etr_port1;
+ follows = etr_aib_follows(sync_port, aib, port);
+ memcpy(sync_port, aib, sizeof(*aib));
+ if (!follows)
+ return -EAGAIN;
+ memset(&etr_sync, 0, sizeof(etr_sync));
+ etr_sync.etr_aib = aib;
+ etr_sync.etr_port = port;
+ get_online_cpus();
+ atomic_set(&etr_sync.cpus, num_online_cpus() - 1);
+ rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask);
+ put_online_cpus();
+ return rc;
+}
+
+/*
+ * Handle the immediate effects of the different events.
+ * The port change event is used for online/offline changes.
+ */
+static struct etr_eacr etr_handle_events(struct etr_eacr eacr)
+{
+ if (test_and_clear_bit(ETR_EVENT_SYNC_CHECK, &etr_events))
+ eacr.es = 0;
+ if (test_and_clear_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events))
+ eacr.es = eacr.sl = 0;
+ if (test_and_clear_bit(ETR_EVENT_PORT_ALERT, &etr_events))
+ etr_port0_uptodate = etr_port1_uptodate = 0;
+
+ if (test_and_clear_bit(ETR_EVENT_PORT0_CHANGE, &etr_events)) {
+ if (eacr.e0)
+ /*
+ * Port change of an enabled port. We have to
+ * assume that this can have caused an stepping
+ * port switch.
+ */
+ etr_tolec = get_tod_clock();
+ eacr.p0 = etr_port0_online;
+ if (!eacr.p0)
+ eacr.e0 = 0;
+ etr_port0_uptodate = 0;
+ }
+ if (test_and_clear_bit(ETR_EVENT_PORT1_CHANGE, &etr_events)) {
+ if (eacr.e1)
+ /*
+ * Port change of an enabled port. We have to
+ * assume that this can have caused an stepping
+ * port switch.
+ */
+ etr_tolec = get_tod_clock();
+ eacr.p1 = etr_port1_online;
+ if (!eacr.p1)
+ eacr.e1 = 0;
+ etr_port1_uptodate = 0;
+ }
+ clear_bit(ETR_EVENT_UPDATE, &etr_events);
+ return eacr;
+}
+
+/*
+ * Set up a timer that expires after the etr_tolec + 1.6 seconds if
+ * one of the ports needs an update.
+ */
+static void etr_set_tolec_timeout(unsigned long long now)
+{
+ unsigned long micros;
+
+ if ((!etr_eacr.p0 || etr_port0_uptodate) &&
+ (!etr_eacr.p1 || etr_port1_uptodate))
+ return;
+ micros = (now > etr_tolec) ? ((now - etr_tolec) >> 12) : 0;
+ micros = (micros > 1600000) ? 0 : 1600000 - micros;
+ mod_timer(&etr_timer, jiffies + (micros * HZ) / 1000000 + 1);
+}
+
+/*
+ * Set up a time that expires after 1/2 second.
+ */
+static void etr_set_sync_timeout(void)
+{
+ mod_timer(&etr_timer, jiffies + HZ/2);
+}
+
+/*
+ * Update the aib information for one or both ports.
+ */
+static struct etr_eacr etr_handle_update(struct etr_aib *aib,
+ struct etr_eacr eacr)
+{
+ /* With both ports disabled the aib information is useless. */
+ if (!eacr.e0 && !eacr.e1)
+ return eacr;
+
+ /* Update port0 or port1 with aib stored in etr_work_fn. */
+ if (aib->esw.q == 0) {
+ /* Information for port 0 stored. */
+ if (eacr.p0 && !etr_port0_uptodate) {
+ etr_port0 = *aib;
+ if (etr_port0_online)
+ etr_port0_uptodate = 1;
+ }
+ } else {
+ /* Information for port 1 stored. */
+ if (eacr.p1 && !etr_port1_uptodate) {
+ etr_port1 = *aib;
+ if (etr_port0_online)
+ etr_port1_uptodate = 1;
+ }
+ }
+
+ /*
+ * Do not try to get the alternate port aib if the clock
+ * is not in sync yet.
+ */
+ if (!eacr.es || !check_sync_clock())
+ return eacr;
+
+ /*
+ * If steai is available we can get the information about
+ * the other port immediately. If only stetr is available the
+ * data-port bit toggle has to be used.
+ */
+ if (etr_steai_available) {
+ if (eacr.p0 && !etr_port0_uptodate) {
+ etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0);
+ etr_port0_uptodate = 1;
+ }
+ if (eacr.p1 && !etr_port1_uptodate) {
+ etr_steai_cv(&etr_port1, ETR_STEAI_PORT_1);
+ etr_port1_uptodate = 1;
+ }
+ } else {
+ /*
+ * One port was updated above, if the other
+ * port is not uptodate toggle dp bit.
+ */
+ if ((eacr.p0 && !etr_port0_uptodate) ||
+ (eacr.p1 && !etr_port1_uptodate))
+ eacr.dp ^= 1;
+ else
+ eacr.dp = 0;
+ }
+ return eacr;
+}
+
+/*
+ * Write new etr control register if it differs from the current one.
+ * Return 1 if etr_tolec has been updated as well.
+ */
+static void etr_update_eacr(struct etr_eacr eacr)
+{
+ int dp_changed;
+
+ if (memcmp(&etr_eacr, &eacr, sizeof(eacr)) == 0)
+ /* No change, return. */
+ return;
+ /*
+ * The disable of an active port of the change of the data port
+ * bit can/will cause a change in the data port.
+ */
+ dp_changed = etr_eacr.e0 > eacr.e0 || etr_eacr.e1 > eacr.e1 ||
+ (etr_eacr.dp ^ eacr.dp) != 0;
+ etr_eacr = eacr;
+ etr_setr(&etr_eacr);
+ if (dp_changed)
+ etr_tolec = get_tod_clock();
+}
+
+/*
+ * ETR work. In this function you'll find the main logic. In
+ * particular this is the only function that calls etr_update_eacr(),
+ * it "controls" the etr control register.
+ */
+static void etr_work_fn(struct work_struct *work)
+{
+ unsigned long long now;
+ struct etr_eacr eacr;
+ struct etr_aib aib;
+ int sync_port;
+
+ /* prevent multiple execution. */
+ mutex_lock(&etr_work_mutex);
+
+ /* Create working copy of etr_eacr. */
+ eacr = etr_eacr;
+
+ /* Check for the different events and their immediate effects. */
+ eacr = etr_handle_events(eacr);
+
+ /* Check if ETR is supposed to be active. */
+ eacr.ea = eacr.p0 || eacr.p1;
+ if (!eacr.ea) {
+ /* Both ports offline. Reset everything. */
+ eacr.dp = eacr.es = eacr.sl = 0;
+ on_each_cpu(disable_sync_clock, NULL, 1);
+ del_timer_sync(&etr_timer);
+ etr_update_eacr(eacr);
+ goto out_unlock;
+ }
+
+ /* Store aib to get the current ETR status word. */
+ BUG_ON(etr_stetr(&aib) != 0);
+ etr_port0.esw = etr_port1.esw = aib.esw; /* Copy status word. */
+ now = get_tod_clock();
+
+ /*
+ * Update the port information if the last stepping port change
+ * or data port change is older than 1.6 seconds.
+ */
+ if (now >= etr_tolec + (1600000 << 12))
+ eacr = etr_handle_update(&aib, eacr);
+
+ /*
+ * Select ports to enable. The preferred synchronization mode is PPS.
+ * If a port can be enabled depends on a number of things:
+ * 1) The port needs to be online and uptodate. A port is not
+ * disabled just because it is not uptodate, but it is only
+ * enabled if it is uptodate.
+ * 2) The port needs to have the same mode (pps / etr).
+ * 3) The port needs to be usable -> etr_port_valid() == 1
+ * 4) To enable the second port the clock needs to be in sync.
+ * 5) If both ports are useable and are ETR ports, the network id
+ * has to be the same.
+ * The eacr.sl bit is used to indicate etr mode vs. pps mode.
+ */
+ if (eacr.p0 && aib.esw.psc0 == etr_lpsc_pps_mode) {
+ eacr.sl = 0;
+ eacr.e0 = 1;
+ if (!etr_mode_is_pps(etr_eacr))
+ eacr.es = 0;
+ if (!eacr.es || !eacr.p1 || aib.esw.psc1 != etr_lpsc_pps_mode)
+ eacr.e1 = 0;
+ // FIXME: uptodate checks ?
+ else if (etr_port0_uptodate && etr_port1_uptodate)
+ eacr.e1 = 1;
+ sync_port = (etr_port0_uptodate &&
+ etr_port_valid(&etr_port0, 0)) ? 0 : -1;
+ } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) {
+ eacr.sl = 0;
+ eacr.e0 = 0;
+ eacr.e1 = 1;
+ if (!etr_mode_is_pps(etr_eacr))
+ eacr.es = 0;
+ sync_port = (etr_port1_uptodate &&
+ etr_port_valid(&etr_port1, 1)) ? 1 : -1;
+ } else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) {
+ eacr.sl = 1;
+ eacr.e0 = 1;
+ if (!etr_mode_is_etr(etr_eacr))
+ eacr.es = 0;
+ if (!eacr.es || !eacr.p1 ||
+ aib.esw.psc1 != etr_lpsc_operational_alt)
+ eacr.e1 = 0;
+ else if (etr_port0_uptodate && etr_port1_uptodate &&
+ etr_compare_network(&etr_port0, &etr_port1))
+ eacr.e1 = 1;
+ sync_port = (etr_port0_uptodate &&
+ etr_port_valid(&etr_port0, 0)) ? 0 : -1;
+ } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) {
+ eacr.sl = 1;
+ eacr.e0 = 0;
+ eacr.e1 = 1;
+ if (!etr_mode_is_etr(etr_eacr))
+ eacr.es = 0;
+ sync_port = (etr_port1_uptodate &&
+ etr_port_valid(&etr_port1, 1)) ? 1 : -1;
+ } else {
+ /* Both ports not usable. */
+ eacr.es = eacr.sl = 0;
+ sync_port = -1;
+ }
+
+ /*
+ * If the clock is in sync just update the eacr and return.
+ * If there is no valid sync port wait for a port update.
+ */
+ if ((eacr.es && check_sync_clock()) || sync_port < 0) {
+ etr_update_eacr(eacr);
+ etr_set_tolec_timeout(now);
+ goto out_unlock;
+ }
+
+ /*
+ * Prepare control register for clock syncing
+ * (reset data port bit, set sync check control.
+ */
+ eacr.dp = 0;
+ eacr.es = 1;
+
+ /*
+ * Update eacr and try to synchronize the clock. If the update
+ * of eacr caused a stepping port switch (or if we have to
+ * assume that a stepping port switch has occurred) or the
+ * clock syncing failed, reset the sync check control bit
+ * and set up a timer to try again after 0.5 seconds
+ */
+ etr_update_eacr(eacr);
+ if (now < etr_tolec + (1600000 << 12) ||
+ etr_sync_clock_stop(&aib, sync_port) != 0) {
+ /* Sync failed. Try again in 1/2 second. */
+ eacr.es = 0;
+ etr_update_eacr(eacr);
+ etr_set_sync_timeout();
+ } else
+ etr_set_tolec_timeout(now);
+out_unlock:
+ mutex_unlock(&etr_work_mutex);
+}
+
+/*
+ * Sysfs interface functions
+ */
+static struct bus_type etr_subsys = {
+ .name = "etr",
+ .dev_name = "etr",
+};
+
+static struct device etr_port0_dev = {
+ .id = 0,
+ .bus = &etr_subsys,
+};
+
+static struct device etr_port1_dev = {
+ .id = 1,
+ .bus = &etr_subsys,
+};
+
+/*
+ * ETR subsys attributes
+ */
+static ssize_t etr_stepping_port_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%i\n", etr_port0.esw.p);
+}
+
+static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL);
+
+static ssize_t etr_stepping_mode_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ char *mode_str;
+
+ if (etr_mode_is_pps(etr_eacr))
+ mode_str = "pps";
+ else if (etr_mode_is_etr(etr_eacr))
+ mode_str = "etr";
+ else
+ mode_str = "local";
+ return sprintf(buf, "%s\n", mode_str);
+}
+
+static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL);
+
+/*
+ * ETR port attributes
+ */
+static inline struct etr_aib *etr_aib_from_dev(struct device *dev)
+{
+ if (dev == &etr_port0_dev)
+ return etr_port0_online ? &etr_port0 : NULL;
+ else
+ return etr_port1_online ? &etr_port1 : NULL;
+}
+
+static ssize_t etr_online_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ unsigned int online;
+
+ online = (dev == &etr_port0_dev) ? etr_port0_online : etr_port1_online;
+ return sprintf(buf, "%i\n", online);
+}
+
+static ssize_t etr_online_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned int value;
+
+ value = simple_strtoul(buf, NULL, 0);
+ if (value != 0 && value != 1)
+ return -EINVAL;
+ if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
+ return -EOPNOTSUPP;
+ mutex_lock(&clock_sync_mutex);
+ if (dev == &etr_port0_dev) {
+ if (etr_port0_online == value)
+ goto out; /* Nothing to do. */
+ etr_port0_online = value;
+ if (etr_port0_online && etr_port1_online)
+ set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+ else
+ clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+ set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
+ queue_work(time_sync_wq, &etr_work);
+ } else {
+ if (etr_port1_online == value)
+ goto out; /* Nothing to do. */
+ etr_port1_online = value;
+ if (etr_port0_online && etr_port1_online)
+ set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+ else
+ clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
+ set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
+ queue_work(time_sync_wq, &etr_work);
+ }
+out:
+ mutex_unlock(&clock_sync_mutex);
+ return count;
+}
+
+static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store);
+
+static ssize_t etr_stepping_control_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ?
+ etr_eacr.e0 : etr_eacr.e1);
+}
+
+static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL);
+
+static ssize_t etr_mode_code_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (!etr_port0_online && !etr_port1_online)
+ /* Status word is not uptodate if both ports are offline. */
+ return -ENODATA;
+ return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ?
+ etr_port0.esw.psc0 : etr_port0.esw.psc1);
+}
+
+static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL);
+
+static ssize_t etr_untuned_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct etr_aib *aib = etr_aib_from_dev(dev);
+
+ if (!aib || !aib->slsw.v1)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", aib->edf1.u);
+}
+
+static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL);
+
+static ssize_t etr_network_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct etr_aib *aib = etr_aib_from_dev(dev);
+
+ if (!aib || !aib->slsw.v1)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", aib->edf1.net_id);
+}
+
+static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL);
+
+static ssize_t etr_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct etr_aib *aib = etr_aib_from_dev(dev);
+
+ if (!aib || !aib->slsw.v1)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", aib->edf1.etr_id);
+}
+
+static DEVICE_ATTR(id, 0400, etr_id_show, NULL);
+
+static ssize_t etr_port_number_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct etr_aib *aib = etr_aib_from_dev(dev);
+
+ if (!aib || !aib->slsw.v1)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", aib->edf1.etr_pn);
+}
+
+static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL);
+
+static ssize_t etr_coupled_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct etr_aib *aib = etr_aib_from_dev(dev);
+
+ if (!aib || !aib->slsw.v3)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", aib->edf3.c);
+}
+
+static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL);
+
+static ssize_t etr_local_time_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct etr_aib *aib = etr_aib_from_dev(dev);
+
+ if (!aib || !aib->slsw.v3)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", aib->edf3.blto);
+}
+
+static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL);
+
+static ssize_t etr_utc_offset_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct etr_aib *aib = etr_aib_from_dev(dev);
+
+ if (!aib || !aib->slsw.v3)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", aib->edf3.buo);
+}
+
+static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL);
+
+static struct device_attribute *etr_port_attributes[] = {
+ &dev_attr_online,
+ &dev_attr_stepping_control,
+ &dev_attr_state_code,
+ &dev_attr_untuned,
+ &dev_attr_network,
+ &dev_attr_id,
+ &dev_attr_port,
+ &dev_attr_coupled,
+ &dev_attr_local_time,
+ &dev_attr_utc_offset,
+ NULL
+};
+
+static int __init etr_register_port(struct device *dev)
+{
+ struct device_attribute **attr;
+ int rc;
+
+ rc = device_register(dev);
+ if (rc)
+ goto out;
+ for (attr = etr_port_attributes; *attr; attr++) {
+ rc = device_create_file(dev, *attr);
+ if (rc)
+ goto out_unreg;
+ }
+ return 0;
+out_unreg:
+ for (; attr >= etr_port_attributes; attr--)
+ device_remove_file(dev, *attr);
+ device_unregister(dev);
+out:
+ return rc;
+}
+
+static void __init etr_unregister_port(struct device *dev)
+{
+ struct device_attribute **attr;
+
+ for (attr = etr_port_attributes; *attr; attr++)
+ device_remove_file(dev, *attr);
+ device_unregister(dev);
+}
+
+static int __init etr_init_sysfs(void)
+{
+ int rc;
+
+ rc = subsys_system_register(&etr_subsys, NULL);
+ if (rc)
+ goto out;
+ rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port);
+ if (rc)
+ goto out_unreg_subsys;
+ rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode);
+ if (rc)
+ goto out_remove_stepping_port;
+ rc = etr_register_port(&etr_port0_dev);
+ if (rc)
+ goto out_remove_stepping_mode;
+ rc = etr_register_port(&etr_port1_dev);
+ if (rc)
+ goto out_remove_port0;
+ return 0;
+
+out_remove_port0:
+ etr_unregister_port(&etr_port0_dev);
+out_remove_stepping_mode:
+ device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode);
+out_remove_stepping_port:
+ device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port);
+out_unreg_subsys:
+ bus_unregister(&etr_subsys);
+out:
+ return rc;
+}
+
+device_initcall(etr_init_sysfs);
+
+/*
+ * Server Time Protocol (STP) code.
+ */
+static int stp_online;
+static struct stp_sstpi stp_info;
+static void *stp_page;
+
+static void stp_work_fn(struct work_struct *work);
+static DEFINE_MUTEX(stp_work_mutex);
+static DECLARE_WORK(stp_work, stp_work_fn);
+static struct timer_list stp_timer;
+
+static int __init early_parse_stp(char *p)
+{
+ if (strncmp(p, "off", 3) == 0)
+ stp_online = 0;
+ else if (strncmp(p, "on", 2) == 0)
+ stp_online = 1;
+ return 0;
+}
+early_param("stp", early_parse_stp);
+
+/*
+ * Reset STP attachment.
+ */
+static void __init stp_reset(void)
+{
+ int rc;
+
+ stp_page = (void *) get_zeroed_page(GFP_ATOMIC);
+ rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+ if (rc == 0)
+ set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
+ else if (stp_online) {
+ pr_warning("The real or virtual hardware system does "
+ "not provide an STP interface\n");
+ free_page((unsigned long) stp_page);
+ stp_page = NULL;
+ stp_online = 0;
+ }
+}
+
+static void stp_timeout(unsigned long dummy)
+{
+ queue_work(time_sync_wq, &stp_work);
+}
+
+static int __init stp_init(void)
+{
+ if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+ return 0;
+ setup_timer(&stp_timer, stp_timeout, 0UL);
+ time_init_wq();
+ if (!stp_online)
+ return 0;
+ queue_work(time_sync_wq, &stp_work);
+ return 0;
+}
+
+arch_initcall(stp_init);
+
+/*
+ * STP timing alert. There are three causes:
+ * 1) timing status change
+ * 2) link availability change
+ * 3) time control parameter change
+ * In all three cases we are only interested in the clock source state.
+ * If a STP clock source is now available use it.
+ */
+static void stp_timing_alert(struct stp_irq_parm *intparm)
+{
+ if (intparm->tsc || intparm->lac || intparm->tcpc)
+ queue_work(time_sync_wq, &stp_work);
+}
+
+/*
+ * STP sync check machine check. This is called when the timing state
+ * changes from the synchronized state to the unsynchronized state.
+ * After a STP sync check the clock is not in sync. The machine check
+ * is broadcasted to all cpus at the same time.
+ */
+void stp_sync_check(void)
+{
+ disable_sync_clock(NULL);
+ queue_work(time_sync_wq, &stp_work);
+}
+
+/*
+ * STP island condition machine check. This is called when an attached
+ * server attempts to communicate over an STP link and the servers
+ * have matching CTN ids and have a valid stratum-1 configuration
+ * but the configurations do not match.
+ */
+void stp_island_check(void)
+{
+ disable_sync_clock(NULL);
+ queue_work(time_sync_wq, &stp_work);
+}
+
+
+static int stp_sync_clock(void *data)
+{
+ static int first;
+ unsigned long long old_clock, delta;
+ struct clock_sync_data *stp_sync;
+ int rc;
+
+ stp_sync = data;
+
+ if (xchg(&first, 1) == 1) {
+ /* Slave */
+ clock_sync_cpu(stp_sync);
+ return 0;
+ }
+
+ /* Wait until all other cpus entered the sync function. */
+ while (atomic_read(&stp_sync->cpus) != 0)
+ cpu_relax();
+
+ enable_sync_clock();
+
+ rc = 0;
+ if (stp_info.todoff[0] || stp_info.todoff[1] ||
+ stp_info.todoff[2] || stp_info.todoff[3] ||
+ stp_info.tmd != 2) {
+ old_clock = get_tod_clock();
+ rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
+ if (rc == 0) {
+ delta = adjust_time(old_clock, get_tod_clock(), 0);
+ fixup_clock_comparator(delta);
+ rc = chsc_sstpi(stp_page, &stp_info,
+ sizeof(struct stp_sstpi));
+ if (rc == 0 && stp_info.tmd != 2)
+ rc = -EAGAIN;
+ }
+ }
+ if (rc) {
+ disable_sync_clock(NULL);
+ stp_sync->in_sync = -EAGAIN;
+ } else
+ stp_sync->in_sync = 1;
+ xchg(&first, 0);
+ return 0;
+}
+
+/*
+ * STP work. Check for the STP state and take over the clock
+ * synchronization if the STP clock source is usable.
+ */
+static void stp_work_fn(struct work_struct *work)
+{
+ struct clock_sync_data stp_sync;
+ int rc;
+
+ /* prevent multiple execution. */
+ mutex_lock(&stp_work_mutex);
+
+ if (!stp_online) {
+ chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+ del_timer_sync(&stp_timer);
+ goto out_unlock;
+ }
+
+ rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
+ if (rc)
+ goto out_unlock;
+
+ rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+ if (rc || stp_info.c == 0)
+ goto out_unlock;
+
+ /* Skip synchronization if the clock is already in sync. */
+ if (check_sync_clock())
+ goto out_unlock;
+
+ memset(&stp_sync, 0, sizeof(stp_sync));
+ get_online_cpus();
+ atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
+ stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask);
+ put_online_cpus();
+
+ if (!check_sync_clock())
+ /*
+ * There is a usable clock but the synchonization failed.
+ * Retry after a second.
+ */
+ mod_timer(&stp_timer, jiffies + HZ);
+
+out_unlock:
+ mutex_unlock(&stp_work_mutex);
+}
+
+/*
+ * STP subsys sysfs interface functions
+ */
+static struct bus_type stp_subsys = {
+ .name = "stp",
+ .dev_name = "stp",
+};
+
+static ssize_t stp_ctn_id_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online)
+ return -ENODATA;
+ return sprintf(buf, "%016llx\n",
+ *(unsigned long long *) stp_info.ctnid);
+}
+
+static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL);
+
+static ssize_t stp_ctn_type_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", stp_info.ctn);
+}
+
+static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL);
+
+static ssize_t stp_dst_offset_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online || !(stp_info.vbits & 0x2000))
+ return -ENODATA;
+ return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
+}
+
+static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL);
+
+static ssize_t stp_leap_seconds_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online || !(stp_info.vbits & 0x8000))
+ return -ENODATA;
+ return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
+}
+
+static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL);
+
+static ssize_t stp_stratum_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
+}
+
+static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL);
+
+static ssize_t stp_time_offset_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online || !(stp_info.vbits & 0x0800))
+ return -ENODATA;
+ return sprintf(buf, "%i\n", (int) stp_info.tto);
+}
+
+static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL);
+
+static ssize_t stp_time_zone_offset_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online || !(stp_info.vbits & 0x4000))
+ return -ENODATA;
+ return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
+}
+
+static DEVICE_ATTR(time_zone_offset, 0400,
+ stp_time_zone_offset_show, NULL);
+
+static ssize_t stp_timing_mode_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", stp_info.tmd);
+}
+
+static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL);
+
+static ssize_t stp_timing_state_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (!stp_online)
+ return -ENODATA;
+ return sprintf(buf, "%i\n", stp_info.tst);
+}
+
+static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL);
+
+static ssize_t stp_online_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%i\n", stp_online);
+}
+
+static ssize_t stp_online_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned int value;
+
+ value = simple_strtoul(buf, NULL, 0);
+ if (value != 0 && value != 1)
+ return -EINVAL;
+ if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+ return -EOPNOTSUPP;
+ mutex_lock(&clock_sync_mutex);
+ stp_online = value;
+ if (stp_online)
+ set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+ else
+ clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+ queue_work(time_sync_wq, &stp_work);
+ mutex_unlock(&clock_sync_mutex);
+ return count;
+}
+
+/*
+ * Can't use DEVICE_ATTR because the attribute should be named
+ * stp/online but dev_attr_online already exists in this file ..
+ */
+static struct device_attribute dev_attr_stp_online = {
+ .attr = { .name = "online", .mode = 0600 },
+ .show = stp_online_show,
+ .store = stp_online_store,
+};
+
+static struct device_attribute *stp_attributes[] = {
+ &dev_attr_ctn_id,
+ &dev_attr_ctn_type,
+ &dev_attr_dst_offset,
+ &dev_attr_leap_seconds,
+ &dev_attr_stp_online,
+ &dev_attr_stratum,
+ &dev_attr_time_offset,
+ &dev_attr_time_zone_offset,
+ &dev_attr_timing_mode,
+ &dev_attr_timing_state,
+ NULL
+};
+
+static int __init stp_init_sysfs(void)
+{
+ struct device_attribute **attr;
+ int rc;
+
+ rc = subsys_system_register(&stp_subsys, NULL);
+ if (rc)
+ goto out;
+ for (attr = stp_attributes; *attr; attr++) {
+ rc = device_create_file(stp_subsys.dev_root, *attr);
+ if (rc)
+ goto out_unreg;
+ }
+ return 0;
+out_unreg:
+ for (; attr >= stp_attributes; attr--)
+ device_remove_file(stp_subsys.dev_root, *attr);
+ bus_unregister(&stp_subsys);
+out:
+ return rc;
+}
+
+device_initcall(stp_init_sysfs);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
new file mode 100644
index 000000000..5728c5bd4
--- /dev/null
+++ b/arch/s390/kernel/topology.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright IBM Corp. 2007, 2011
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "cpu"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/workqueue.h>
+#include <linux/cpuset.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <asm/sysinfo.h>
+
+#define PTF_HORIZONTAL (0UL)
+#define PTF_VERTICAL (1UL)
+#define PTF_CHECK (2UL)
+
+struct mask_info {
+ struct mask_info *next;
+ unsigned char id;
+ cpumask_t mask;
+};
+
+static void set_topology_timer(void);
+static void topology_work_fn(struct work_struct *work);
+static struct sysinfo_15_1_x *tl_info;
+
+static int topology_enabled = 1;
+static DECLARE_WORK(topology_work, topology_work_fn);
+
+/* topology_lock protects the socket and book linked lists */
+static DEFINE_SPINLOCK(topology_lock);
+static struct mask_info socket_info;
+static struct mask_info book_info;
+
+DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology);
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology);
+
+static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
+{
+ cpumask_t mask;
+
+ cpumask_copy(&mask, cpumask_of(cpu));
+ if (!topology_enabled || !MACHINE_HAS_TOPOLOGY)
+ return mask;
+ for (; info; info = info->next) {
+ if (cpumask_test_cpu(cpu, &info->mask))
+ return info->mask;
+ }
+ return mask;
+}
+
+static cpumask_t cpu_thread_map(unsigned int cpu)
+{
+ cpumask_t mask;
+ int i;
+
+ cpumask_copy(&mask, cpumask_of(cpu));
+ if (!topology_enabled || !MACHINE_HAS_TOPOLOGY)
+ return mask;
+ cpu -= cpu % (smp_cpu_mtid + 1);
+ for (i = 0; i <= smp_cpu_mtid; i++)
+ if (cpu_present(cpu + i))
+ cpumask_set_cpu(cpu + i, &mask);
+ return mask;
+}
+
+static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
+ struct mask_info *book,
+ struct mask_info *socket,
+ int one_socket_per_cpu)
+{
+ unsigned int core;
+
+ for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) {
+ unsigned int rcore;
+ int lcpu, i;
+
+ rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
+ lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
+ if (lcpu < 0)
+ continue;
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ per_cpu(cpu_topology, lcpu + i).book_id = book->id;
+ per_cpu(cpu_topology, lcpu + i).core_id = rcore;
+ per_cpu(cpu_topology, lcpu + i).thread_id = lcpu + i;
+ cpumask_set_cpu(lcpu + i, &book->mask);
+ cpumask_set_cpu(lcpu + i, &socket->mask);
+ if (one_socket_per_cpu)
+ per_cpu(cpu_topology, lcpu + i).socket_id = rcore;
+ else
+ per_cpu(cpu_topology, lcpu + i).socket_id = socket->id;
+ smp_cpu_set_polarization(lcpu + i, tl_core->pp);
+ }
+ if (one_socket_per_cpu)
+ socket = socket->next;
+ }
+ return socket;
+}
+
+static void clear_masks(void)
+{
+ struct mask_info *info;
+
+ info = &socket_info;
+ while (info) {
+ cpumask_clear(&info->mask);
+ info = info->next;
+ }
+ info = &book_info;
+ while (info) {
+ cpumask_clear(&info->mask);
+ info = info->next;
+ }
+}
+
+static union topology_entry *next_tle(union topology_entry *tle)
+{
+ if (!tle->nl)
+ return (union topology_entry *)((struct topology_core *)tle + 1);
+ return (union topology_entry *)((struct topology_container *)tle + 1);
+}
+
+static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
+{
+ struct mask_info *socket = &socket_info;
+ struct mask_info *book = &book_info;
+ union topology_entry *tle, *end;
+
+ tle = info->tle;
+ end = (union topology_entry *)((unsigned long)info + info->length);
+ while (tle < end) {
+ switch (tle->nl) {
+ case 2:
+ book = book->next;
+ book->id = tle->container.id;
+ break;
+ case 1:
+ socket = socket->next;
+ socket->id = tle->container.id;
+ break;
+ case 0:
+ add_cpus_to_mask(&tle->cpu, book, socket, 0);
+ break;
+ default:
+ clear_masks();
+ return;
+ }
+ tle = next_tle(tle);
+ }
+}
+
+static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
+{
+ struct mask_info *socket = &socket_info;
+ struct mask_info *book = &book_info;
+ union topology_entry *tle, *end;
+
+ tle = info->tle;
+ end = (union topology_entry *)((unsigned long)info + info->length);
+ while (tle < end) {
+ switch (tle->nl) {
+ case 1:
+ book = book->next;
+ book->id = tle->container.id;
+ break;
+ case 0:
+ socket = add_cpus_to_mask(&tle->cpu, book, socket, 1);
+ break;
+ default:
+ clear_masks();
+ return;
+ }
+ tle = next_tle(tle);
+ }
+}
+
+static void tl_to_masks(struct sysinfo_15_1_x *info)
+{
+ struct cpuid cpu_id;
+
+ spin_lock_irq(&topology_lock);
+ get_cpu_id(&cpu_id);
+ clear_masks();
+ switch (cpu_id.machine) {
+ case 0x2097:
+ case 0x2098:
+ __tl_to_masks_z10(info);
+ break;
+ default:
+ __tl_to_masks_generic(info);
+ }
+ spin_unlock_irq(&topology_lock);
+}
+
+static void topology_update_polarization_simple(void)
+{
+ int cpu;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ for_each_possible_cpu(cpu)
+ smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
+ mutex_unlock(&smp_cpu_state_mutex);
+}
+
+static int ptf(unsigned long fc)
+{
+ int rc;
+
+ asm volatile(
+ " .insn rre,0xb9a20000,%1,%1\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (rc)
+ : "d" (fc) : "cc");
+ return rc;
+}
+
+int topology_set_cpu_management(int fc)
+{
+ int cpu, rc;
+
+ if (!MACHINE_HAS_TOPOLOGY)
+ return -EOPNOTSUPP;
+ if (fc)
+ rc = ptf(PTF_VERTICAL);
+ else
+ rc = ptf(PTF_HORIZONTAL);
+ if (rc)
+ return -EBUSY;
+ for_each_possible_cpu(cpu)
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ return rc;
+}
+
+static void update_cpu_masks(void)
+{
+ unsigned long flags;
+ int cpu;
+
+ spin_lock_irqsave(&topology_lock, flags);
+ for_each_possible_cpu(cpu) {
+ per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu);
+ per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu);
+ per_cpu(cpu_topology, cpu).book_mask = cpu_group_map(&book_info, cpu);
+ if (!MACHINE_HAS_TOPOLOGY) {
+ per_cpu(cpu_topology, cpu).thread_id = cpu;
+ per_cpu(cpu_topology, cpu).core_id = cpu;
+ per_cpu(cpu_topology, cpu).socket_id = cpu;
+ per_cpu(cpu_topology, cpu).book_id = cpu;
+ }
+ }
+ spin_unlock_irqrestore(&topology_lock, flags);
+}
+
+void store_topology(struct sysinfo_15_1_x *info)
+{
+ if (topology_max_mnest >= 3)
+ stsi(info, 15, 1, 3);
+ else
+ stsi(info, 15, 1, 2);
+}
+
+int arch_update_cpu_topology(void)
+{
+ struct sysinfo_15_1_x *info = tl_info;
+ struct device *dev;
+ int cpu;
+
+ if (!MACHINE_HAS_TOPOLOGY) {
+ update_cpu_masks();
+ topology_update_polarization_simple();
+ return 0;
+ }
+ store_topology(info);
+ tl_to_masks(info);
+ update_cpu_masks();
+ for_each_online_cpu(cpu) {
+ dev = get_cpu_device(cpu);
+ kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+ }
+ return 1;
+}
+
+static void topology_work_fn(struct work_struct *work)
+{
+ rebuild_sched_domains();
+}
+
+void topology_schedule_update(void)
+{
+ schedule_work(&topology_work);
+}
+
+static void topology_timer_fn(unsigned long ignored)
+{
+ if (ptf(PTF_CHECK))
+ topology_schedule_update();
+ set_topology_timer();
+}
+
+static struct timer_list topology_timer =
+ TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0);
+
+static atomic_t topology_poll = ATOMIC_INIT(0);
+
+static void set_topology_timer(void)
+{
+ if (atomic_add_unless(&topology_poll, -1, 0))
+ mod_timer(&topology_timer, jiffies + HZ / 10);
+ else
+ mod_timer(&topology_timer, jiffies + HZ * 60);
+}
+
+void topology_expect_change(void)
+{
+ if (!MACHINE_HAS_TOPOLOGY)
+ return;
+ /* This is racy, but it doesn't matter since it is just a heuristic.
+ * Worst case is that we poll in a higher frequency for a bit longer.
+ */
+ if (atomic_read(&topology_poll) > 60)
+ return;
+ atomic_add(60, &topology_poll);
+ set_topology_timer();
+}
+
+static int cpu_management;
+
+static ssize_t dispatching_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t count;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ count = sprintf(buf, "%d\n", cpu_management);
+ mutex_unlock(&smp_cpu_state_mutex);
+ return count;
+}
+
+static ssize_t dispatching_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ int val, rc;
+ char delim;
+
+ if (sscanf(buf, "%d %c", &val, &delim) != 1)
+ return -EINVAL;
+ if (val != 0 && val != 1)
+ return -EINVAL;
+ rc = 0;
+ get_online_cpus();
+ mutex_lock(&smp_cpu_state_mutex);
+ if (cpu_management == val)
+ goto out;
+ rc = topology_set_cpu_management(val);
+ if (rc)
+ goto out;
+ cpu_management = val;
+ topology_expect_change();
+out:
+ mutex_unlock(&smp_cpu_state_mutex);
+ put_online_cpus();
+ return rc ? rc : count;
+}
+static DEVICE_ATTR(dispatching, 0644, dispatching_show,
+ dispatching_store);
+
+static ssize_t cpu_polarization_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int cpu = dev->id;
+ ssize_t count;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ switch (smp_cpu_get_polarization(cpu)) {
+ case POLARIZATION_HRZ:
+ count = sprintf(buf, "horizontal\n");
+ break;
+ case POLARIZATION_VL:
+ count = sprintf(buf, "vertical:low\n");
+ break;
+ case POLARIZATION_VM:
+ count = sprintf(buf, "vertical:medium\n");
+ break;
+ case POLARIZATION_VH:
+ count = sprintf(buf, "vertical:high\n");
+ break;
+ default:
+ count = sprintf(buf, "unknown\n");
+ break;
+ }
+ mutex_unlock(&smp_cpu_state_mutex);
+ return count;
+}
+static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);
+
+static struct attribute *topology_cpu_attrs[] = {
+ &dev_attr_polarization.attr,
+ NULL,
+};
+
+static struct attribute_group topology_cpu_attr_group = {
+ .attrs = topology_cpu_attrs,
+};
+
+int topology_cpu_init(struct cpu *cpu)
+{
+ return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
+}
+
+static const struct cpumask *cpu_thread_mask(int cpu)
+{
+ return &per_cpu(cpu_topology, cpu).thread_mask;
+}
+
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return &per_cpu(cpu_topology, cpu).core_mask;
+}
+
+static const struct cpumask *cpu_book_mask(int cpu)
+{
+ return &per_cpu(cpu_topology, cpu).book_mask;
+}
+
+static int __init early_parse_topology(char *p)
+{
+ if (strncmp(p, "off", 3))
+ return 0;
+ topology_enabled = 0;
+ return 0;
+}
+early_param("topology", early_parse_topology);
+
+static struct sched_domain_topology_level s390_topology[] = {
+ { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+ { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+ { cpu_book_mask, SD_INIT_NAME(BOOK) },
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
+static void __init alloc_masks(struct sysinfo_15_1_x *info,
+ struct mask_info *mask, int offset)
+{
+ int i, nr_masks;
+
+ nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
+ for (i = 0; i < info->mnest - offset; i++)
+ nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
+ nr_masks = max(nr_masks, 1);
+ for (i = 0; i < nr_masks; i++) {
+ mask->next = kzalloc(sizeof(*mask->next), GFP_KERNEL);
+ mask = mask->next;
+ }
+}
+
+static int __init s390_topology_init(void)
+{
+ struct sysinfo_15_1_x *info;
+ int i;
+
+ if (!MACHINE_HAS_TOPOLOGY)
+ return 0;
+ tl_info = (struct sysinfo_15_1_x *)__get_free_page(GFP_KERNEL);
+ info = tl_info;
+ store_topology(info);
+ pr_info("The CPU configuration topology of the machine is:");
+ for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+ printk(KERN_CONT " %d", info->mag[i]);
+ printk(KERN_CONT " / %d\n", info->mnest);
+ alloc_masks(info, &socket_info, 1);
+ alloc_masks(info, &book_info, 2);
+ set_sched_topology(s390_topology);
+ return 0;
+}
+early_initcall(s390_topology_init);
+
+static int __init topology_init(void)
+{
+ if (MACHINE_HAS_TOPOLOGY)
+ set_topology_timer();
+ else
+ topology_update_polarization_simple();
+ return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
+}
+device_initcall(topology_init);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
new file mode 100644
index 000000000..4d96c9f53
--- /dev/null
+++ b/arch/s390/kernel/traps.c
@@ -0,0 +1,342 @@
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
+ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ *
+ * Derived from "arch/i386/kernel/traps.c"
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * 'Traps.c' handles hardware traps and faults after we have saved some
+ * state in 'asm.s'.
+ */
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/switch_to.h>
+#include "entry.h"
+
+int show_unhandled_signals = 1;
+
+static inline void __user *get_trap_ip(struct pt_regs *regs)
+{
+ unsigned long address;
+
+ if (regs->int_code & 0x200)
+ address = *(unsigned long *)(current->thread.trap_tdb + 24);
+ else
+ address = regs->psw.addr;
+ return (void __user *)
+ ((address - (regs->int_code >> 16)) & PSW_ADDR_INSN);
+}
+
+static inline void report_user_fault(struct pt_regs *regs, int signr)
+{
+ if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
+ return;
+ if (!unhandled_signal(current, signr))
+ return;
+ if (!printk_ratelimit())
+ return;
+ printk("User process fault: interruption code %04x ilc:%d ",
+ regs->int_code & 0xffff, regs->int_code >> 17);
+ print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
+ printk("\n");
+ show_regs(regs);
+}
+
+int is_valid_bugaddr(unsigned long addr)
+{
+ return 1;
+}
+
+void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
+{
+ siginfo_t info;
+
+ if (user_mode(regs)) {
+ info.si_signo = si_signo;
+ info.si_errno = 0;
+ info.si_code = si_code;
+ info.si_addr = get_trap_ip(regs);
+ force_sig_info(si_signo, &info, current);
+ report_user_fault(regs, si_signo);
+ } else {
+ const struct exception_table_entry *fixup;
+ fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
+ if (fixup)
+ regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE;
+ else {
+ enum bug_trap_type btt;
+
+ btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs);
+ if (btt == BUG_TRAP_TYPE_WARN)
+ return;
+ die(regs, str);
+ }
+ }
+}
+
+static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
+{
+ if (notify_die(DIE_TRAP, str, regs, 0,
+ regs->int_code, si_signo) == NOTIFY_STOP)
+ return;
+ do_report_trap(regs, si_signo, si_code, str);
+}
+NOKPROBE_SYMBOL(do_trap);
+
+void do_per_trap(struct pt_regs *regs)
+{
+ siginfo_t info;
+
+ if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP)
+ return;
+ if (!current->ptrace)
+ return;
+ info.si_signo = SIGTRAP;
+ info.si_errno = 0;
+ info.si_code = TRAP_HWBKPT;
+ info.si_addr =
+ (void __force __user *) current->thread.per_event.address;
+ force_sig_info(SIGTRAP, &info, current);
+}
+NOKPROBE_SYMBOL(do_per_trap);
+
+void default_trap_handler(struct pt_regs *regs)
+{
+ if (user_mode(regs)) {
+ report_user_fault(regs, SIGSEGV);
+ do_exit(SIGSEGV);
+ } else
+ die(regs, "Unknown program exception");
+}
+
+#define DO_ERROR_INFO(name, signr, sicode, str) \
+void name(struct pt_regs *regs) \
+{ \
+ do_trap(regs, signr, sicode, str); \
+}
+
+DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR,
+ "addressing exception")
+DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN,
+ "execute exception")
+DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV,
+ "fixpoint divide exception")
+DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF,
+ "fixpoint overflow exception")
+DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF,
+ "HFP overflow exception")
+DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND,
+ "HFP underflow exception")
+DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES,
+ "HFP significance exception")
+DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV,
+ "HFP divide exception")
+DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV,
+ "HFP square root exception")
+DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN,
+ "operand exception")
+DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC,
+ "privileged operation")
+DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN,
+ "special operation exception")
+DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN,
+ "transaction constraint exception")
+
+static inline void do_fp_trap(struct pt_regs *regs, int fpc)
+{
+ int si_code = 0;
+ /* FPC[2] is Data Exception Code */
+ if ((fpc & 0x00000300) == 0) {
+ /* bits 6 and 7 of DXC are 0 iff IEEE exception */
+ if (fpc & 0x8000) /* invalid fp operation */
+ si_code = FPE_FLTINV;
+ else if (fpc & 0x4000) /* div by 0 */
+ si_code = FPE_FLTDIV;
+ else if (fpc & 0x2000) /* overflow */
+ si_code = FPE_FLTOVF;
+ else if (fpc & 0x1000) /* underflow */
+ si_code = FPE_FLTUND;
+ else if (fpc & 0x0800) /* inexact */
+ si_code = FPE_FLTRES;
+ }
+ do_trap(regs, SIGFPE, si_code, "floating point exception");
+}
+
+void translation_exception(struct pt_regs *regs)
+{
+ /* May never happen. */
+ panic("Translation exception");
+}
+
+void illegal_op(struct pt_regs *regs)
+{
+ siginfo_t info;
+ __u8 opcode[6];
+ __u16 __user *location;
+ int is_uprobe_insn = 0;
+ int signal = 0;
+
+ location = get_trap_ip(regs);
+
+ if (user_mode(regs)) {
+ if (get_user(*((__u16 *) opcode), (__u16 __user *) location))
+ return;
+ if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
+ if (current->ptrace) {
+ info.si_signo = SIGTRAP;
+ info.si_errno = 0;
+ info.si_code = TRAP_BRKPT;
+ info.si_addr = location;
+ force_sig_info(SIGTRAP, &info, current);
+ } else
+ signal = SIGILL;
+#ifdef CONFIG_UPROBES
+ } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) {
+ is_uprobe_insn = 1;
+#endif
+ } else
+ signal = SIGILL;
+ }
+ /*
+ * We got either an illegal op in kernel mode, or user space trapped
+ * on a uprobes illegal instruction. See if kprobes or uprobes picks
+ * it up. If not, SIGILL.
+ */
+ if (is_uprobe_insn || !user_mode(regs)) {
+ if (notify_die(DIE_BPT, "bpt", regs, 0,
+ 3, SIGTRAP) != NOTIFY_STOP)
+ signal = SIGILL;
+ }
+ if (signal)
+ do_trap(regs, signal, ILL_ILLOPC, "illegal operation");
+}
+NOKPROBE_SYMBOL(illegal_op);
+
+DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
+ "specification exception");
+
+int alloc_vector_registers(struct task_struct *tsk)
+{
+ __vector128 *vxrs;
+ int i;
+
+ /* Allocate vector register save area. */
+ vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
+ GFP_KERNEL|__GFP_REPEAT);
+ if (!vxrs)
+ return -ENOMEM;
+ preempt_disable();
+ if (tsk == current)
+ save_fp_regs(tsk->thread.fp_regs.fprs);
+ /* Copy the 16 floating point registers */
+ for (i = 0; i < 16; i++)
+ *(freg_t *) &vxrs[i] = tsk->thread.fp_regs.fprs[i];
+ tsk->thread.vxrs = vxrs;
+ if (tsk == current) {
+ __ctl_set_bit(0, 17);
+ restore_vx_regs(vxrs);
+ }
+ preempt_enable();
+ return 0;
+}
+
+void vector_exception(struct pt_regs *regs)
+{
+ int si_code, vic;
+
+ if (!MACHINE_HAS_VX) {
+ do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation");
+ return;
+ }
+
+ /* get vector interrupt code from fpc */
+ asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
+ vic = (current->thread.fp_regs.fpc & 0xf00) >> 8;
+ switch (vic) {
+ case 1: /* invalid vector operation */
+ si_code = FPE_FLTINV;
+ break;
+ case 2: /* division by zero */
+ si_code = FPE_FLTDIV;
+ break;
+ case 3: /* overflow */
+ si_code = FPE_FLTOVF;
+ break;
+ case 4: /* underflow */
+ si_code = FPE_FLTUND;
+ break;
+ case 5: /* inexact */
+ si_code = FPE_FLTRES;
+ break;
+ default: /* unknown cause */
+ si_code = 0;
+ }
+ do_trap(regs, SIGFPE, si_code, "vector exception");
+}
+
+static int __init disable_vector_extension(char *str)
+{
+ S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
+ return 1;
+}
+__setup("novx", disable_vector_extension);
+
+void data_exception(struct pt_regs *regs)
+{
+ __u16 __user *location;
+ int signal = 0;
+
+ location = get_trap_ip(regs);
+
+ asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
+ /* Check for vector register enablement */
+ if (MACHINE_HAS_VX && !current->thread.vxrs &&
+ (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) {
+ alloc_vector_registers(current);
+ /* Vector data exception is suppressing, rewind psw. */
+ regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
+ clear_pt_regs_flag(regs, PIF_PER_TRAP);
+ return;
+ }
+ if (current->thread.fp_regs.fpc & FPC_DXC_MASK)
+ signal = SIGFPE;
+ else
+ signal = SIGILL;
+ if (signal == SIGFPE)
+ do_fp_trap(regs, current->thread.fp_regs.fpc);
+ else if (signal)
+ do_trap(regs, signal, ILL_ILLOPN, "data exception");
+}
+
+void space_switch_exception(struct pt_regs *regs)
+{
+ /* Set user psw back to home space mode. */
+ if (user_mode(regs))
+ regs->psw.mask |= PSW_ASC_HOME;
+ /* Send SIGILL. */
+ do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event");
+}
+
+void kernel_stack_overflow(struct pt_regs *regs)
+{
+ bust_spinlocks(1);
+ printk("Kernel stack overflow.\n");
+ show_regs(regs);
+ bust_spinlocks(0);
+ panic("Corrupt kernel stack, can't continue.");
+}
+NOKPROBE_SYMBOL(kernel_stack_overflow);
+
+void __init trap_init(void)
+{
+ local_mcck_enable();
+}
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
new file mode 100644
index 000000000..66956c09d
--- /dev/null
+++ b/arch/s390/kernel/uprobes.c
@@ -0,0 +1,385 @@
+/*
+ * User-space Probes (UProbes) for s390
+ *
+ * Copyright IBM Corp. 2014
+ * Author(s): Jan Willeke,
+ */
+
+#include <linux/uaccess.h>
+#include <linux/uprobes.h>
+#include <linux/compat.h>
+#include <linux/kdebug.h>
+#include <asm/switch_to.h>
+#include <asm/facility.h>
+#include <asm/kprobes.h>
+#include <asm/dis.h>
+#include "entry.h"
+
+#define UPROBE_TRAP_NR UINT_MAX
+
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
+ unsigned long addr)
+{
+ return probe_is_prohibited_opcode(auprobe->insn);
+}
+
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ if (psw_bits(regs->psw).eaba == PSW_AMODE_24BIT)
+ return -EINVAL;
+ if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_AMODE_31BIT)
+ return -EINVAL;
+ clear_pt_regs_flag(regs, PIF_PER_TRAP);
+ auprobe->saved_per = psw_bits(regs->psw).r;
+ auprobe->saved_int_code = regs->int_code;
+ regs->int_code = UPROBE_TRAP_NR;
+ regs->psw.addr = current->utask->xol_vaddr;
+ set_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);
+ update_cr_regs(current);
+ return 0;
+}
+
+bool arch_uprobe_xol_was_trapped(struct task_struct *tsk)
+{
+ struct pt_regs *regs = task_pt_regs(tsk);
+
+ if (regs->int_code != UPROBE_TRAP_NR)
+ return true;
+ return false;
+}
+
+static int check_per_event(unsigned short cause, unsigned long control,
+ struct pt_regs *regs)
+{
+ if (!(regs->psw.mask & PSW_MASK_PER))
+ return 0;
+ /* user space single step */
+ if (control == 0)
+ return 1;
+ /* over indication for storage alteration */
+ if ((control & 0x20200000) && (cause & 0x2000))
+ return 1;
+ if (cause & 0x8000) {
+ /* all branches */
+ if ((control & 0x80800000) == 0x80000000)
+ return 1;
+ /* branch into selected range */
+ if (((control & 0x80800000) == 0x80800000) &&
+ regs->psw.addr >= current->thread.per_user.start &&
+ regs->psw.addr <= current->thread.per_user.end)
+ return 1;
+ }
+ return 0;
+}
+
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ int fixup = probe_get_fixup_type(auprobe->insn);
+ struct uprobe_task *utask = current->utask;
+
+ clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);
+ update_cr_regs(current);
+ psw_bits(regs->psw).r = auprobe->saved_per;
+ regs->int_code = auprobe->saved_int_code;
+
+ if (fixup & FIXUP_PSW_NORMAL)
+ regs->psw.addr += utask->vaddr - utask->xol_vaddr;
+ if (fixup & FIXUP_RETURN_REGISTER) {
+ int reg = (auprobe->insn[0] & 0xf0) >> 4;
+
+ regs->gprs[reg] += utask->vaddr - utask->xol_vaddr;
+ }
+ if (fixup & FIXUP_BRANCH_NOT_TAKEN) {
+ int ilen = insn_length(auprobe->insn[0] >> 8);
+
+ if (regs->psw.addr - utask->xol_vaddr == ilen)
+ regs->psw.addr = utask->vaddr + ilen;
+ }
+ if (check_per_event(current->thread.per_event.cause,
+ current->thread.per_user.control, regs)) {
+ /* fix per address */
+ current->thread.per_event.address = utask->vaddr;
+ /* trigger per event */
+ set_pt_regs_flag(regs, PIF_PER_TRAP);
+ }
+ return 0;
+}
+
+int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
+ void *data)
+{
+ struct die_args *args = data;
+ struct pt_regs *regs = args->regs;
+
+ if (!user_mode(regs))
+ return NOTIFY_DONE;
+ if (regs->int_code & 0x200) /* Trap during transaction */
+ return NOTIFY_DONE;
+ switch (val) {
+ case DIE_BPT:
+ if (uprobe_pre_sstep_notifier(regs))
+ return NOTIFY_STOP;
+ break;
+ case DIE_SSTEP:
+ if (uprobe_post_sstep_notifier(regs))
+ return NOTIFY_STOP;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ clear_thread_flag(TIF_UPROBE_SINGLESTEP);
+ regs->int_code = auprobe->saved_int_code;
+ regs->psw.addr = current->utask->vaddr;
+ current->thread.per_event.address = current->utask->vaddr;
+}
+
+unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline,
+ struct pt_regs *regs)
+{
+ unsigned long orig;
+
+ orig = regs->gprs[14];
+ regs->gprs[14] = trampoline;
+ return orig;
+}
+
+/* Instruction Emulation */
+
+static void adjust_psw_addr(psw_t *psw, unsigned long len)
+{
+ psw->addr = __rewind_psw(*psw, -len);
+}
+
+#define EMU_ILLEGAL_OP 1
+#define EMU_SPECIFICATION 2
+#define EMU_ADDRESSING 3
+
+#define emu_load_ril(ptr, output) \
+({ \
+ unsigned int mask = sizeof(*(ptr)) - 1; \
+ __typeof__(*(ptr)) input; \
+ int __rc = 0; \
+ \
+ if (!test_facility(34)) \
+ __rc = EMU_ILLEGAL_OP; \
+ else if ((u64 __force)ptr & mask) \
+ __rc = EMU_SPECIFICATION; \
+ else if (get_user(input, ptr)) \
+ __rc = EMU_ADDRESSING; \
+ else \
+ *(output) = input; \
+ __rc; \
+})
+
+#define emu_store_ril(regs, ptr, input) \
+({ \
+ unsigned int mask = sizeof(*(ptr)) - 1; \
+ __typeof__(ptr) __ptr = (ptr); \
+ int __rc = 0; \
+ \
+ if (!test_facility(34)) \
+ __rc = EMU_ILLEGAL_OP; \
+ else if ((u64 __force)__ptr & mask) \
+ __rc = EMU_SPECIFICATION; \
+ else if (put_user(*(input), __ptr)) \
+ __rc = EMU_ADDRESSING; \
+ if (__rc == 0) \
+ sim_stor_event(regs, \
+ (void __force *)__ptr, \
+ mask + 1); \
+ __rc; \
+})
+
+#define emu_cmp_ril(regs, ptr, cmp) \
+({ \
+ unsigned int mask = sizeof(*(ptr)) - 1; \
+ __typeof__(*(ptr)) input; \
+ int __rc = 0; \
+ \
+ if (!test_facility(34)) \
+ __rc = EMU_ILLEGAL_OP; \
+ else if ((u64 __force)ptr & mask) \
+ __rc = EMU_SPECIFICATION; \
+ else if (get_user(input, ptr)) \
+ __rc = EMU_ADDRESSING; \
+ else if (input > *(cmp)) \
+ psw_bits((regs)->psw).cc = 1; \
+ else if (input < *(cmp)) \
+ psw_bits((regs)->psw).cc = 2; \
+ else \
+ psw_bits((regs)->psw).cc = 0; \
+ __rc; \
+})
+
+struct insn_ril {
+ u8 opc0;
+ u8 reg : 4;
+ u8 opc1 : 4;
+ s32 disp;
+} __packed;
+
+union split_register {
+ u64 u64;
+ u32 u32[2];
+ u16 u16[4];
+ s64 s64;
+ s32 s32[2];
+ s16 s16[4];
+};
+
+/*
+ * If user per registers are setup to trace storage alterations and an
+ * emulated store took place on a fitting address a user trap is generated.
+ */
+static void sim_stor_event(struct pt_regs *regs, void *addr, int len)
+{
+ if (!(regs->psw.mask & PSW_MASK_PER))
+ return;
+ if (!(current->thread.per_user.control & PER_EVENT_STORE))
+ return;
+ if ((void *)current->thread.per_user.start > (addr + len))
+ return;
+ if ((void *)current->thread.per_user.end < addr)
+ return;
+ current->thread.per_event.address = regs->psw.addr;
+ current->thread.per_event.cause = PER_EVENT_STORE >> 16;
+ set_pt_regs_flag(regs, PIF_PER_TRAP);
+}
+
+/*
+ * pc relative instructions are emulated, since parameters may not be
+ * accessible from the xol area due to range limitations.
+ */
+static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ union split_register *rx;
+ struct insn_ril *insn;
+ unsigned int ilen;
+ void *uptr;
+ int rc = 0;
+
+ insn = (struct insn_ril *) &auprobe->insn;
+ rx = (union split_register *) &regs->gprs[insn->reg];
+ uptr = (void *)(regs->psw.addr + (insn->disp * 2));
+ ilen = insn_length(insn->opc0);
+
+ switch (insn->opc0) {
+ case 0xc0:
+ switch (insn->opc1) {
+ case 0x00: /* larl */
+ rx->u64 = (unsigned long)uptr;
+ break;
+ }
+ break;
+ case 0xc4:
+ switch (insn->opc1) {
+ case 0x02: /* llhrl */
+ rc = emu_load_ril((u16 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x04: /* lghrl */
+ rc = emu_load_ril((s16 __user *)uptr, &rx->u64);
+ break;
+ case 0x05: /* lhrl */
+ rc = emu_load_ril((s16 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x06: /* llghrl */
+ rc = emu_load_ril((u16 __user *)uptr, &rx->u64);
+ break;
+ case 0x08: /* lgrl */
+ rc = emu_load_ril((u64 __user *)uptr, &rx->u64);
+ break;
+ case 0x0c: /* lgfrl */
+ rc = emu_load_ril((s32 __user *)uptr, &rx->u64);
+ break;
+ case 0x0d: /* lrl */
+ rc = emu_load_ril((u32 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x0e: /* llgfrl */
+ rc = emu_load_ril((u32 __user *)uptr, &rx->u64);
+ break;
+ case 0x07: /* sthrl */
+ rc = emu_store_ril(regs, (u16 __user *)uptr, &rx->u16[3]);
+ break;
+ case 0x0b: /* stgrl */
+ rc = emu_store_ril(regs, (u64 __user *)uptr, &rx->u64);
+ break;
+ case 0x0f: /* strl */
+ rc = emu_store_ril(regs, (u32 __user *)uptr, &rx->u32[1]);
+ break;
+ }
+ break;
+ case 0xc6:
+ switch (insn->opc1) {
+ case 0x02: /* pfdrl */
+ if (!test_facility(34))
+ rc = EMU_ILLEGAL_OP;
+ break;
+ case 0x04: /* cghrl */
+ rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64);
+ break;
+ case 0x05: /* chrl */
+ rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s32[1]);
+ break;
+ case 0x06: /* clghrl */
+ rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u64);
+ break;
+ case 0x07: /* clhrl */
+ rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x08: /* cgrl */
+ rc = emu_cmp_ril(regs, (s64 __user *)uptr, &rx->s64);
+ break;
+ case 0x0a: /* clgrl */
+ rc = emu_cmp_ril(regs, (u64 __user *)uptr, &rx->u64);
+ break;
+ case 0x0c: /* cgfrl */
+ rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s64);
+ break;
+ case 0x0d: /* crl */
+ rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s32[1]);
+ break;
+ case 0x0e: /* clgfrl */
+ rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u64);
+ break;
+ case 0x0f: /* clrl */
+ rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u32[1]);
+ break;
+ }
+ break;
+ }
+ adjust_psw_addr(&regs->psw, ilen);
+ switch (rc) {
+ case EMU_ILLEGAL_OP:
+ regs->int_code = ilen << 16 | 0x0001;
+ do_report_trap(regs, SIGILL, ILL_ILLOPC, NULL);
+ break;
+ case EMU_SPECIFICATION:
+ regs->int_code = ilen << 16 | 0x0006;
+ do_report_trap(regs, SIGILL, ILL_ILLOPC , NULL);
+ break;
+ case EMU_ADDRESSING:
+ regs->int_code = ilen << 16 | 0x0005;
+ do_report_trap(regs, SIGSEGV, SEGV_MAPERR, NULL);
+ break;
+ }
+}
+
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ if ((psw_bits(regs->psw).eaba == PSW_AMODE_24BIT) ||
+ ((psw_bits(regs->psw).eaba == PSW_AMODE_31BIT) &&
+ !is_compat_task())) {
+ regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE);
+ do_report_trap(regs, SIGILL, ILL_ILLADR, NULL);
+ return true;
+ }
+ if (probe_is_insn_relative_long(auprobe->insn)) {
+ handle_insn_ril(auprobe, regs);
+ return true;
+ }
+ return false;
+}
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
new file mode 100644
index 000000000..0d58269ff
--- /dev/null
+++ b/arch/s390/kernel/vdso.c
@@ -0,0 +1,306 @@
+/*
+ * vdso setup for s390
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/security.h>
+#include <linux/bootmem.h>
+#include <linux/compat.h>
+#include <asm/asm-offsets.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/sections.h>
+#include <asm/vdso.h>
+#include <asm/facility.h>
+
+#ifdef CONFIG_COMPAT
+extern char vdso32_start, vdso32_end;
+static void *vdso32_kbase = &vdso32_start;
+static unsigned int vdso32_pages;
+static struct page **vdso32_pagelist;
+#endif
+
+extern char vdso64_start, vdso64_end;
+static void *vdso64_kbase = &vdso64_start;
+static unsigned int vdso64_pages;
+static struct page **vdso64_pagelist;
+
+/*
+ * Should the kernel map a VDSO page into processes and pass its
+ * address down to glibc upon exec()?
+ */
+unsigned int __read_mostly vdso_enabled = 1;
+
+static int __init vdso_setup(char *s)
+{
+ unsigned long val;
+ int rc;
+
+ rc = 0;
+ if (strncmp(s, "on", 3) == 0)
+ vdso_enabled = 1;
+ else if (strncmp(s, "off", 4) == 0)
+ vdso_enabled = 0;
+ else {
+ rc = kstrtoul(s, 0, &val);
+ vdso_enabled = rc ? 0 : !!val;
+ }
+ return !rc;
+}
+__setup("vdso=", vdso_setup);
+
+/*
+ * The vdso data page
+ */
+static union {
+ struct vdso_data data;
+ u8 page[PAGE_SIZE];
+} vdso_data_store __page_aligned_data;
+struct vdso_data *vdso_data = &vdso_data_store.data;
+
+/*
+ * Setup vdso data page.
+ */
+static void vdso_init_data(struct vdso_data *vd)
+{
+ vd->ectg_available = test_facility(31);
+}
+
+/*
+ * Allocate/free per cpu vdso data.
+ */
+#define SEGMENT_ORDER 2
+
+int vdso_alloc_per_cpu(struct _lowcore *lowcore)
+{
+ unsigned long segment_table, page_table, page_frame;
+ u32 *psal, *aste;
+ int i;
+
+ lowcore->vdso_per_cpu_data = __LC_PASTE;
+
+ if (!vdso_enabled)
+ return 0;
+
+ segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
+ page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ page_frame = get_zeroed_page(GFP_KERNEL);
+ if (!segment_table || !page_table || !page_frame)
+ goto out;
+
+ clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
+ PAGE_SIZE << SEGMENT_ORDER);
+ clear_table((unsigned long *) page_table, _PAGE_INVALID,
+ 256*sizeof(unsigned long));
+
+ *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
+ *(unsigned long *) page_table = _PAGE_PROTECT + page_frame;
+
+ psal = (u32 *) (page_table + 256*sizeof(unsigned long));
+ aste = psal + 32;
+
+ for (i = 4; i < 32; i += 4)
+ psal[i] = 0x80000000;
+
+ lowcore->paste[4] = (u32)(addr_t) psal;
+ psal[0] = 0x02000000;
+ psal[2] = (u32)(addr_t) aste;
+ *(unsigned long *) (aste + 2) = segment_table +
+ _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
+ aste[4] = (u32)(addr_t) psal;
+ lowcore->vdso_per_cpu_data = page_frame;
+
+ return 0;
+
+out:
+ free_page(page_frame);
+ free_page(page_table);
+ free_pages(segment_table, SEGMENT_ORDER);
+ return -ENOMEM;
+}
+
+void vdso_free_per_cpu(struct _lowcore *lowcore)
+{
+ unsigned long segment_table, page_table, page_frame;
+ u32 *psal, *aste;
+
+ if (!vdso_enabled)
+ return;
+
+ psal = (u32 *)(addr_t) lowcore->paste[4];
+ aste = (u32 *)(addr_t) psal[2];
+ segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;
+ page_table = *(unsigned long *) segment_table;
+ page_frame = *(unsigned long *) page_table;
+
+ free_page(page_frame);
+ free_page(page_table);
+ free_pages(segment_table, SEGMENT_ORDER);
+}
+
+static void vdso_init_cr5(void)
+{
+ unsigned long cr5;
+
+ if (!vdso_enabled)
+ return;
+ cr5 = offsetof(struct _lowcore, paste);
+ __ctl_load(cr5, 5, 5);
+}
+
+/*
+ * This is called from binfmt_elf, we create the special vma for the
+ * vDSO and insert it into the mm struct tree
+ */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+ struct mm_struct *mm = current->mm;
+ struct page **vdso_pagelist;
+ unsigned long vdso_pages;
+ unsigned long vdso_base;
+ int rc;
+
+ if (!vdso_enabled)
+ return 0;
+ /*
+ * Only map the vdso for dynamically linked elf binaries.
+ */
+ if (!uses_interp)
+ return 0;
+
+ vdso_pagelist = vdso64_pagelist;
+ vdso_pages = vdso64_pages;
+#ifdef CONFIG_COMPAT
+ if (is_compat_task()) {
+ vdso_pagelist = vdso32_pagelist;
+ vdso_pages = vdso32_pages;
+ }
+#endif
+ /*
+ * vDSO has a problem and was disabled, just don't "enable" it for
+ * the process
+ */
+ if (vdso_pages == 0)
+ return 0;
+
+ current->mm->context.vdso_base = 0;
+
+ /*
+ * pick a base address for the vDSO in process space. We try to put
+ * it at vdso_base which is the "natural" base for it, but we might
+ * fail and end up putting it elsewhere.
+ */
+ down_write(&mm->mmap_sem);
+ vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0);
+ if (IS_ERR_VALUE(vdso_base)) {
+ rc = vdso_base;
+ goto out_up;
+ }
+
+ /*
+ * Put vDSO base into mm struct. We need to do this before calling
+ * install_special_mapping or the perf counter mmap tracking code
+ * will fail to recognise it as a vDSO (since arch_vma_name fails).
+ */
+ current->mm->context.vdso_base = vdso_base;
+
+ /*
+ * our vma flags don't have VM_WRITE so by default, the process
+ * isn't allowed to write those pages.
+ * gdb can break that with ptrace interface, and thus trigger COW
+ * on those pages but it's then your responsibility to never do that
+ * on the "data" page of the vDSO or you'll stop getting kernel
+ * updates and your nice userland gettimeofday will be totally dead.
+ * It's fine to use that for setting breakpoints in the vDSO code
+ * pages though.
+ */
+ rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ vdso_pagelist);
+ if (rc)
+ current->mm->context.vdso_base = 0;
+out_up:
+ up_write(&mm->mmap_sem);
+ return rc;
+}
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+ if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base)
+ return "[vdso]";
+ return NULL;
+}
+
+static int __init vdso_init(void)
+{
+ int i;
+
+ if (!vdso_enabled)
+ return 0;
+ vdso_init_data(vdso_data);
+#ifdef CONFIG_COMPAT
+ /* Calculate the size of the 32 bit vDSO */
+ vdso32_pages = ((&vdso32_end - &vdso32_start
+ + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
+
+ /* Make sure pages are in the correct state */
+ vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 1),
+ GFP_KERNEL);
+ BUG_ON(vdso32_pagelist == NULL);
+ for (i = 0; i < vdso32_pages - 1; i++) {
+ struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ get_page(pg);
+ vdso32_pagelist[i] = pg;
+ }
+ vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data);
+ vdso32_pagelist[vdso32_pages] = NULL;
+#endif
+
+ /* Calculate the size of the 64 bit vDSO */
+ vdso64_pages = ((&vdso64_end - &vdso64_start
+ + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
+
+ /* Make sure pages are in the correct state */
+ vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 1),
+ GFP_KERNEL);
+ BUG_ON(vdso64_pagelist == NULL);
+ for (i = 0; i < vdso64_pages - 1; i++) {
+ struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ get_page(pg);
+ vdso64_pagelist[i] = pg;
+ }
+ vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
+ vdso64_pagelist[vdso64_pages] = NULL;
+ if (vdso_alloc_per_cpu(&S390_lowcore))
+ BUG();
+ vdso_init_cr5();
+
+ get_page(virt_to_page(vdso_data));
+
+ smp_wmb();
+
+ return 0;
+}
+early_initcall(vdso_init);
diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore
new file mode 100644
index 000000000..e45fba9d0
--- /dev/null
+++ b/arch/s390/kernel/vdso32/.gitignore
@@ -0,0 +1 @@
+vdso32.lds
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
new file mode 100644
index 000000000..8ad2b34ad
--- /dev/null
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -0,0 +1,58 @@
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o
+
+# Build rules
+
+targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS_31 += -m31 -s
+
+KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
+KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+
+$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
+$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
+
+obj-y += vdso32_wrapper.o
+extra-y += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
+
+# Disable gcov profiling for VDSO code
+GCOV_PROFILE := n
+
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+ $(call if_changed,vdso32ld)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso32): %.o: %.S
+ $(call if_changed_dep,vdso32as)
+
+# actual build commands
+quiet_cmd_vdso32ld = VDSO32L $@
+ cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso32as = VDSO32A $@
+ cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso32.so: $(obj)/vdso32.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso_install: vdso32.so
diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S
new file mode 100644
index 000000000..eca3f001f
--- /dev/null
+++ b/arch/s390/kernel/vdso32/clock_getres.S
@@ -0,0 +1,46 @@
+/*
+ * Userland implementation of clock_getres() for 32 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+ .align 4
+ .globl __kernel_clock_getres
+ .type __kernel_clock_getres,@function
+__kernel_clock_getres:
+ .cfi_startproc
+ basr %r1,0
+ la %r1,4f-.(%r1)
+ chi %r2,__CLOCK_REALTIME
+ je 0f
+ chi %r2,__CLOCK_MONOTONIC
+ je 0f
+ la %r1,5f-4f(%r1)
+ chi %r2,__CLOCK_REALTIME_COARSE
+ je 0f
+ chi %r2,__CLOCK_MONOTONIC_COARSE
+ jne 3f
+0: ltr %r3,%r3
+ jz 2f /* res == NULL */
+1: l %r0,0(%r1)
+ xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */
+ st %r0,4(%r3) /* store tp->tv_usec */
+2: lhi %r2,0
+ br %r14
+3: lhi %r1,__NR_clock_getres /* fallback to svc */
+ svc 0
+ br %r14
+4: .long __CLOCK_REALTIME_RES
+5: .long __CLOCK_COARSE_RES
+ .cfi_endproc
+ .size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
new file mode 100644
index 000000000..5eec9afbb
--- /dev/null
+++ b/arch/s390/kernel/vdso32/clock_gettime.S
@@ -0,0 +1,149 @@
+/*
+ * Userland implementation of clock_gettime() for 32 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+ .align 4
+ .globl __kernel_clock_gettime
+ .type __kernel_clock_gettime,@function
+__kernel_clock_gettime:
+ .cfi_startproc
+ ahi %r15,-16
+ basr %r5,0
+0: al %r5,21f-0b(%r5) /* get &_vdso_data */
+ chi %r2,__CLOCK_REALTIME_COARSE
+ je 10f
+ chi %r2,__CLOCK_REALTIME
+ je 11f
+ chi %r2,__CLOCK_MONOTONIC_COARSE
+ je 9f
+ chi %r2,__CLOCK_MONOTONIC
+ jne 19f
+
+ /* CLOCK_MONOTONIC */
+1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 1b
+ stcke 0(%r15) /* Store TOD clock */
+ lm %r0,%r1,1(%r15)
+ s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ sl %r1,__VDSO_XTIME_STAMP+4(%r5)
+ brc 3,2f
+ ahi %r0,-1
+2: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ lr %r2,%r0
+ l %r0,__VDSO_TK_MULT(%r5)
+ ltr %r1,%r1
+ mr %r0,%r0
+ jnm 3f
+ a %r0,__VDSO_TK_MULT(%r5)
+3: alr %r0,%r2
+ al %r0,__VDSO_WTOM_NSEC(%r5)
+ al %r1,__VDSO_WTOM_NSEC+4(%r5)
+ brc 12,5f
+ ahi %r0,1
+5: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ srdl %r0,0(%r2) /* >> tk->shift */
+ l %r2,__VDSO_WTOM_SEC+4(%r5)
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 1b
+ basr %r5,0
+6: ltr %r0,%r0
+ jnz 7f
+ cl %r1,20f-6b(%r5)
+ jl 8f
+7: ahi %r2,1
+ sl %r1,20f-6b(%r5)
+ brc 3,6b
+ ahi %r0,-1
+ j 6b
+8: st %r2,0(%r3) /* store tp->tv_sec */
+ st %r1,4(%r3) /* store tp->tv_nsec */
+ lhi %r2,0
+ ahi %r15,16
+ br %r14
+
+ /* CLOCK_MONOTONIC_COARSE */
+9: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 9b
+ l %r2,__VDSO_WTOM_CRS_SEC+4(%r5)
+ l %r1,__VDSO_WTOM_CRS_NSEC+4(%r5)
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 9b
+ j 8b
+
+ /* CLOCK_REALTIME_COARSE */
+10: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 10b
+ l %r2,__VDSO_XTIME_CRS_SEC+4(%r5)
+ l %r1,__VDSO_XTIME_CRS_NSEC+4(%r5)
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 10b
+ j 17f
+
+ /* CLOCK_REALTIME */
+11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 11b
+ stcke 0(%r15) /* Store TOD clock */
+ lm %r0,%r1,1(%r15)
+ s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ sl %r1,__VDSO_XTIME_STAMP+4(%r5)
+ brc 3,12f
+ ahi %r0,-1
+12: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ lr %r2,%r0
+ l %r0,__VDSO_TK_MULT(%r5)
+ ltr %r1,%r1
+ mr %r0,%r0
+ jnm 13f
+ a %r0,__VDSO_TK_MULT(%r5)
+13: alr %r0,%r2
+ al %r0,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
+ al %r1,__VDSO_XTIME_NSEC+4(%r5)
+ brc 12,14f
+ ahi %r0,1
+14: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ srdl %r0,0(%r2) /* >> tk->shift */
+ l %r2,__VDSO_XTIME_SEC+4(%r5)
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 11b
+ basr %r5,0
+15: ltr %r0,%r0
+ jnz 16f
+ cl %r1,20f-15b(%r5)
+ jl 17f
+16: ahi %r2,1
+ sl %r1,20f-15b(%r5)
+ brc 3,15b
+ ahi %r0,-1
+ j 15b
+17: st %r2,0(%r3) /* store tp->tv_sec */
+ st %r1,4(%r3) /* store tp->tv_nsec */
+ lhi %r2,0
+ ahi %r15,16
+ br %r14
+
+ /* Fallback to system call */
+19: lhi %r1,__NR_clock_gettime
+ svc 0
+ ahi %r15,16
+ br %r14
+
+20: .long 1000000000
+21: .long _vdso_data - 0b
+ .cfi_endproc
+ .size __kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
new file mode 100644
index 000000000..719de6186
--- /dev/null
+++ b/arch/s390/kernel/vdso32/gettimeofday.S
@@ -0,0 +1,81 @@
+/*
+ * Userland implementation of gettimeofday() for 32 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+ .align 4
+ .globl __kernel_gettimeofday
+ .type __kernel_gettimeofday,@function
+__kernel_gettimeofday:
+ .cfi_startproc
+ ahi %r15,-16
+ basr %r5,0
+0: al %r5,13f-0b(%r5) /* get &_vdso_data */
+1: ltr %r3,%r3 /* check if tz is NULL */
+ je 2f
+ mvc 0(8,%r3),__VDSO_TIMEZONE(%r5)
+2: ltr %r2,%r2 /* check if tv is NULL */
+ je 10f
+ l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 1b
+ stcke 0(%r15) /* Store TOD clock */
+ lm %r0,%r1,1(%r15)
+ s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ sl %r1,__VDSO_XTIME_STAMP+4(%r5)
+ brc 3,3f
+ ahi %r0,-1
+3: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ st %r0,0(%r15)
+ l %r0,__VDSO_TK_MULT(%r5)
+ ltr %r1,%r1
+ mr %r0,%r0
+ jnm 4f
+ a %r0,__VDSO_TK_MULT(%r5)
+4: al %r0,0(%r15)
+ al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */
+ al %r1,__VDSO_XTIME_NSEC+4(%r5)
+ brc 12,5f
+ ahi %r0,1
+5: mvc 0(4,%r15),__VDSO_XTIME_SEC+4(%r5)
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 1b
+ l %r4,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ srdl %r0,0(%r4) /* >> tk->shift */
+ l %r4,0(%r15) /* get tv_sec from stack */
+ basr %r5,0
+6: ltr %r0,%r0
+ jnz 7f
+ cl %r1,11f-6b(%r5)
+ jl 8f
+7: ahi %r4,1
+ sl %r1,11f-6b(%r5)
+ brc 3,6b
+ ahi %r0,-1
+ j 6b
+8: st %r4,0(%r2) /* store tv->tv_sec */
+ ltr %r1,%r1
+ m %r0,12f-6b(%r5)
+ jnm 9f
+ al %r0,12f-6b(%r5)
+9: srl %r0,6
+ st %r0,4(%r2) /* store tv->tv_usec */
+10: slr %r2,%r2
+ ahi %r15,16
+ br %r14
+11: .long 1000000000
+12: .long 274877907
+13: .long _vdso_data - 0b
+ .cfi_endproc
+ .size __kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S
new file mode 100644
index 000000000..79a071e43
--- /dev/null
+++ b/arch/s390/kernel/vdso32/note.S
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
new file mode 100644
index 000000000..a8c379fa1
--- /dev/null
+++ b/arch/s390/kernel/vdso32/vdso32.lds.S
@@ -0,0 +1,138 @@
+/*
+ * This is the infamous ld script for the 32 bits vdso
+ * library
+ */
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
+OUTPUT_ARCH(s390:31-bit)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO32_LBASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN(16);
+ .text : {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ } :text
+ PROVIDE(__etext = .);
+ PROVIDE(_etext = .);
+ PROVIDE(etext = .);
+
+ /*
+ * Other stuff is appended to the text segment:
+ */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
+
+ .rela.dyn ALIGN(8) : { *(.rela.dyn) }
+ .got ALIGN(8) : { *(.got .toc) }
+
+ _end = .;
+ PROVIDE(end = .);
+
+ /*
+ * Stabs debugging sections are here too.
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+
+ /*
+ * DWARF debug sections.
+ * Symbols in the DWARF debugging sections are relative to the
+ * beginning of the section so we begin them at 0.
+ */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+
+ . = ALIGN(4096);
+ PROVIDE(_vdso_data = .);
+
+ /DISCARD/ : {
+ *(.note.GNU-stack)
+ *(.branch_lt)
+ *(.data .data.* .gnu.linkonce.d.* .sdata*)
+ *(.bss .sbss .dynbss .dynsbss)
+ }
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME 0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ /*
+ * Has to be there for the kernel to find
+ */
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ __kernel_clock_getres;
+
+ local: *;
+ };
+}
diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S
new file mode 100644
index 000000000..ae42f8ce3
--- /dev/null
+++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S
@@ -0,0 +1,14 @@
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .globl vdso32_start, vdso32_end
+ .balign PAGE_SIZE
+vdso32_start:
+ .incbin "arch/s390/kernel/vdso32/vdso32.so"
+ .balign PAGE_SIZE
+vdso32_end:
+
+ .previous
diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore
new file mode 100644
index 000000000..3fd18cf9f
--- /dev/null
+++ b/arch/s390/kernel/vdso64/.gitignore
@@ -0,0 +1 @@
+vdso64.lds
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
new file mode 100644
index 000000000..2a8ddfd12
--- /dev/null
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -0,0 +1,58 @@
+# List of files in the vdso, has to be asm only for now
+
+obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o
+
+# Build rules
+
+targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+
+KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS_64 += -m64 -s
+
+KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
+KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+
+$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
+$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
+
+obj-y += vdso64_wrapper.o
+extra-y += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
+
+# Disable gcov profiling for VDSO code
+GCOV_PROFILE := n
+
+# Force dependency (incbin is bad)
+$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
+ $(call if_changed,vdso64ld)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+# assembly rules for the .S files
+$(obj-vdso64): %.o: %.S
+ $(call if_changed_dep,vdso64as)
+
+# actual build commands
+quiet_cmd_vdso64ld = VDSO64L $@
+ cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+quiet_cmd_vdso64as = VDSO64A $@
+ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso64.so: $(obj)/vdso64.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso_install: vdso64.so
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
new file mode 100644
index 000000000..c8513deb8
--- /dev/null
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -0,0 +1,52 @@
+/*
+ * Userland implementation of clock_getres() for 64 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+ .align 4
+ .globl __kernel_clock_getres
+ .type __kernel_clock_getres,@function
+__kernel_clock_getres:
+ .cfi_startproc
+ larl %r1,4f
+ cghi %r2,__CLOCK_REALTIME_COARSE
+ je 0f
+ cghi %r2,__CLOCK_MONOTONIC_COARSE
+ je 0f
+ larl %r1,3f
+ cghi %r2,__CLOCK_REALTIME
+ je 0f
+ cghi %r2,__CLOCK_MONOTONIC
+ je 0f
+ cghi %r2,__CLOCK_THREAD_CPUTIME_ID
+ je 0f
+ cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
+ jne 2f
+ larl %r5,_vdso_data
+ icm %r0,15,__LC_ECTG_OK(%r5)
+ jz 2f
+0: ltgr %r3,%r3
+ jz 1f /* res == NULL */
+ lg %r0,0(%r1)
+ xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */
+ stg %r0,8(%r3) /* store tp->tv_usec */
+1: lghi %r2,0
+ br %r14
+2: lghi %r1,__NR_clock_getres /* fallback to svc */
+ svc 0
+ br %r14
+3: .quad __CLOCK_REALTIME_RES
+4: .quad __CLOCK_COARSE_RES
+ .cfi_endproc
+ .size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
new file mode 100644
index 000000000..61541fb93
--- /dev/null
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -0,0 +1,150 @@
+/*
+ * Userland implementation of clock_gettime() for 64 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+ .align 4
+ .globl __kernel_clock_gettime
+ .type __kernel_clock_gettime,@function
+__kernel_clock_gettime:
+ .cfi_startproc
+ aghi %r15,-16
+ larl %r5,_vdso_data
+ cghi %r2,__CLOCK_REALTIME_COARSE
+ je 4f
+ cghi %r2,__CLOCK_REALTIME
+ je 5f
+ cghi %r2,-3 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
+ je 9f
+ cghi %r2,__CLOCK_MONOTONIC_COARSE
+ je 3f
+ cghi %r2,__CLOCK_MONOTONIC
+ jne 12f
+
+ /* CLOCK_MONOTONIC */
+0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 0b
+ stcke 0(%r15) /* Store TOD clock */
+ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ lg %r0,__VDSO_WTOM_SEC(%r5)
+ lg %r1,1(%r15)
+ sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ alg %r1,__VDSO_WTOM_NSEC(%r5)
+ srlg %r1,%r1,0(%r2) /* >> tk->shift */
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 0b
+ larl %r5,13f
+1: clg %r1,0(%r5)
+ jl 2f
+ slg %r1,0(%r5)
+ aghi %r0,1
+ j 1b
+2: stg %r0,0(%r3) /* store tp->tv_sec */
+ stg %r1,8(%r3) /* store tp->tv_nsec */
+ lghi %r2,0
+ aghi %r15,16
+ br %r14
+
+ /* CLOCK_MONOTONIC_COARSE */
+3: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 3b
+ lg %r0,__VDSO_WTOM_CRS_SEC(%r5)
+ lg %r1,__VDSO_WTOM_CRS_NSEC(%r5)
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 3b
+ j 2b
+
+ /* CLOCK_REALTIME_COARSE */
+4: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 4b
+ lg %r0,__VDSO_XTIME_CRS_SEC(%r5)
+ lg %r1,__VDSO_XTIME_CRS_NSEC(%r5)
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 4b
+ j 7f
+
+ /* CLOCK_REALTIME */
+5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 5b
+ stcke 0(%r15) /* Store TOD clock */
+ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ lg %r1,1(%r15)
+ sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
+ srlg %r1,%r1,0(%r2) /* >> tk->shift */
+ lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 5b
+ larl %r5,13f
+6: clg %r1,0(%r5)
+ jl 7f
+ slg %r1,0(%r5)
+ aghi %r0,1
+ j 6b
+7: stg %r0,0(%r3) /* store tp->tv_sec */
+ stg %r1,8(%r3) /* store tp->tv_nsec */
+ lghi %r2,0
+ aghi %r15,16
+ br %r14
+
+ /* CPUCLOCK_VIRT for this thread */
+9: icm %r0,15,__VDSO_ECTG_OK(%r5)
+ jz 12f
+ ear %r2,%a4
+ llilh %r4,0x0100
+ sar %a4,%r4
+ lghi %r4,0
+ epsw %r5,0
+ sacf 512 /* Magic ectg instruction */
+ .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
+ tml %r5,0x4000
+ jo 11f
+ tml %r5,0x8000
+ jno 10f
+ sacf 256
+ j 11f
+10: sacf 0
+11: sar %a4,%r2
+ algr %r1,%r0 /* r1 = cputime as TOD value */
+ mghi %r1,1000 /* convert to nanoseconds */
+ srlg %r1,%r1,12 /* r1 = cputime in nanosec */
+ lgr %r4,%r1
+ larl %r5,13f
+ srlg %r1,%r1,9 /* divide by 1000000000 */
+ mlg %r0,8(%r5)
+ srlg %r0,%r0,11 /* r0 = tv_sec */
+ stg %r0,0(%r3)
+ msg %r0,0(%r5) /* calculate tv_nsec */
+ slgr %r4,%r0 /* r4 = tv_nsec */
+ stg %r4,8(%r3)
+ lghi %r2,0
+ aghi %r15,16
+ br %r14
+
+ /* Fallback to system call */
+12: lghi %r1,__NR_clock_gettime
+ svc 0
+ aghi %r15,16
+ br %r14
+
+13: .quad 1000000000
+14: .quad 19342813113834067
+ .cfi_endproc
+ .size __kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S
new file mode 100644
index 000000000..6ce467076
--- /dev/null
+++ b/arch/s390/kernel/vdso64/gettimeofday.S
@@ -0,0 +1,59 @@
+/*
+ * Userland implementation of gettimeofday() for 64 bits processes in a
+ * s390 kernel for use in the vDSO
+ *
+ * Copyright IBM Corp. 2008
+ * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+#include <asm/vdso.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+ .text
+ .align 4
+ .globl __kernel_gettimeofday
+ .type __kernel_gettimeofday,@function
+__kernel_gettimeofday:
+ .cfi_startproc
+ aghi %r15,-16
+ larl %r5,_vdso_data
+0: ltgr %r3,%r3 /* check if tz is NULL */
+ je 1f
+ mvc 0(8,%r3),__VDSO_TIMEZONE(%r5)
+1: ltgr %r2,%r2 /* check if tv is NULL */
+ je 4f
+ lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 0b
+ stcke 0(%r15) /* Store TOD clock */
+ lg %r1,1(%r15)
+ sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
+ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
+ lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 0b
+ lgf %r5,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
+ srlg %r1,%r1,0(%r5) /* >> tk->shift */
+ larl %r5,5f
+2: clg %r1,0(%r5)
+ jl 3f
+ slg %r1,0(%r5)
+ aghi %r0,1
+ j 2b
+3: stg %r0,0(%r2) /* store tv->tv_sec */
+ slgr %r0,%r0 /* tv_nsec -> tv_usec */
+ ml %r0,8(%r5)
+ srlg %r0,%r0,6
+ stg %r0,8(%r2) /* store tv->tv_usec */
+4: lghi %r2,0
+ aghi %r15,16
+ br %r14
+5: .quad 1000000000
+ .long 274877907
+ .cfi_endproc
+ .size __kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S
new file mode 100644
index 000000000..79a071e43
--- /dev/null
+++ b/arch/s390/kernel/vdso64/note.S
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S
new file mode 100644
index 000000000..9f5979d10
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso64.lds.S
@@ -0,0 +1,138 @@
+/*
+ * This is the infamous ld script for the 64 bits vdso
+ * library
+ */
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = VDSO64_LBASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ . = ALIGN(16);
+ .text : {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ } :text
+ PROVIDE(__etext = .);
+ PROVIDE(_etext = .);
+ PROVIDE(etext = .);
+
+ /*
+ * Other stuff is appended to the text segment:
+ */
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata1 : { *(.rodata1) }
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
+
+ .rela.dyn ALIGN(8) : { *(.rela.dyn) }
+ .got ALIGN(8) : { *(.got .toc) }
+
+ _end = .;
+ PROVIDE(end = .);
+
+ /*
+ * Stabs debugging sections are here too.
+ */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+
+ /*
+ * DWARF debug sections.
+ * Symbols in the DWARF debugging sections are relative to the
+ * beginning of the section so we begin them at 0.
+ */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* DWARF 3 */
+ .debug_pubtypes 0 : { *(.debug_pubtypes) }
+ .debug_ranges 0 : { *(.debug_ranges) }
+ .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
+
+ . = ALIGN(4096);
+ PROVIDE(_vdso_data = .);
+
+ /DISCARD/ : {
+ *(.note.GNU-stack)
+ *(.branch_lt)
+ *(.data .data.* .gnu.linkonce.d.* .sdata*)
+ *(.bss .sbss .dynbss .dynsbss)
+ }
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME 0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ /*
+ * Has to be there for the kernel to find
+ */
+ __kernel_gettimeofday;
+ __kernel_clock_gettime;
+ __kernel_clock_getres;
+
+ local: *;
+ };
+}
diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S
new file mode 100644
index 000000000..c245842b5
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso64_wrapper.S
@@ -0,0 +1,14 @@
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .globl vdso64_start, vdso64_end
+ .balign PAGE_SIZE
+vdso64_start:
+ .incbin "arch/s390/kernel/vdso64/vdso64.so"
+ .balign PAGE_SIZE
+vdso64_end:
+
+ .previous
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
new file mode 100644
index 000000000..445657fe6
--- /dev/null
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -0,0 +1,93 @@
+/* ld script to make s390 Linux kernel
+ * Written by Martin Schwidefsky (schwidefsky@de.ibm.com)
+ */
+
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+ENTRY(startup)
+jiffies = jiffies_64;
+
+PHDRS {
+ text PT_LOAD FLAGS(5); /* R_E */
+ data PT_LOAD FLAGS(7); /* RWE */
+ note PT_NOTE FLAGS(0); /* ___ */
+}
+
+SECTIONS
+{
+ . = 0x00000000;
+ .text : {
+ _text = .; /* Text and read-only data */
+ HEAD_TEXT
+ TEXT_TEXT
+ SCHED_TEXT
+ LOCK_TEXT
+ KPROBES_TEXT
+ IRQENTRY_TEXT
+ *(.fixup)
+ *(.gnu.warning)
+ } :text = 0x0700
+
+ _etext = .; /* End of text section */
+
+ NOTES :text :note
+
+ .dummy : { *(.dummy) } :data
+
+ RO_DATA_SECTION(PAGE_SIZE)
+
+#ifdef CONFIG_SHARED_KERNEL
+ . = ALIGN(0x100000); /* VM shared segments are 1MB aligned */
+#endif
+
+ . = ALIGN(PAGE_SIZE);
+ _eshared = .; /* End of shareable data */
+ _sdata = .; /* Start of data section */
+
+ EXCEPTION_TABLE(16) :data
+
+ RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
+
+ _edata = .; /* End of data section */
+
+ /* will be freed after init */
+ . = ALIGN(PAGE_SIZE); /* Init code and data */
+ __init_begin = .;
+
+ INIT_TEXT_SECTION(PAGE_SIZE)
+
+ /*
+ * .exit.text is discarded at runtime, not link time,
+ * to deal with references from __bug_table
+ */
+ .exit.text : {
+ EXIT_TEXT
+ }
+
+ .exit.data : {
+ EXIT_DATA
+ }
+
+ /* early.c uses stsi, which requires page aligned data. */
+ . = ALIGN(PAGE_SIZE);
+ INIT_DATA_SECTION(0x100)
+
+ PERCPU_SECTION(0x100)
+ . = ALIGN(PAGE_SIZE);
+ __init_end = .; /* freed after init ends here */
+
+ BSS_SECTION(0, 2, 0)
+
+ _end = . ;
+
+ /* Debugging sections. */
+ STABS_DEBUG
+ DWARF_DEBUG
+
+ /* Sections to be discarded */
+ DISCARDS
+}
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
new file mode 100644
index 000000000..e53d3595a
--- /dev/null
+++ b/arch/s390/kernel/vtime.c
@@ -0,0 +1,379 @@
+/*
+ * Virtual cpu timer based timer functions.
+ *
+ * Copyright IBM Corp. 2004, 2012
+ * Author(s): Jan Glauber <jan.glauber@de.ibm.com>
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/timex.h>
+#include <linux/types.h>
+#include <linux/time.h>
+
+#include <asm/cputime.h>
+#include <asm/vtimer.h>
+#include <asm/vtime.h>
+#include <asm/cpu_mf.h>
+#include <asm/smp.h>
+
+static void virt_timer_expire(void);
+
+static LIST_HEAD(virt_timer_list);
+static DEFINE_SPINLOCK(virt_timer_lock);
+static atomic64_t virt_timer_current;
+static atomic64_t virt_timer_elapsed;
+
+static DEFINE_PER_CPU(u64, mt_cycles[32]);
+static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 };
+static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 };
+
+static inline u64 get_vtimer(void)
+{
+ u64 timer;
+
+ asm volatile("stpt %0" : "=m" (timer));
+ return timer;
+}
+
+static inline void set_vtimer(u64 expires)
+{
+ u64 timer;
+
+ asm volatile(
+ " stpt %0\n" /* Store current cpu timer value */
+ " spt %1" /* Set new value imm. afterwards */
+ : "=m" (timer) : "m" (expires));
+ S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
+ S390_lowcore.last_update_timer = expires;
+}
+
+static inline int virt_timer_forward(u64 elapsed)
+{
+ BUG_ON(!irqs_disabled());
+
+ if (list_empty(&virt_timer_list))
+ return 0;
+ elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed);
+ return elapsed >= atomic64_read(&virt_timer_current);
+}
+
+/*
+ * Update process times based on virtual cpu times stored by entry.S
+ * to the lowcore fields user_timer, system_timer & steal_clock.
+ */
+static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
+{
+ struct thread_info *ti = task_thread_info(tsk);
+ u64 timer, clock, user, system, steal;
+ u64 user_scaled, system_scaled;
+ int i;
+
+ timer = S390_lowcore.last_update_timer;
+ clock = S390_lowcore.last_update_clock;
+ asm volatile(
+ " stpt %0\n" /* Store current cpu timer value */
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+ " stckf %1" /* Store current tod clock value */
+#else
+ " stck %1" /* Store current tod clock value */
+#endif
+ : "=m" (S390_lowcore.last_update_timer),
+ "=m" (S390_lowcore.last_update_clock));
+ S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
+ S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
+
+ /* Do MT utilization calculation */
+ if (smp_cpu_mtid) {
+ u64 cycles_new[32], *cycles_old;
+ u64 delta, mult, div;
+
+ cycles_old = this_cpu_ptr(mt_cycles);
+ if (stcctm5(smp_cpu_mtid + 1, cycles_new) < 2) {
+ mult = div = 0;
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ delta = cycles_new[i] - cycles_old[i];
+ mult += delta;
+ div += (i + 1) * delta;
+ }
+ if (mult > 0) {
+ /* Update scaling factor */
+ __this_cpu_write(mt_scaling_mult, mult);
+ __this_cpu_write(mt_scaling_div, div);
+ memcpy(cycles_old, cycles_new,
+ sizeof(u64) * (smp_cpu_mtid + 1));
+ }
+ }
+ }
+
+ user = S390_lowcore.user_timer - ti->user_timer;
+ S390_lowcore.steal_timer -= user;
+ ti->user_timer = S390_lowcore.user_timer;
+
+ system = S390_lowcore.system_timer - ti->system_timer;
+ S390_lowcore.steal_timer -= system;
+ ti->system_timer = S390_lowcore.system_timer;
+
+ user_scaled = user;
+ system_scaled = system;
+ /* Do MT utilization scaling */
+ if (smp_cpu_mtid) {
+ u64 mult = __this_cpu_read(mt_scaling_mult);
+ u64 div = __this_cpu_read(mt_scaling_div);
+
+ user_scaled = (user_scaled * mult) / div;
+ system_scaled = (system_scaled * mult) / div;
+ }
+ account_user_time(tsk, user, user_scaled);
+ account_system_time(tsk, hardirq_offset, system, system_scaled);
+
+ steal = S390_lowcore.steal_timer;
+ if ((s64) steal > 0) {
+ S390_lowcore.steal_timer = 0;
+ account_steal_time(steal);
+ }
+
+ return virt_timer_forward(user + system);
+}
+
+void vtime_task_switch(struct task_struct *prev)
+{
+ struct thread_info *ti;
+
+ do_account_vtime(prev, 0);
+ ti = task_thread_info(prev);
+ ti->user_timer = S390_lowcore.user_timer;
+ ti->system_timer = S390_lowcore.system_timer;
+ ti = task_thread_info(current);
+ S390_lowcore.user_timer = ti->user_timer;
+ S390_lowcore.system_timer = ti->system_timer;
+}
+
+/*
+ * In s390, accounting pending user time also implies
+ * accounting system time in order to correctly compute
+ * the stolen time accounting.
+ */
+void vtime_account_user(struct task_struct *tsk)
+{
+ if (do_account_vtime(tsk, HARDIRQ_OFFSET))
+ virt_timer_expire();
+}
+
+/*
+ * Update process times based on virtual cpu times stored by entry.S
+ * to the lowcore fields user_timer, system_timer & steal_clock.
+ */
+void vtime_account_irq_enter(struct task_struct *tsk)
+{
+ struct thread_info *ti = task_thread_info(tsk);
+ u64 timer, system, system_scaled;
+
+ timer = S390_lowcore.last_update_timer;
+ S390_lowcore.last_update_timer = get_vtimer();
+ S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
+
+ system = S390_lowcore.system_timer - ti->system_timer;
+ S390_lowcore.steal_timer -= system;
+ ti->system_timer = S390_lowcore.system_timer;
+ system_scaled = system;
+ /* Do MT utilization scaling */
+ if (smp_cpu_mtid) {
+ u64 mult = __this_cpu_read(mt_scaling_mult);
+ u64 div = __this_cpu_read(mt_scaling_div);
+
+ system_scaled = (system_scaled * mult) / div;
+ }
+ account_system_time(tsk, 0, system, system_scaled);
+
+ virt_timer_forward(system);
+}
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
+
+void vtime_account_system(struct task_struct *tsk)
+__attribute__((alias("vtime_account_irq_enter")));
+EXPORT_SYMBOL_GPL(vtime_account_system);
+
+/*
+ * Sorted add to a list. List is linear searched until first bigger
+ * element is found.
+ */
+static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
+{
+ struct vtimer_list *tmp;
+
+ list_for_each_entry(tmp, head, entry) {
+ if (tmp->expires > timer->expires) {
+ list_add_tail(&timer->entry, &tmp->entry);
+ return;
+ }
+ }
+ list_add_tail(&timer->entry, head);
+}
+
+/*
+ * Handler for expired virtual CPU timer.
+ */
+static void virt_timer_expire(void)
+{
+ struct vtimer_list *timer, *tmp;
+ unsigned long elapsed;
+ LIST_HEAD(cb_list);
+
+ /* walk timer list, fire all expired timers */
+ spin_lock(&virt_timer_lock);
+ elapsed = atomic64_read(&virt_timer_elapsed);
+ list_for_each_entry_safe(timer, tmp, &virt_timer_list, entry) {
+ if (timer->expires < elapsed)
+ /* move expired timer to the callback queue */
+ list_move_tail(&timer->entry, &cb_list);
+ else
+ timer->expires -= elapsed;
+ }
+ if (!list_empty(&virt_timer_list)) {
+ timer = list_first_entry(&virt_timer_list,
+ struct vtimer_list, entry);
+ atomic64_set(&virt_timer_current, timer->expires);
+ }
+ atomic64_sub(elapsed, &virt_timer_elapsed);
+ spin_unlock(&virt_timer_lock);
+
+ /* Do callbacks and recharge periodic timers */
+ list_for_each_entry_safe(timer, tmp, &cb_list, entry) {
+ list_del_init(&timer->entry);
+ timer->function(timer->data);
+ if (timer->interval) {
+ /* Recharge interval timer */
+ timer->expires = timer->interval +
+ atomic64_read(&virt_timer_elapsed);
+ spin_lock(&virt_timer_lock);
+ list_add_sorted(timer, &virt_timer_list);
+ spin_unlock(&virt_timer_lock);
+ }
+ }
+}
+
+void init_virt_timer(struct vtimer_list *timer)
+{
+ timer->function = NULL;
+ INIT_LIST_HEAD(&timer->entry);
+}
+EXPORT_SYMBOL(init_virt_timer);
+
+static inline int vtimer_pending(struct vtimer_list *timer)
+{
+ return !list_empty(&timer->entry);
+}
+
+static void internal_add_vtimer(struct vtimer_list *timer)
+{
+ if (list_empty(&virt_timer_list)) {
+ /* First timer, just program it. */
+ atomic64_set(&virt_timer_current, timer->expires);
+ atomic64_set(&virt_timer_elapsed, 0);
+ list_add(&timer->entry, &virt_timer_list);
+ } else {
+ /* Update timer against current base. */
+ timer->expires += atomic64_read(&virt_timer_elapsed);
+ if (likely((s64) timer->expires <
+ (s64) atomic64_read(&virt_timer_current)))
+ /* The new timer expires before the current timer. */
+ atomic64_set(&virt_timer_current, timer->expires);
+ /* Insert new timer into the list. */
+ list_add_sorted(timer, &virt_timer_list);
+ }
+}
+
+static void __add_vtimer(struct vtimer_list *timer, int periodic)
+{
+ unsigned long flags;
+
+ timer->interval = periodic ? timer->expires : 0;
+ spin_lock_irqsave(&virt_timer_lock, flags);
+ internal_add_vtimer(timer);
+ spin_unlock_irqrestore(&virt_timer_lock, flags);
+}
+
+/*
+ * add_virt_timer - add an oneshot virtual CPU timer
+ */
+void add_virt_timer(struct vtimer_list *timer)
+{
+ __add_vtimer(timer, 0);
+}
+EXPORT_SYMBOL(add_virt_timer);
+
+/*
+ * add_virt_timer_int - add an interval virtual CPU timer
+ */
+void add_virt_timer_periodic(struct vtimer_list *timer)
+{
+ __add_vtimer(timer, 1);
+}
+EXPORT_SYMBOL(add_virt_timer_periodic);
+
+static int __mod_vtimer(struct vtimer_list *timer, u64 expires, int periodic)
+{
+ unsigned long flags;
+ int rc;
+
+ BUG_ON(!timer->function);
+
+ if (timer->expires == expires && vtimer_pending(timer))
+ return 1;
+ spin_lock_irqsave(&virt_timer_lock, flags);
+ rc = vtimer_pending(timer);
+ if (rc)
+ list_del_init(&timer->entry);
+ timer->interval = periodic ? expires : 0;
+ timer->expires = expires;
+ internal_add_vtimer(timer);
+ spin_unlock_irqrestore(&virt_timer_lock, flags);
+ return rc;
+}
+
+/*
+ * returns whether it has modified a pending timer (1) or not (0)
+ */
+int mod_virt_timer(struct vtimer_list *timer, u64 expires)
+{
+ return __mod_vtimer(timer, expires, 0);
+}
+EXPORT_SYMBOL(mod_virt_timer);
+
+/*
+ * returns whether it has modified a pending timer (1) or not (0)
+ */
+int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires)
+{
+ return __mod_vtimer(timer, expires, 1);
+}
+EXPORT_SYMBOL(mod_virt_timer_periodic);
+
+/*
+ * Delete a virtual timer.
+ *
+ * returns whether the deleted timer was pending (1) or not (0)
+ */
+int del_virt_timer(struct vtimer_list *timer)
+{
+ unsigned long flags;
+
+ if (!vtimer_pending(timer))
+ return 0;
+ spin_lock_irqsave(&virt_timer_lock, flags);
+ list_del_init(&timer->entry);
+ spin_unlock_irqrestore(&virt_timer_lock, flags);
+ return 1;
+}
+EXPORT_SYMBOL(del_virt_timer);
+
+/*
+ * Start the virtual CPU timer on the current CPU.
+ */
+void vtime_init(void)
+{
+ /* set initial cpu timer */
+ set_vtimer(VTIMER_MAX_SLICE);
+}