From d0b2f91bede3bd5e3d24dd6803e56eee959c1797 Mon Sep 17 00:00:00 2001
From: André Fabian Silva Delgado <emulatorman@parabola.nu>
Date: Thu, 20 Oct 2016 00:10:27 -0300
Subject: Linux-libre 4.8.2-gnu

---
 arch/s390/kernel/Makefile        |   16 +-
 arch/s390/kernel/als.c           |  124 +++++
 arch/s390/kernel/cache.c         |    7 +-
 arch/s390/kernel/diag.c          |   39 ++
 arch/s390/kernel/dis.c           |    1 -
 arch/s390/kernel/dumpstack.c     |   12 +-
 arch/s390/kernel/early.c         |   22 +
 arch/s390/kernel/entry.S         |   11 +-
 arch/s390/kernel/entry.h         |    2 +
 arch/s390/kernel/fpu.c           |  249 +++++++++
 arch/s390/kernel/head.S          |   47 +-
 arch/s390/kernel/ipl.c           |   25 +-
 arch/s390/kernel/irq.c           |    7 +-
 arch/s390/kernel/kprobes.c       |   12 +
 arch/s390/kernel/machine_kexec.c |   55 +-
 arch/s390/kernel/nmi.c           |   13 +-
 arch/s390/kernel/perf_cpum_cf.c  |   46 +-
 arch/s390/kernel/perf_cpum_sf.c  |   59 +--
 arch/s390/kernel/perf_event.c    |   30 --
 arch/s390/kernel/processor.c     |  114 +++--
 arch/s390/kernel/ptrace.c        |   30 +-
 arch/s390/kernel/sclp.c          |    5 +-
 arch/s390/kernel/setup.c         |   38 +-
 arch/s390/kernel/smp.c           |   18 +-
 arch/s390/kernel/sysinfo.c       |   63 ++-
 arch/s390/kernel/time.c          | 1053 ++------------------------------------
 arch/s390/kernel/topology.c      |   89 ++--
 arch/s390/kernel/vdso32/Makefile |    2 +
 arch/s390/kernel/vdso64/Makefile |    2 +
 arch/s390/kernel/vmlinux.lds.S   |   21 +-
 30 files changed, 852 insertions(+), 1360 deletions(-)
 create mode 100644 arch/s390/kernel/als.c
 create mode 100644 arch/s390/kernel/fpu.c

(limited to 'arch/s390/kernel')

diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 2f5586ab8..3234817c7 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -2,6 +2,10 @@
 # Makefile for the linux kernel.
 #
 
+KCOV_INSTRUMENT_early.o := n
+KCOV_INSTRUMENT_sclp.o := n
+KCOV_INSTRUMENT_als.o := n
+
 ifdef CONFIG_FUNCTION_TRACER
 # Don't trace early setup code and tracing code
 CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
@@ -29,23 +33,27 @@ CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
 CFLAGS_sysinfo.o += -w
 
 #
-# Use -march=z900 for sclp.c to be able to print an error message if
-# the kernel is started on a machine which is too old
+# Use -march=z900 for sclp.c and als.c to be able to print an error
+# message if the kernel is started on a machine which is too old
 #
 CFLAGS_REMOVE_sclp.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_als.o = $(CC_FLAGS_FTRACE)
 ifneq ($(CC_FLAGS_MARCH),-march=z900)
 CFLAGS_REMOVE_sclp.o	+= $(CC_FLAGS_MARCH)
 CFLAGS_sclp.o		+= -march=z900
+CFLAGS_REMOVE_als.o	+= $(CC_FLAGS_MARCH)
+CFLAGS_als.o		+= -march=z900
 AFLAGS_REMOVE_head.o	+= $(CC_FLAGS_MARCH)
 AFLAGS_head.o		+= -march=z900
 endif
 GCOV_PROFILE_sclp.o := n
+GCOV_PROFILE_als.o := n
 
 obj-y	:= traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
-obj-y	+= debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
+obj-y	+= debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o als.o
 obj-y	+= sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
-obj-y	+= runtime_instr.o cache.o dumpstack.o
+obj-y	+= runtime_instr.o cache.o fpu.o dumpstack.o
 obj-y	+= entry.o reipl.o relocate_kernel.o
 
 extra-y				+= head.o head64.o vmlinux.lds
diff --git a/arch/s390/kernel/als.c b/arch/s390/kernel/als.c
new file mode 100644
index 000000000..a16e9d1bf
--- /dev/null
+++ b/arch/s390/kernel/als.c
@@ -0,0 +1,124 @@
+/*
+ *    Copyright IBM Corp. 2016
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <asm/processor.h>
+#include <asm/facility.h>
+#include <asm/lowcore.h>
+#include <asm/sclp.h>
+#include "entry.h"
+
+/*
+ * The code within this file will be called very early. It may _not_
+ * access anything within the bss section, since that is not cleared
+ * yet and may contain data (e.g. initrd) that must be saved by other
+ * code.
+ * For temporary objects the stack (16k) should be used.
+ */
+
+static unsigned long als[] __initdata = { FACILITIES_ALS };
+
+static void __init u16_to_hex(char *str, u16 val)
+{
+	int i, num;
+
+	for (i = 1; i <= 4; i++) {
+		num = (val >> (16 - 4 * i)) & 0xf;
+		if (num >= 10)
+			num += 7;
+		*str++ = '0' + num;
+	}
+	*str = '\0';
+}
+
+static void __init print_machine_type(void)
+{
+	static char mach_str[80] __initdata = "Detected machine-type number: ";
+	char type_str[5];
+	struct cpuid id;
+
+	get_cpu_id(&id);
+	u16_to_hex(type_str, id.machine);
+	strcat(mach_str, type_str);
+	_sclp_print_early(mach_str);
+}
+
+static void __init u16_to_decimal(char *str, u16 val)
+{
+	int div = 1;
+
+	while (div * 10 <= val)
+		div *= 10;
+	while (div) {
+		*str++ = '0' + val / div;
+		val %= div;
+		div /= 10;
+	}
+	*str = '\0';
+}
+
+static void __init print_missing_facilities(void)
+{
+	static char als_str[80] __initdata = "Missing facilities: ";
+	unsigned long val;
+	char val_str[6];
+	int i, j, first;
+
+	first = 1;
+	for (i = 0; i < ARRAY_SIZE(als); i++) {
+		val = ~S390_lowcore.stfle_fac_list[i] & als[i];
+		for (j = 0; j < BITS_PER_LONG; j++) {
+			if (!(val & (1UL << (BITS_PER_LONG - 1 - j))))
+				continue;
+			if (!first)
+				strcat(als_str, ",");
+			/*
+			 * Make sure we stay within one line. Consider that
+			 * each facility bit adds up to five characters and
+			 * z/VM adds a four character prefix.
+			 */
+			if (strlen(als_str) > 70) {
+				_sclp_print_early(als_str);
+				*als_str = '\0';
+			}
+			u16_to_decimal(val_str, i * BITS_PER_LONG + j);
+			strcat(als_str, val_str);
+			first = 0;
+		}
+	}
+	_sclp_print_early(als_str);
+	_sclp_print_early("See Principles of Operations for facility bits");
+}
+
+static void __init facility_mismatch(void)
+{
+	_sclp_print_early("The Linux kernel requires more recent processor hardware");
+	print_machine_type();
+	print_missing_facilities();
+	disabled_wait(0x8badcccc);
+}
+
+void __init verify_facilities(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(S390_lowcore.stfle_fac_list); i++)
+		S390_lowcore.stfle_fac_list[i] = 0;
+	asm volatile(
+		"	stfl	0(0)\n"
+		: "=m" (S390_lowcore.stfl_fac_list));
+	S390_lowcore.stfle_fac_list[0] = (u64)S390_lowcore.stfl_fac_list << 32;
+	if (S390_lowcore.stfl_fac_list & 0x01000000) {
+		register unsigned long reg0 asm("0") = ARRAY_SIZE(als) - 1;
+
+		asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */
+			     : "+d" (reg0)
+			     : "a" (&S390_lowcore.stfle_fac_list)
+			     : "memory", "cc");
+	}
+	for (i = 0; i < ARRAY_SIZE(als); i++) {
+		if ((S390_lowcore.stfle_fac_list[i] & als[i]) != als[i])
+			facility_mismatch();
+	}
+}
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 77a84bd78..c8a83276a 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -99,12 +99,7 @@ static inline enum cache_type get_cache_type(struct cache_info *ci, int level)
 
 static inline unsigned long ecag(int ai, int li, int ti)
 {
-	unsigned long cmd, val;
-
-	cmd = ai << 4 | li << 1 | ti;
-	asm volatile(".insn	rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */
-		     : "=d" (val) : "a" (cmd));
-	return val;
+	return __ecag(ECAG_CACHE_ATTRIBUTE, ai << 4 | li << 1 | ti);
 }
 
 static void ci_leaf_init(struct cacheinfo *this_leaf, int private,
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index 48b37b835..a97354c8c 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -162,6 +162,30 @@ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
 }
 EXPORT_SYMBOL(diag14);
 
+static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr)
+{
+	register unsigned long _subcode asm("0") = *subcode;
+	register unsigned long _size asm("1") = size;
+
+	asm volatile(
+		"	diag	%2,%0,0x204\n"
+		"0:	nopr	%%r7\n"
+		EX_TABLE(0b,0b)
+		: "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory");
+	*subcode = _subcode;
+	return _size;
+}
+
+int diag204(unsigned long subcode, unsigned long size, void *addr)
+{
+	diag_stat_inc(DIAG_STAT_X204);
+	size = __diag204(&subcode, size, addr);
+	if (subcode)
+		return -1;
+	return size;
+}
+EXPORT_SYMBOL(diag204);
+
 /*
  * Diagnose 210: Get information about a virtual device
  */
@@ -196,3 +220,18 @@ int diag210(struct diag210 *addr)
 	return ccode;
 }
 EXPORT_SYMBOL(diag210);
+
+int diag224(void *ptr)
+{
+	int rc = -EOPNOTSUPP;
+
+	diag_stat_inc(DIAG_STAT_X224);
+	asm volatile(
+		"	diag	%1,%2,0x224\n"
+		"0:	lhi	%0,0x0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (rc) :"d" (0), "d" (ptr) : "memory");
+	return rc;
+}
+EXPORT_SYMBOL(diag224);
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 8cb9bfdd3..43446fa2a 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -26,7 +26,6 @@
 #include <asm/dis.h>
 #include <asm/io.h>
 #include <linux/atomic.h>
-#include <asm/mathemu.h>
 #include <asm/cpcmd.h>
 #include <asm/lowcore.h>
 #include <asm/debug.h>
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 69f9908ac..6693383bc 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -78,14 +78,10 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
 	sp = __dump_trace(func, data, sp,
 			  S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
 			  S390_lowcore.async_stack + frame_size);
-	if (task)
-		__dump_trace(func, data, sp,
-			     (unsigned long)task_stack_page(task),
-			     (unsigned long)task_stack_page(task) + THREAD_SIZE);
-	else
-		__dump_trace(func, data, sp,
-			     S390_lowcore.thread_info,
-			     S390_lowcore.thread_info + THREAD_SIZE);
+	task = task ?: current;
+	__dump_trace(func, data, sp,
+		     (unsigned long)task_stack_page(task),
+		     (unsigned long)task_stack_page(task) + THREAD_SIZE);
 }
 EXPORT_SYMBOL_GPL(dump_trace);
 
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index a0684de5a..717b03aa1 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -231,6 +231,26 @@ static noinline __init void detect_machine_type(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
 }
 
+static noinline __init void setup_arch_string(void)
+{
+	struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page;
+
+	if (stsi(mach, 1, 1, 1))
+		return;
+	EBCASC(mach->manufacturer, sizeof(mach->manufacturer));
+	EBCASC(mach->type, sizeof(mach->type));
+	EBCASC(mach->model, sizeof(mach->model));
+	EBCASC(mach->model_capacity, sizeof(mach->model_capacity));
+	dump_stack_set_arch_desc("%-16.16s %-4.4s %-16.16s %-16.16s (%s)",
+				 mach->manufacturer,
+				 mach->type,
+				 mach->model,
+				 mach->model_capacity,
+				 MACHINE_IS_LPAR ? "LPAR" :
+				 MACHINE_IS_VM ? "z/VM" :
+				 MACHINE_IS_KVM ? "KVM" : "unknown");
+}
+
 static __init void setup_topology(void)
 {
 	int max_mnest;
@@ -447,11 +467,13 @@ void __init startup_init(void)
 	ipl_save_parameters();
 	rescue_initrd();
 	clear_bss_section();
+	ptff_init();
 	init_kernel_storage_key();
 	lockdep_off();
 	setup_lowcore_early();
 	setup_facility_list();
 	detect_machine_type();
+	setup_arch_string();
 	ipl_update_parameters();
 	setup_boot_command_line();
 	create_kernel_nss();
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 2d47f9cfc..c51650a1e 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -163,6 +163,16 @@ _PIF_WORK	= (_PIF_PER_TRAP)
 	.endm
 
 	.section .kprobes.text, "ax"
+.Ldummy:
+	/*
+	 * This nop exists only in order to avoid that __switch_to starts at
+	 * the beginning of the kprobes text section. In that case we would
+	 * have several symbols at the same address. E.g. objdump would take
+	 * an arbitrary symbol name when disassembling this code.
+	 * With the added nop in between the __switch_to symbol is unique
+	 * again.
+	 */
+	nop	0
 
 /*
  * Scheduler resume function, called by switch_to
@@ -175,7 +185,6 @@ ENTRY(__switch_to)
 	stmg	%r6,%r15,__SF_GPRS(%r15)	# store gprs of prev task
 	lgr	%r1,%r2
 	aghi	%r1,__TASK_thread		# thread_struct of prev task
-	lg	%r4,__TASK_thread_info(%r2)	# get thread_info of prev
 	lg	%r5,__TASK_thread_info(%r3)	# get thread_info of next
 	stg	%r15,__THREAD_ksp(%r1)		# store kernel stack of prev
 	lgr	%r1,%r3
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index bedd2f55d..e79f030dd 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -79,4 +79,6 @@ long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
 
 DECLARE_PER_CPU(u64, mt_cycles[8]);
 
+void verify_facilities(void);
+
 #endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
new file mode 100644
index 000000000..81d1d1887
--- /dev/null
+++ b/arch/s390/kernel/fpu.c
@@ -0,0 +1,249 @@
+/*
+ * In-kernel vector facility support functions
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <asm/fpu/types.h>
+#include <asm/fpu/api.h>
+
+/*
+ * Per-CPU variable to maintain FPU register ranges that are in use
+ * by the kernel.
+ */
+static DEFINE_PER_CPU(u32, kernel_fpu_state);
+
+#define KERNEL_FPU_STATE_MASK	(KERNEL_FPU_MASK|KERNEL_FPC)
+
+
+void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
+{
+	if (!__this_cpu_read(kernel_fpu_state)) {
+		/*
+		 * Save user space FPU state and register contents.  Multiple
+		 * calls because of interruptions do not matter and return
+		 * immediately.  This also sets CIF_FPU to lazy restore FP/VX
+		 * register contents when returning to user space.
+		 */
+		save_fpu_regs();
+	}
+
+	/* Update flags to use the vector facility for KERNEL_FPR */
+	if (MACHINE_HAS_VX && (state->mask & KERNEL_FPR)) {
+		flags |= KERNEL_VXR_LOW | KERNEL_FPC;
+		flags &= ~KERNEL_FPR;
+	}
+
+	/* Save and update current kernel VX state */
+	state->mask = __this_cpu_read(kernel_fpu_state);
+	__this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK);
+
+	/*
+	 * If this is the first call to __kernel_fpu_begin(), no additional
+	 * work is required.
+	 */
+	if (!(state->mask & KERNEL_FPU_STATE_MASK))
+		return;
+
+	/*
+	 * If KERNEL_FPR is still set, the vector facility is not available
+	 * and, thus, save floating-point control and registers only.
+	 */
+	if (state->mask & KERNEL_FPR) {
+		asm volatile("stfpc %0" : "=Q" (state->fpc));
+		asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
+		asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
+		asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
+		asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
+		asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
+		asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
+		asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
+		asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
+		asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
+		asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
+		asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
+		asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
+		asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
+		asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
+		asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
+		asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
+		return;
+	}
+
+	/*
+	 * If this is a nested call to __kernel_fpu_begin(), check the saved
+	 * state mask to save and later restore the vector registers that
+	 * are already in use.	Let's start with checking floating-point
+	 * controls.
+	 */
+	if (state->mask & KERNEL_FPC)
+		asm volatile("stfpc %0" : "=m" (state->fpc));
+
+	/* Test and save vector registers */
+	asm volatile (
+		/*
+		 * Test if any vector register must be saved and, if so,
+		 * test if all register can be saved.
+		 */
+		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
+		"	jz	20f\n"		/* no work -> done */
+		"	la	1,%[vxrs]\n"	/* load save area */
+		"	jo	18f\n"		/* -> save V0..V31 */
+
+		/*
+		 * Test if V8..V23 can be saved at once... this speeds up
+		 * for KERNEL_fpu_MID only. Otherwise continue to split the
+		 * range of vector registers into two halves and test them
+		 * separately.
+		 */
+		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
+		"	jo	17f\n"		/* -> save V8..V23 */
+
+		/* Test and save the first half of 16 vector registers */
+		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
+		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
+		"	jo	2f\n"		/* 11 -> save V0..V15 */
+		"	brc	4,3f\n"		/* 01 -> save V0..V7  */
+		"	brc	2,4f\n"		/* 10 -> save V8..V15 */
+
+		/* Test and save the second half of 16 vector registers */
+		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
+		"	jo	19f\n"		/* 11 -> save V16..V31 */
+		"	brc	4,11f\n"	/* 01 -> save V16..V23	*/
+		"	brc	2,12f\n"	/* 10 -> save V24..V31 */
+		"	j	20f\n"		/* 00 -> done */
+
+		/*
+		 * Below are the vstm combinations to save multiple vector
+		 * registers at once.
+		 */
+		"2:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
+		"	j	10b\n"			/* -> VXR_HIGH */
+		"3:	.word	0xe707,0x1000,0x003e\n" /* vstm 0,7,0(1) */
+		"	j	10b\n"			/* -> VXR_HIGH */
+		"4:	.word	0xe78f,0x1080,0x003e\n" /* vstm 8,15,128(1) */
+		"	j	10b\n"			/* -> VXR_HIGH */
+		"\n"
+		"11:	.word	0xe707,0x1100,0x0c3e\n"	/* vstm 16,23,256(1) */
+		"	j	20f\n"			/* -> done */
+		"12:	.word	0xe78f,0x1180,0x0c3e\n" /* vstm 24,31,384(1) */
+		"	j	20f\n"			/* -> done */
+		"\n"
+		"17:	.word	0xe787,0x1080,0x043e\n"	/* vstm 8,23,128(1) */
+		"	nill	%[m],249\n"		/* m &= ~VXR_MID    */
+		"	j	1b\n"			/* -> VXR_LOW */
+		"\n"
+		"18:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
+		"19:	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
+		"20:"
+		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
+		: [m] "d" (state->mask)
+		: "1", "cc");
+}
+EXPORT_SYMBOL(__kernel_fpu_begin);
+
+void __kernel_fpu_end(struct kernel_fpu *state)
+{
+	/* Just update the per-CPU state if there is nothing to restore */
+	if (!(state->mask & KERNEL_FPU_STATE_MASK))
+		goto update_fpu_state;
+
+	/*
+	 * If KERNEL_FPR is specified, the vector facility is not available
+	 * and, thus, restore floating-point control and registers only.
+	 */
+	if (state->mask & KERNEL_FPR) {
+		asm volatile("lfpc %0" : : "Q" (state->fpc));
+		asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
+		asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
+		asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
+		asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
+		asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
+		asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
+		asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
+		asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
+		asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
+		asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
+		asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
+		asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
+		asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
+		asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
+		asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
+		asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
+		goto update_fpu_state;
+	}
+
+	/* Test and restore floating-point controls */
+	if (state->mask & KERNEL_FPC)
+		asm volatile("lfpc %0" : : "Q" (state->fpc));
+
+	/* Test and restore (load) vector registers */
+	asm volatile (
+		/*
+		 * Test if any vector registers must be loaded and, if so,
+		 * test if all registers can be loaded at once.
+		 */
+		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
+		"	jz	20f\n"		/* no work -> done */
+		"	la	1,%[vxrs]\n"	/* load load area */
+		"	jo	18f\n"		/* -> load V0..V31 */
+
+		/*
+		 * Test if V8..V23 can be restored at once... this speeds up
+		 * for KERNEL_VXR_MID only. Otherwise continue to split the
+		 * range of vector registers into two halves and test them
+		 * separately.
+		 */
+		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
+		"	jo	17f\n"		/* -> load V8..V23 */
+
+		/* Test and load the first half of 16 vector registers */
+		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
+		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
+		"	jo	2f\n"		/* 11 -> load V0..V15 */
+		"	brc	4,3f\n"		/* 01 -> load V0..V7  */
+		"	brc	2,4f\n"		/* 10 -> load V8..V15 */
+
+		/* Test and load the second half of 16 vector registers */
+		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
+		"	jo	19f\n"		/* 11 -> load V16..V31 */
+		"	brc	4,11f\n"	/* 01 -> load V16..V23	*/
+		"	brc	2,12f\n"	/* 10 -> load V24..V31 */
+		"	j	20f\n"		/* 00 -> done */
+
+		/*
+		 * Below are the vstm combinations to load multiple vector
+		 * registers at once.
+		 */
+		"2:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
+		"	j	10b\n"			/* -> VXR_HIGH */
+		"3:	.word	0xe707,0x1000,0x0036\n" /* vlm 0,7,0(1) */
+		"	j	10b\n"			/* -> VXR_HIGH */
+		"4:	.word	0xe78f,0x1080,0x0036\n" /* vlm 8,15,128(1) */
+		"	j	10b\n"			/* -> VXR_HIGH */
+		"\n"
+		"11:	.word	0xe707,0x1100,0x0c36\n"	/* vlm 16,23,256(1) */
+		"	j	20f\n"			/* -> done */
+		"12:	.word	0xe78f,0x1180,0x0c36\n" /* vlm 24,31,384(1) */
+		"	j	20f\n"			/* -> done */
+		"\n"
+		"17:	.word	0xe787,0x1080,0x0436\n"	/* vlm 8,23,128(1) */
+		"	nill	%[m],249\n"		/* m &= ~VXR_MID    */
+		"	j	1b\n"			/* -> VXR_LOW */
+		"\n"
+		"18:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
+		"19:	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
+		"20:"
+		:
+		: [vxrs] "Q" (*(struct vx_array *) &state->vxrs),
+		  [m] "d" (state->mask)
+		: "1", "cc");
+
+update_fpu_state:
+	/* Update current kernel VX state */
+	__this_cpu_write(kernel_fpu_state, state->mask);
+}
+EXPORT_SYMBOL(__kernel_fpu_end);
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index fcaefb041..4431905f8 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -306,49 +306,16 @@ ENTRY(startup_kdump)
 	stck	__LC_LAST_UPDATE_CLOCK
 	spt	6f-.LPG0(%r13)
 	mvc	__LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13)
-	stfl	0(%r0)			# store facilities @ __LC_STFL_FAC_LIST
-	mvc	__LC_STFLE_FAC_LIST(4),__LC_STFL_FAC_LIST
-	tm	__LC_STFLE_FAC_LIST,0x01	# stfle available ?
-	jz	0f
-	lghi	%r0,FACILITIES_ALS_DWORDS-1
-	.insn	s,0xb2b00000,__LC_STFLE_FAC_LIST # store facility list extended
-	# verify if all required facilities are supported by the machine
-0:	la	%r1,__LC_STFLE_FAC_LIST
-	la	%r2,3f+8-.LPG0(%r13)
-	lhi	%r3,FACILITIES_ALS_DWORDS
-1:	lg	%r0,0(%r1)
-	ng	%r0,0(%r2)
-	clg	%r0,0(%r2)
-	jne	2f
-	la	%r1,8(%r1)
-	la	%r2,8(%r2)
-	ahi	%r3,-1
-	jnz	1b
-	j	4f
-2:	l	%r15,.Lstack-.LPG0(%r13)
+	l	%r15,.Lstack-.LPG0(%r13)
 	ahi	%r15,-STACK_FRAME_OVERHEAD
-	la	%r2,.Lals_string-.LPG0(%r13)
-	l	%r3,.Lsclp_print-.LPG0(%r13)
-	basr	%r14,%r3
-	lpsw	3f-.LPG0(%r13)		# machine type not good enough, crash
-.Lals_string:
-	.asciz	"The Linux kernel requires more recent processor hardware"
-.Lsclp_print:
-	.long	_sclp_print_early
-.Lstack:
-	.long	0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER))
-	.align 16
-3:	.long	0x000a0000,0x8badcccc
-
-# List of facilities that are required. If not all facilities are present
-# the kernel will crash.
-
-	.quad FACILITIES_ALS
-
-4:
-	/* Continue with startup code in head64.S */
+	brasl	%r14,verify_facilities
+# For uncompressed images, continue in
+# arch/s390/kernel/head64.S. For compressed images, continue in
+# arch/s390/boot/compressed/head.S.
 	jg	startup_continue
 
+.Lstack:
+	.long	0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER))
 	.align	8
 6:	.long	0x7fffffff,0xffffffff
 
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index d14069d4b..295bfb712 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -121,9 +121,9 @@ static char *dump_type_str(enum dump_type type)
  * Must be in data section since the bss section
  * is not cleared when these are accessed.
  */
-static u8 ipl_ssid __attribute__((__section__(".data"))) = 0;
-static u16 ipl_devno __attribute__((__section__(".data"))) = 0;
-u32 ipl_flags __attribute__((__section__(".data"))) = 0;
+static u8 ipl_ssid __section(.data) = 0;
+static u16 ipl_devno __section(.data) = 0;
+u32 ipl_flags __section(.data) = 0;
 
 enum ipl_method {
 	REIPL_METHOD_CCW_CIO,
@@ -174,7 +174,7 @@ static inline int __diag308(unsigned long subcode, void *addr)
 
 	asm volatile(
 		"	diag	%0,%2,0x308\n"
-		"0:\n"
+		"0:	nopr	%%r7\n"
 		EX_TABLE(0b,0b)
 		: "+d" (_addr), "+d" (_rc)
 		: "d" (subcode) : "cc", "memory");
@@ -563,7 +563,7 @@ static struct kset *ipl_kset;
 
 static void __ipl_run(void *unused)
 {
-	diag308(DIAG308_IPL, NULL);
+	diag308(DIAG308_LOAD_CLEAR, NULL);
 	if (MACHINE_IS_VM)
 		__cpcmd("IPL", NULL, 0, NULL);
 	else if (ipl_info.type == IPL_TYPE_CCW)
@@ -1085,21 +1085,24 @@ static void __reipl_run(void *unused)
 		break;
 	case REIPL_METHOD_CCW_DIAG:
 		diag308(DIAG308_SET, reipl_block_ccw);
-		diag308(DIAG308_IPL, NULL);
+		if (MACHINE_IS_LPAR)
+			diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
+		else
+			diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case REIPL_METHOD_FCP_RW_DIAG:
 		diag308(DIAG308_SET, reipl_block_fcp);
-		diag308(DIAG308_IPL, NULL);
+		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case REIPL_METHOD_FCP_RO_DIAG:
-		diag308(DIAG308_IPL, NULL);
+		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case REIPL_METHOD_FCP_RO_VM:
 		__cpcmd("IPL", NULL, 0, NULL);
 		break;
 	case REIPL_METHOD_NSS_DIAG:
 		diag308(DIAG308_SET, reipl_block_nss);
-		diag308(DIAG308_IPL, NULL);
+		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case REIPL_METHOD_NSS:
 		get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS);
@@ -1108,7 +1111,7 @@ static void __reipl_run(void *unused)
 	case REIPL_METHOD_DEFAULT:
 		if (MACHINE_IS_VM)
 			__cpcmd("IPL", NULL, 0, NULL);
-		diag308(DIAG308_IPL, NULL);
+		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case REIPL_METHOD_FCP_DUMP:
 		break;
@@ -1423,7 +1426,7 @@ static void diag308_dump(void *dump_block)
 {
 	diag308(DIAG308_SET, dump_block);
 	while (1) {
-		if (diag308(DIAG308_DUMP, NULL) != 0x302)
+		if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302)
 			break;
 		udelay_simple(USEC_PER_SEC);
 	}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index c373a1d41..285d65610 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -127,9 +127,7 @@ int show_interrupts(struct seq_file *p, void *v)
 			seq_printf(p, "CPU%d       ", cpu);
 		seq_putc(p, '\n');
 	}
-	if (index < NR_IRQS) {
-		if (index >= NR_IRQS_BASE)
-			goto out;
+	if (index < NR_IRQS_BASE) {
 		seq_printf(p, "%s: ", irqclass_main_desc[index].name);
 		irq = irqclass_main_desc[index].irq;
 		for_each_online_cpu(cpu)
@@ -137,6 +135,9 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 		goto out;
 	}
+	if (index > NR_IRQS_BASE)
+		goto out;
+
 	for (index = 0; index < NR_ARCH_IRQS; index++) {
 		seq_printf(p, "%s: ", irqclass_sub_desc[index].name);
 		irq = irqclass_sub_desc[index].irq;
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 250f59725..dd6306c51 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -690,6 +690,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	stack = (unsigned long) regs->gprs[15];
 
 	memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack));
+
+	/*
+	 * jprobes use jprobe_return() which skips the normal return
+	 * path of the function, and this messes up the accounting of the
+	 * function graph tracer to get messed up.
+	 *
+	 * Pause function graph tracing while performing the jprobe function.
+	 */
+	pause_graph_tracing();
 	return 1;
 }
 NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -705,6 +714,9 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	unsigned long stack;
 
+	/* It's OK to start function graph tracing again */
+	unpause_graph_tracing();
+
 	stack = (unsigned long) kcb->jprobe_saved_regs.gprs[15];
 
 	/* Put the regs back */
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 0e64f08d3..3074c1d83 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -24,6 +24,7 @@
 #include <asm/diag.h>
 #include <asm/elf.h>
 #include <asm/asm-offsets.h>
+#include <asm/cacheflush.h>
 #include <asm/os_info.h>
 #include <asm/switch_to.h>
 
@@ -60,8 +61,6 @@ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
 static int __init machine_kdump_pm_init(void)
 {
 	pm_notifier(machine_kdump_pm_cb, 0);
-	/* Create initial mapping for crashkernel memory */
-	arch_kexec_unprotect_crashkres();
 	return 0;
 }
 arch_initcall(machine_kdump_pm_init);
@@ -150,42 +149,40 @@ static int kdump_csum_valid(struct kimage *image)
 
 #ifdef CONFIG_CRASH_DUMP
 
-/*
- * Map or unmap crashkernel memory
- */
-static void crash_map_pages(int enable)
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
 {
-	unsigned long size = resource_size(&crashk_res);
-
-	BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN ||
-	       size % KEXEC_CRASH_MEM_ALIGN);
-	if (enable)
-		vmem_add_mapping(crashk_res.start, size);
-	else {
-		vmem_remove_mapping(crashk_res.start, size);
-		if (size)
-			os_info_crashkernel_add(crashk_res.start, size);
-		else
-			os_info_crashkernel_add(0, 0);
-	}
+	unsigned long addr, size;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE)
+		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
+	size = begin - crashk_res.start;
+	if (size)
+		os_info_crashkernel_add(crashk_res.start, size);
+	else
+		os_info_crashkernel_add(0, 0);
+}
+
+static void crash_protect_pages(int protect)
+{
+	unsigned long size;
+
+	if (!crashk_res.end)
+		return;
+	size = resource_size(&crashk_res);
+	if (protect)
+		set_memory_ro(crashk_res.start, size >> PAGE_SHIFT);
+	else
+		set_memory_rw(crashk_res.start, size >> PAGE_SHIFT);
 }
 
-/*
- * Unmap crashkernel memory
- */
 void arch_kexec_protect_crashkres(void)
 {
-	if (crashk_res.end)
-		crash_map_pages(0);
+	crash_protect_pages(1);
 }
 
-/*
- * Map crashkernel memory
- */
 void arch_kexec_unprotect_crashkres(void)
 {
-	if (crashk_res.end)
-		crash_map_pages(1);
+	crash_protect_pages(0);
 }
 
 #endif
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 07302ce37..29376f0e7 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -16,7 +16,7 @@
 #include <linux/module.h>
 #include <asm/lowcore.h>
 #include <asm/smp.h>
-#include <asm/etr.h>
+#include <asm/stp.h>
 #include <asm/cputime.h>
 #include <asm/nmi.h>
 #include <asm/crw.h>
@@ -27,7 +27,6 @@ struct mcck_struct {
 	unsigned int kill_task : 1;
 	unsigned int channel_report : 1;
 	unsigned int warning : 1;
-	unsigned int etr_queue : 1;
 	unsigned int stp_queue : 1;
 	unsigned long mcck_code;
 };
@@ -82,8 +81,6 @@ void s390_handle_mcck(void)
 		if (xchg(&mchchk_wng_posted, 1) == 0)
 			kill_cad_pid(SIGPWR, 1);
 	}
-	if (mcck.etr_queue)
-		etr_queue_work();
 	if (mcck.stp_queue)
 		stp_queue_work();
 	if (mcck.kill_task) {
@@ -241,8 +238,6 @@ static int notrace s390_validate_registers(union mci mci)
 
 #define ED_STP_ISLAND	6	/* External damage STP island check */
 #define ED_STP_SYNC	7	/* External damage STP sync check */
-#define ED_ETR_SYNC	12	/* External damage ETR sync check */
-#define ED_ETR_SWITCH	13	/* External damage ETR switch to local */
 
 /*
  * machine check handler.
@@ -325,15 +320,11 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
 	}
 	if (mci.ed && mci.ec) {
 		/* External damage */
-		if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
-			mcck->etr_queue |= etr_sync_check();
-		if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
-			mcck->etr_queue |= etr_switch_to_local();
 		if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
 			mcck->stp_queue |= stp_sync_check();
 		if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
 			mcck->stp_queue |= stp_island_check();
-		if (mcck->etr_queue || mcck->stp_queue)
+		if (mcck->stp_queue)
 			set_cpu_flag(CIF_MCCK_PENDING);
 	}
 	if (mci.se)
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 7ec63b1d9..037c2a253 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -664,30 +664,22 @@ static struct pmu cpumf_pmu = {
 	.cancel_txn   = cpumf_pmu_cancel_txn,
 };
 
-static int cpumf_pmu_notifier(struct notifier_block *self, unsigned long action,
-			      void *hcpu)
+static int cpumf_pmf_setup(unsigned int cpu, int flags)
 {
-	int flags;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
-		flags = PMC_INIT;
-		local_irq_disable();
-		setup_pmc_cpu(&flags);
-		local_irq_enable();
-		break;
-	case CPU_DOWN_PREPARE:
-		flags = PMC_RELEASE;
-		local_irq_disable();
-		setup_pmc_cpu(&flags);
-		local_irq_enable();
-		break;
-	default:
-		break;
-	}
+	local_irq_disable();
+	setup_pmc_cpu(&flags);
+	local_irq_enable();
+	return 0;
+}
+
+static int s390_pmu_online_cpu(unsigned int cpu)
+{
+	return cpumf_pmf_setup(cpu, PMC_INIT);
+}
 
-	return NOTIFY_OK;
+static int s390_pmu_offline_cpu(unsigned int cpu)
+{
+	return cpumf_pmf_setup(cpu, PMC_RELEASE);
 }
 
 static int __init cpumf_pmu_init(void)
@@ -707,7 +699,7 @@ static int __init cpumf_pmu_init(void)
 	if (rc) {
 		pr_err("Registering for CPU-measurement alerts "
 		       "failed with rc=%i\n", rc);
-		goto out;
+		return rc;
 	}
 
 	cpumf_pmu.attr_groups = cpumf_cf_event_group();
@@ -716,10 +708,10 @@ static int __init cpumf_pmu_init(void)
 		pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
 		unregister_external_irq(EXT_IRQ_MEASURE_ALERT,
 					cpumf_measurement_alert);
-		goto out;
+		return rc;
 	}
-	perf_cpu_notifier(cpumf_pmu_notifier);
-out:
-	return rc;
+	return cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
+				 "AP_PERF_S390_CF_ONLINE",
+				 s390_pmu_online_cpu, s390_pmu_offline_cpu);
 }
 early_initcall(cpumf_pmu_init);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index a8e832166..fcc634c14 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -601,17 +601,12 @@ static void release_pmc_hardware(void)
 
 	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
 	on_each_cpu(setup_pmc_cpu, &flags, 1);
-	perf_release_sampling();
 }
 
 static int reserve_pmc_hardware(void)
 {
 	int flags = PMC_INIT;
-	int err;
 
-	err = perf_reserve_sampling();
-	if (err)
-		return err;
 	on_each_cpu(setup_pmc_cpu, &flags, 1);
 	if (flags & PMC_FAILURE) {
 		release_pmc_hardware();
@@ -979,12 +974,15 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
 	struct pt_regs regs;
 	struct perf_sf_sde_regs *sde_regs;
 	struct perf_sample_data data;
-	struct perf_raw_record raw;
+	struct perf_raw_record raw = {
+		.frag = {
+			.size = sfr->size,
+			.data = sfr,
+		},
+	};
 
 	/* Setup perf sample */
 	perf_sample_data_init(&data, 0, event->hw.last_period);
-	raw.size = sfr->size;
-	raw.data = sfr;
 	data.raw = &raw;
 
 	/* Setup pt_regs to look like an CPU-measurement external interrupt
@@ -1506,37 +1504,28 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
 		sf_disable();
 	}
 }
-
-static int cpumf_pmu_notifier(struct notifier_block *self,
-			      unsigned long action, void *hcpu)
+static int cpusf_pmu_setup(unsigned int cpu, int flags)
 {
-	int flags;
-
 	/* Ignore the notification if no events are scheduled on the PMU.
 	 * This might be racy...
 	 */
 	if (!atomic_read(&num_events))
-		return NOTIFY_OK;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
-		flags = PMC_INIT;
-		local_irq_disable();
-		setup_pmc_cpu(&flags);
-		local_irq_enable();
-		break;
-	case CPU_DOWN_PREPARE:
-		flags = PMC_RELEASE;
-		local_irq_disable();
-		setup_pmc_cpu(&flags);
-		local_irq_enable();
-		break;
-	default:
-		break;
-	}
+		return 0;
 
-	return NOTIFY_OK;
+	local_irq_disable();
+	setup_pmc_cpu(&flags);
+	local_irq_enable();
+	return 0;
+}
+
+static int s390_pmu_sf_online_cpu(unsigned int cpu)
+{
+	return cpusf_pmu_setup(cpu, PMC_INIT);
+}
+
+static int s390_pmu_sf_offline_cpu(unsigned int cpu)
+{
+	return cpusf_pmu_setup(cpu, PMC_RELEASE);
 }
 
 static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
@@ -1636,7 +1625,9 @@ static int __init init_cpum_sampling_pmu(void)
 					cpumf_measurement_alert);
 		goto out;
 	}
-	perf_cpu_notifier(cpumf_pmu_notifier);
+
+	cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "AP_PERF_S390_SF_ONLINE",
+			  s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu);
 out:
 	return err;
 }
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 87035fa58..17431f63d 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -248,33 +248,3 @@ ssize_t cpumf_events_sysfs_show(struct device *dev,
 	return sprintf(page, "event=0x%04llx,name=%s\n",
 		       pmu_attr->id, attr->attr.name);
 }
-
-/* Reserve/release functions for sharing perf hardware */
-static DEFINE_SPINLOCK(perf_hw_owner_lock);
-static void *perf_sampling_owner;
-
-int perf_reserve_sampling(void)
-{
-	int err;
-
-	err = 0;
-	spin_lock(&perf_hw_owner_lock);
-	if (perf_sampling_owner) {
-		pr_warn("The sampling facility is already reserved by %p\n",
-			perf_sampling_owner);
-		err = -EBUSY;
-	} else
-		perf_sampling_owner = __builtin_return_address(0);
-	spin_unlock(&perf_hw_owner_lock);
-	return err;
-}
-EXPORT_SYMBOL(perf_reserve_sampling);
-
-void perf_release_sampling(void)
-{
-	spin_lock(&perf_hw_owner_lock);
-	WARN_ON(!perf_sampling_owner);
-	perf_sampling_owner = NULL;
-	spin_unlock(&perf_hw_owner_lock);
-}
-EXPORT_SYMBOL(perf_release_sampling);
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index de7451065..81d080808 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -13,12 +13,45 @@
 #include <linux/delay.h>
 #include <linux/cpu.h>
 #include <asm/diag.h>
+#include <asm/facility.h>
 #include <asm/elf.h>
 #include <asm/lowcore.h>
 #include <asm/param.h>
 #include <asm/smp.h>
 
-static DEFINE_PER_CPU(struct cpuid, cpu_id);
+struct cpu_info {
+	unsigned int cpu_mhz_dynamic;
+	unsigned int cpu_mhz_static;
+	struct cpuid cpu_id;
+};
+
+static DEFINE_PER_CPU(struct cpu_info, cpu_info);
+
+static bool machine_has_cpu_mhz;
+
+void __init cpu_detect_mhz_feature(void)
+{
+	if (test_facility(34) && __ecag(ECAG_CPU_ATTRIBUTE, 0) != -1UL)
+		machine_has_cpu_mhz = 1;
+}
+
+static void update_cpu_mhz(void *arg)
+{
+	unsigned long mhz;
+	struct cpu_info *c;
+
+	mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0);
+	c = this_cpu_ptr(&cpu_info);
+	c->cpu_mhz_dynamic = mhz >> 32;
+	c->cpu_mhz_static = mhz & 0xffffffff;
+}
+
+void s390_update_cpu_mhz(void)
+{
+	s390_adjust_jiffies();
+	if (machine_has_cpu_mhz)
+		on_each_cpu(update_cpu_mhz, NULL, 0);
+}
 
 void notrace cpu_relax(void)
 {
@@ -35,9 +68,11 @@ EXPORT_SYMBOL(cpu_relax);
  */
 void cpu_init(void)
 {
-	struct cpuid *id = this_cpu_ptr(&cpu_id);
+	struct cpuid *id = this_cpu_ptr(&cpu_info.cpu_id);
 
 	get_cpu_id(id);
+	if (machine_has_cpu_mhz)
+		update_cpu_mhz(NULL);
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
 	BUG_ON(current->mm);
@@ -53,10 +88,7 @@ int cpu_have_feature(unsigned int num)
 }
 EXPORT_SYMBOL(cpu_have_feature);
 
-/*
- * show_cpuinfo - Get information on one CPU for use by procfs.
- */
-static int show_cpuinfo(struct seq_file *m, void *v)
+static void show_cpu_summary(struct seq_file *m, void *v)
 {
 	static const char *hwcap_str[] = {
 		"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
@@ -65,34 +97,55 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	static const char * const int_hwcap_str[] = {
 		"sie"
 	};
-	unsigned long n = (unsigned long) v - 1;
-	int i;
-
-	if (!n) {
-		s390_adjust_jiffies();
-		seq_printf(m, "vendor_id       : IBM/S390\n"
-			   "# processors    : %i\n"
-			   "bogomips per cpu: %lu.%02lu\n",
-			   num_online_cpus(), loops_per_jiffy/(500000/HZ),
-			   (loops_per_jiffy/(5000/HZ))%100);
-		seq_puts(m, "features\t: ");
-		for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
-			if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
-				seq_printf(m, "%s ", hwcap_str[i]);
-		for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++)
-			if (int_hwcap_str[i] && (int_hwcap & (1UL << i)))
-				seq_printf(m, "%s ", int_hwcap_str[i]);
-		seq_puts(m, "\n");
-		show_cacheinfo(m);
-	}
-	if (cpu_online(n)) {
-		struct cpuid *id = &per_cpu(cpu_id, n);
-		seq_printf(m, "processor %li: "
+	int i, cpu;
+
+	seq_printf(m, "vendor_id       : IBM/S390\n"
+		   "# processors    : %i\n"
+		   "bogomips per cpu: %lu.%02lu\n",
+		   num_online_cpus(), loops_per_jiffy/(500000/HZ),
+		   (loops_per_jiffy/(5000/HZ))%100);
+	seq_printf(m, "max thread id   : %d\n", smp_cpu_mtid);
+	seq_puts(m, "features\t: ");
+	for (i = 0; i < ARRAY_SIZE(hwcap_str); i++)
+		if (hwcap_str[i] && (elf_hwcap & (1UL << i)))
+			seq_printf(m, "%s ", hwcap_str[i]);
+	for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++)
+		if (int_hwcap_str[i] && (int_hwcap & (1UL << i)))
+			seq_printf(m, "%s ", int_hwcap_str[i]);
+	seq_puts(m, "\n");
+	show_cacheinfo(m);
+	for_each_online_cpu(cpu) {
+		struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu);
+
+		seq_printf(m, "processor %d: "
 			   "version = %02X,  "
 			   "identification = %06X,  "
 			   "machine = %04X\n",
-			   n, id->version, id->ident, id->machine);
+			   cpu, id->version, id->ident, id->machine);
 	}
+}
+
+static void show_cpu_mhz(struct seq_file *m, unsigned long n)
+{
+	struct cpu_info *c = per_cpu_ptr(&cpu_info, n);
+
+	seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic);
+	seq_printf(m, "cpu MHz static  : %d\n", c->cpu_mhz_static);
+}
+
+/*
+ * show_cpuinfo - Get information on one CPU for use by procfs.
+ */
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+	unsigned long n = (unsigned long) v - 1;
+
+	if (!n)
+		show_cpu_summary(m, v);
+	if (!machine_has_cpu_mhz)
+		return 0;
+	seq_printf(m, "\ncpu number      : %ld\n", n);
+	show_cpu_mhz(m, n);
 	return 0;
 }
 
@@ -126,4 +179,3 @@ const struct seq_operations cpuinfo_op = {
 	.stop	= c_stop,
 	.show	= show_cpuinfo,
 };
-
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 49b1c13bf..9336e824e 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -821,14 +821,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 
 asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 {
-	long ret = 0;
-
-	/* Do the secure computing check first. */
-	if (secure_computing()) {
-		/* seccomp failures shouldn't expose any additional code. */
-		ret = -1;
-		goto out;
-	}
+	unsigned long mask = -1UL;
 
 	/*
 	 * The sysc_tracesys code in entry.S stored the system
@@ -843,17 +836,26 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 		 * the system call and the system call restart handling.
 		 */
 		clear_pt_regs_flag(regs, PIF_SYSCALL);
-		ret = -1;
+		return -1;
+	}
+
+	/* Do the secure computing check after ptrace. */
+	if (secure_computing(NULL)) {
+		/* seccomp failures shouldn't expose any additional code. */
+		return -1;
 	}
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->gprs[2]);
 
-	audit_syscall_entry(regs->gprs[2], regs->orig_gpr2,
-			    regs->gprs[3], regs->gprs[4],
-			    regs->gprs[5]);
-out:
-	return ret ?: regs->gprs[2];
+	if (is_compat_task())
+		mask = 0xffffffff;
+
+	audit_syscall_entry(regs->gprs[2], regs->orig_gpr2 & mask,
+			    regs->gprs[3] &mask, regs->gprs[4] &mask,
+			    regs->gprs[5] &mask);
+
+	return regs->gprs[2];
 }
 
 asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
diff --git a/arch/s390/kernel/sclp.c b/arch/s390/kernel/sclp.c
index d88db40bd..f08af675f 100644
--- a/arch/s390/kernel/sclp.c
+++ b/arch/s390/kernel/sclp.c
@@ -12,8 +12,9 @@
 #define EVTYP_VT220MSG_MASK	0x00000040
 #define EVTYP_MSG_MASK		0x40000000
 
-static char _sclp_work_area[4096] __aligned(PAGE_SIZE);
-static bool have_vt220, have_linemode;
+static char _sclp_work_area[4096] __aligned(PAGE_SIZE) __section(data);
+static bool have_vt220 __section(data);
+static bool have_linemode __section(data);
 
 static void _sclp_wait_int(void)
 {
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f31939147..7f7ba5f23 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -130,17 +130,14 @@ __setup("condev=", condev_setup);
 
 static void __init set_preferred_console(void)
 {
-	if (MACHINE_IS_KVM) {
-		if (sclp.has_vt220)
-			add_preferred_console("ttyS", 1, NULL);
-		else if (sclp.has_linemode)
-			add_preferred_console("ttyS", 0, NULL);
-		else
-			add_preferred_console("hvc", 0, NULL);
-	} else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
+	if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
 		add_preferred_console("ttyS", 0, NULL);
 	else if (CONSOLE_IS_3270)
 		add_preferred_console("tty3270", 0, NULL);
+	else if (CONSOLE_IS_VT220)
+		add_preferred_console("ttyS", 1, NULL);
+	else if (CONSOLE_IS_HVC)
+		add_preferred_console("hvc", 0, NULL);
 }
 
 static int __init conmode_setup(char *str)
@@ -206,6 +203,13 @@ static void __init conmode_default(void)
 			SET_CONSOLE_SCLP;
 #endif
 		}
+	} else if (MACHINE_IS_KVM) {
+		if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
+			SET_CONSOLE_VT220;
+		else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
+			SET_CONSOLE_SCLP;
+		else
+			SET_CONSOLE_HVC;
 	} else {
 #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 		SET_CONSOLE_SCLP;
@@ -289,7 +293,7 @@ static int __init parse_vmalloc(char *arg)
 }
 early_param("vmalloc", parse_vmalloc);
 
-void *restart_stack __attribute__((__section__(".data")));
+void *restart_stack __section(.data);
 
 static void __init setup_lowcore(void)
 {
@@ -432,6 +436,20 @@ static void __init setup_resources(void)
 			}
 		}
 	}
+#ifdef CONFIG_CRASH_DUMP
+	/*
+	 * Re-add removed crash kernel memory as reserved memory. This makes
+	 * sure it will be mapped with the identity mapping and struct pages
+	 * will be created, so it can be resized later on.
+	 * However add it later since the crash kernel resource should not be
+	 * part of the System RAM resource.
+	 */
+	if (crashk_res.end) {
+		memblock_add(crashk_res.start, resource_size(&crashk_res));
+		memblock_reserve(crashk_res.start, resource_size(&crashk_res));
+		insert_resource(&iomem_resource, &crashk_res);
+	}
+#endif
 }
 
 static void __init setup_memory_end(void)
@@ -602,7 +620,6 @@ static void __init reserve_crashkernel(void)
 		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
 	crashk_res.start = crash_base;
 	crashk_res.end = crash_base + crash_size - 1;
-	insert_resource(&iomem_resource, &crashk_res);
 	memblock_remove(crash_base, crash_size);
 	pr_info("Reserving %lluMB of memory at %lluMB "
 		"for crashkernel (System RAM: %luMB)\n",
@@ -901,6 +918,7 @@ void __init setup_arch(char **cmdline_p)
 	setup_vmcoreinfo();
 	setup_lowcore();
 	smp_fill_possible_mask();
+	cpu_detect_mhz_feature();
         cpu_init();
 	numa_setup();
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 7b89a7572..35531fe1c 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -242,10 +242,8 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
 {
 	struct lowcore *lc = pcpu->lowcore;
 
-	if (MACHINE_HAS_TLB_LC)
-		cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
+	cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
 	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
-	atomic_inc(&init_mm.context.attach_count);
 	lc->cpu_nr = cpu;
 	lc->spinlock_lockval = arch_spin_lockval(cpu);
 	lc->percpu_offset = __per_cpu_offset[cpu];
@@ -320,17 +318,11 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
  */
 static int pcpu_set_smt(unsigned int mtid)
 {
-	register unsigned long reg1 asm ("1") = (unsigned long) mtid;
 	int cc;
 
 	if (smp_cpu_mtid == mtid)
 		return 0;
-	asm volatile(
-		"	sigp	%1,0,%2	# sigp set multi-threading\n"
-		"	ipm	%0\n"
-		"	srl	%0,28\n"
-		: "=d" (cc) : "d" (reg1), "K" (SIGP_SET_MULTI_THREADING)
-		: "cc");
+	cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL);
 	if (cc == 0) {
 		smp_cpu_mtid = mtid;
 		smp_cpu_mt_shift = 0;
@@ -876,10 +868,8 @@ void __cpu_die(unsigned int cpu)
 	while (!pcpu_stopped(pcpu))
 		cpu_relax();
 	pcpu_free_lowcore(pcpu);
-	atomic_dec(&init_mm.context.attach_count);
 	cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
-	if (MACHINE_HAS_TLB_LC)
-		cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
+	cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
 }
 
 void __noreturn cpu_die(void)
@@ -897,7 +887,7 @@ void __init smp_fill_possible_mask(void)
 
 	sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1;
 	sclp_max = min(smp_max_threads, sclp_max);
-	sclp_max = sclp.max_cores * sclp_max ?: nr_cpu_ids;
+	sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids;
 	possible = setup_possible_cpus ?: nr_cpu_ids;
 	possible = min(possible, sclp_max);
 	for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index f7dba3887..050b8d067 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -16,21 +16,11 @@
 #include <asm/sysinfo.h>
 #include <asm/cpcmd.h>
 #include <asm/topology.h>
-
-/* Sigh, math-emu. Don't ask. */
-#include <asm/sfp-util.h>
-#include <math-emu/soft-fp.h>
-#include <math-emu/single.h>
+#include <asm/fpu/api.h>
 
 int topology_max_mnest;
 
-/*
- * stsi - store system information
- *
- * Returns the current configuration level if function code 0 was specified.
- * Otherwise returns 0 on success or a negative value on error.
- */
-int stsi(void *sysinfo, int fc, int sel1, int sel2)
+static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl)
 {
 	register int r0 asm("0") = (fc << 28) | sel1;
 	register int r1 asm("1") = sel2;
@@ -45,9 +35,24 @@ int stsi(void *sysinfo, int fc, int sel1, int sel2)
 		: "+d" (r0), "+d" (rc)
 		: "d" (r1), "a" (sysinfo), "K" (-EOPNOTSUPP)
 		: "cc", "memory");
+	*lvl = ((unsigned int) r0) >> 28;
+	return rc;
+}
+
+/*
+ * stsi - store system information
+ *
+ * Returns the current configuration level if function code 0 was specified.
+ * Otherwise returns 0 on success or a negative value on error.
+ */
+int stsi(void *sysinfo, int fc, int sel1, int sel2)
+{
+	int lvl, rc;
+
+	rc = __stsi(sysinfo, fc, sel1, sel2, &lvl);
 	if (rc)
 		return rc;
-	return fc ? 0 : ((unsigned int) r0) >> 28;
+	return fc ? 0 : lvl;
 }
 EXPORT_SYMBOL(stsi);
 
@@ -414,10 +419,8 @@ subsys_initcall(create_proc_service_level);
 void s390_adjust_jiffies(void)
 {
 	struct sysinfo_1_2_2 *info;
-	const unsigned int fmil = 0x4b189680;	/* 1e7 as 32-bit float. */
-	FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
-	FP_DECL_EX;
-	unsigned int capability;
+	unsigned long capability;
+	struct kernel_fpu fpu;
 
 	info = (void *) get_zeroed_page(GFP_KERNEL);
 	if (!info)
@@ -433,15 +436,25 @@ void s390_adjust_jiffies(void)
 		 * higher cpu capacity. Bogomips are the other way round.
 		 * To get to a halfway suitable number we divide 1e7
 		 * by the cpu capability number. Yes, that means a floating
-		 * point division .. math-emu here we come :-)
+		 * point division ..
 		 */
-		FP_UNPACK_SP(SA, &fmil);
-		if ((info->capability >> 23) == 0)
-			FP_FROM_INT_S(SB, (long) info->capability, 64, long);
-		else
-			FP_UNPACK_SP(SB, &info->capability);
-		FP_DIV_S(SR, SA, SB);
-		FP_TO_INT_S(capability, SR, 32, 0);
+		kernel_fpu_begin(&fpu, KERNEL_FPR);
+		asm volatile(
+			"	sfpc	%3\n"
+			"	l	%0,%1\n"
+			"	tmlh	%0,0xff80\n"
+			"	jnz	0f\n"
+			"	cefbr	%%f2,%0\n"
+			"	j	1f\n"
+			"0:	le	%%f2,%1\n"
+			"1:	cefbr	%%f0,%2\n"
+			"	debr	%%f0,%%f2\n"
+			"	cgebr	%0,5,%%f0\n"
+			: "=&d" (capability)
+			: "Q" (info->capability), "d" (10000000), "d" (0)
+			: "cc"
+			);
+		kernel_fpu_end(&fpu);
 	} else
 		/*
 		 * Really old machine without stsi block for basic
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 9409d32f2..4e9949800 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -39,13 +39,14 @@
 #include <linux/gfp.h>
 #include <linux/kprobes.h>
 #include <asm/uaccess.h>
+#include <asm/facility.h>
 #include <asm/delay.h>
 #include <asm/div64.h>
 #include <asm/vdso.h>
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
 #include <asm/vtimer.h>
-#include <asm/etr.h>
+#include <asm/stp.h>
 #include <asm/cio.h>
 #include "entry.h"
 
@@ -61,6 +62,32 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators);
 ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier);
 EXPORT_SYMBOL(s390_epoch_delta_notifier);
 
+unsigned char ptff_function_mask[16];
+unsigned long lpar_offset;
+unsigned long initial_leap_seconds;
+
+/*
+ * Get time offsets with PTFF
+ */
+void __init ptff_init(void)
+{
+	struct ptff_qto qto;
+	struct ptff_qui qui;
+
+	if (!test_facility(28))
+		return;
+	ptff(&ptff_function_mask, sizeof(ptff_function_mask), PTFF_QAF);
+
+	/* get LPAR offset */
+	if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
+		lpar_offset = qto.tod_epoch_difference;
+
+	/* get initial leap seconds */
+	if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0)
+		initial_leap_seconds = (unsigned long)
+			((long) qui.old_leap * 4096000000L);
+}
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
@@ -162,30 +189,32 @@ static void clock_comparator_interrupt(struct ext_code ext_code,
 		set_clock_comparator(S390_lowcore.clock_comparator);
 }
 
-static void etr_timing_alert(struct etr_irq_parm *);
 static void stp_timing_alert(struct stp_irq_parm *);
 
 static void timing_alert_interrupt(struct ext_code ext_code,
 				   unsigned int param32, unsigned long param64)
 {
 	inc_irq_stat(IRQEXT_TLA);
-	if (param32 & 0x00c40000)
-		etr_timing_alert((struct etr_irq_parm *) &param32);
 	if (param32 & 0x00038000)
 		stp_timing_alert((struct stp_irq_parm *) &param32);
 }
 
-static void etr_reset(void);
 static void stp_reset(void);
 
 void read_persistent_clock64(struct timespec64 *ts)
 {
-	tod_to_timeval(get_tod_clock() - TOD_UNIX_EPOCH, ts);
+	__u64 clock;
+
+	clock = get_tod_clock() - initial_leap_seconds;
+	tod_to_timeval(clock - TOD_UNIX_EPOCH, ts);
 }
 
 void read_boot_clock64(struct timespec64 *ts)
 {
-	tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts);
+	__u64 clock;
+
+	clock = sched_clock_base_cc - initial_leap_seconds;
+	tod_to_timeval(clock - TOD_UNIX_EPOCH, ts);
 }
 
 static cycle_t read_tod_clock(struct clocksource *cs)
@@ -269,7 +298,6 @@ void update_vsyscall_tz(void)
 void __init time_init(void)
 {
 	/* Reset time synchronization interfaces. */
-	etr_reset();
 	stp_reset();
 
 	/* request the clock comparator external interrupt */
@@ -337,20 +365,20 @@ static unsigned long clock_sync_flags;
 #define CLOCK_SYNC_STP		3
 
 /*
- * The synchronous get_clock function. It will write the current clock
- * value to the clock pointer and return 0 if the clock is in sync with
- * the external time source. If the clock mode is local it will return
- * -EOPNOTSUPP and -EAGAIN if the clock is not in sync with the external
- * reference.
+ * The get_clock function for the physical clock. It will get the current
+ * TOD clock, subtract the LPAR offset and write the result to *clock.
+ * The function returns 0 if the clock is in sync with the external time
+ * source. If the clock mode is local it will return -EOPNOTSUPP and
+ * -EAGAIN if the clock is not in sync with the external reference.
  */
-int get_sync_clock(unsigned long long *clock)
+int get_phys_clock(unsigned long long *clock)
 {
 	atomic_t *sw_ptr;
 	unsigned int sw0, sw1;
 
 	sw_ptr = &get_cpu_var(clock_sync_word);
 	sw0 = atomic_read(sw_ptr);
-	*clock = get_tod_clock();
+	*clock = get_tod_clock() - lpar_offset;
 	sw1 = atomic_read(sw_ptr);
 	put_cpu_var(clock_sync_word);
 	if (sw0 == sw1 && (sw0 & 0x80000000U))
@@ -364,7 +392,7 @@ int get_sync_clock(unsigned long long *clock)
 		return -EACCES;
 	return -EAGAIN;
 }
-EXPORT_SYMBOL(get_sync_clock);
+EXPORT_SYMBOL(get_phys_clock);
 
 /*
  * Make get_sync_clock return -EAGAIN.
@@ -416,301 +444,6 @@ static void __init time_init_wq(void)
 	time_sync_wq = create_singlethread_workqueue("timesync");
 }
 
-/*
- * External Time Reference (ETR) code.
- */
-static int etr_port0_online;
-static int etr_port1_online;
-static int etr_steai_available;
-
-static int __init early_parse_etr(char *p)
-{
-	if (strncmp(p, "off", 3) == 0)
-		etr_port0_online = etr_port1_online = 0;
-	else if (strncmp(p, "port0", 5) == 0)
-		etr_port0_online = 1;
-	else if (strncmp(p, "port1", 5) == 0)
-		etr_port1_online = 1;
-	else if (strncmp(p, "on", 2) == 0)
-		etr_port0_online = etr_port1_online = 1;
-	return 0;
-}
-early_param("etr", early_parse_etr);
-
-enum etr_event {
-	ETR_EVENT_PORT0_CHANGE,
-	ETR_EVENT_PORT1_CHANGE,
-	ETR_EVENT_PORT_ALERT,
-	ETR_EVENT_SYNC_CHECK,
-	ETR_EVENT_SWITCH_LOCAL,
-	ETR_EVENT_UPDATE,
-};
-
-/*
- * Valid bit combinations of the eacr register are (x = don't care):
- * e0 e1 dp p0 p1 ea es sl
- *  0  0  x  0	0  0  0  0  initial, disabled state
- *  0  0  x  0	1  1  0  0  port 1 online
- *  0  0  x  1	0  1  0  0  port 0 online
- *  0  0  x  1	1  1  0  0  both ports online
- *  0  1  x  0	1  1  0  0  port 1 online and usable, ETR or PPS mode
- *  0  1  x  0	1  1  0  1  port 1 online, usable and ETR mode
- *  0  1  x  0	1  1  1  0  port 1 online, usable, PPS mode, in-sync
- *  0  1  x  0	1  1  1  1  port 1 online, usable, ETR mode, in-sync
- *  0  1  x  1	1  1  0  0  both ports online, port 1 usable
- *  0  1  x  1	1  1  1  0  both ports online, port 1 usable, PPS mode, in-sync
- *  0  1  x  1	1  1  1  1  both ports online, port 1 usable, ETR mode, in-sync
- *  1  0  x  1	0  1  0  0  port 0 online and usable, ETR or PPS mode
- *  1  0  x  1	0  1  0  1  port 0 online, usable and ETR mode
- *  1  0  x  1	0  1  1  0  port 0 online, usable, PPS mode, in-sync
- *  1  0  x  1	0  1  1  1  port 0 online, usable, ETR mode, in-sync
- *  1  0  x  1	1  1  0  0  both ports online, port 0 usable
- *  1  0  x  1	1  1  1  0  both ports online, port 0 usable, PPS mode, in-sync
- *  1  0  x  1	1  1  1  1  both ports online, port 0 usable, ETR mode, in-sync
- *  1  1  x  1	1  1  1  0  both ports online & usable, ETR, in-sync
- *  1  1  x  1	1  1  1  1  both ports online & usable, ETR, in-sync
- */
-static struct etr_eacr etr_eacr;
-static u64 etr_tolec;			/* time of last eacr update */
-static struct etr_aib etr_port0;
-static int etr_port0_uptodate;
-static struct etr_aib etr_port1;
-static int etr_port1_uptodate;
-static unsigned long etr_events;
-static struct timer_list etr_timer;
-
-static void etr_timeout(unsigned long dummy);
-static void etr_work_fn(struct work_struct *work);
-static DEFINE_MUTEX(etr_work_mutex);
-static DECLARE_WORK(etr_work, etr_work_fn);
-
-/*
- * Reset ETR attachment.
- */
-static void etr_reset(void)
-{
-	etr_eacr =  (struct etr_eacr) {
-		.e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0,
-		.p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0,
-		.es = 0, .sl = 0 };
-	if (etr_setr(&etr_eacr) == 0) {
-		etr_tolec = get_tod_clock();
-		set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags);
-		if (etr_port0_online && etr_port1_online)
-			set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
-	} else if (etr_port0_online || etr_port1_online) {
-		pr_warn("The real or virtual hardware system does not provide an ETR interface\n");
-		etr_port0_online = etr_port1_online = 0;
-	}
-}
-
-static int __init etr_init(void)
-{
-	struct etr_aib aib;
-
-	if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
-		return 0;
-	time_init_wq();
-	/* Check if this machine has the steai instruction. */
-	if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0)
-		etr_steai_available = 1;
-	setup_timer(&etr_timer, etr_timeout, 0UL);
-	if (etr_port0_online) {
-		set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
-		queue_work(time_sync_wq, &etr_work);
-	}
-	if (etr_port1_online) {
-		set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
-		queue_work(time_sync_wq, &etr_work);
-	}
-	return 0;
-}
-
-arch_initcall(etr_init);
-
-/*
- * Two sorts of ETR machine checks. The architecture reads:
- * "When a machine-check niterruption occurs and if a switch-to-local or
- *  ETR-sync-check interrupt request is pending but disabled, this pending
- *  disabled interruption request is indicated and is cleared".
- * Which means that we can get etr_switch_to_local events from the machine
- * check handler although the interruption condition is disabled. Lovely..
- */
-
-/*
- * Switch to local machine check. This is called when the last usable
- * ETR port goes inactive. After switch to local the clock is not in sync.
- */
-int etr_switch_to_local(void)
-{
-	if (!etr_eacr.sl)
-		return 0;
-	disable_sync_clock(NULL);
-	if (!test_and_set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events)) {
-		etr_eacr.es = etr_eacr.sl = 0;
-		etr_setr(&etr_eacr);
-		return 1;
-	}
-	return 0;
-}
-
-/*
- * ETR sync check machine check. This is called when the ETR OTE and the
- * local clock OTE are farther apart than the ETR sync check tolerance.
- * After a ETR sync check the clock is not in sync. The machine check
- * is broadcasted to all cpus at the same time.
- */
-int etr_sync_check(void)
-{
-	if (!etr_eacr.es)
-		return 0;
-	disable_sync_clock(NULL);
-	if (!test_and_set_bit(ETR_EVENT_SYNC_CHECK, &etr_events)) {
-		etr_eacr.es = 0;
-		etr_setr(&etr_eacr);
-		return 1;
-	}
-	return 0;
-}
-
-void etr_queue_work(void)
-{
-	queue_work(time_sync_wq, &etr_work);
-}
-
-/*
- * ETR timing alert. There are two causes:
- * 1) port state change, check the usability of the port
- * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the
- *    sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3)
- *    or ETR-data word 4 (edf4) has changed.
- */
-static void etr_timing_alert(struct etr_irq_parm *intparm)
-{
-	if (intparm->pc0)
-		/* ETR port 0 state change. */
-		set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
-	if (intparm->pc1)
-		/* ETR port 1 state change. */
-		set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
-	if (intparm->eai)
-		/*
-		 * ETR port alert on either port 0, 1 or both.
-		 * Both ports are not up-to-date now.
-		 */
-		set_bit(ETR_EVENT_PORT_ALERT, &etr_events);
-	queue_work(time_sync_wq, &etr_work);
-}
-
-static void etr_timeout(unsigned long dummy)
-{
-	set_bit(ETR_EVENT_UPDATE, &etr_events);
-	queue_work(time_sync_wq, &etr_work);
-}
-
-/*
- * Check if the etr mode is pss.
- */
-static inline int etr_mode_is_pps(struct etr_eacr eacr)
-{
-	return eacr.es && !eacr.sl;
-}
-
-/*
- * Check if the etr mode is etr.
- */
-static inline int etr_mode_is_etr(struct etr_eacr eacr)
-{
-	return eacr.es && eacr.sl;
-}
-
-/*
- * Check if the port can be used for TOD synchronization.
- * For PPS mode the port has to receive OTEs. For ETR mode
- * the port has to receive OTEs, the ETR stepping bit has to
- * be zero and the validity bits for data frame 1, 2, and 3
- * have to be 1.
- */
-static int etr_port_valid(struct etr_aib *aib, int port)
-{
-	unsigned int psc;
-
-	/* Check that this port is receiving OTEs. */
-	if (aib->tsp == 0)
-		return 0;
-
-	psc = port ? aib->esw.psc1 : aib->esw.psc0;
-	if (psc == etr_lpsc_pps_mode)
-		return 1;
-	if (psc == etr_lpsc_operational_step)
-		return !aib->esw.y && aib->slsw.v1 &&
-			aib->slsw.v2 && aib->slsw.v3;
-	return 0;
-}
-
-/*
- * Check if two ports are on the same network.
- */
-static int etr_compare_network(struct etr_aib *aib1, struct etr_aib *aib2)
-{
-	// FIXME: any other fields we have to compare?
-	return aib1->edf1.net_id == aib2->edf1.net_id;
-}
-
-/*
- * Wrapper for etr_stei that converts physical port states
- * to logical port states to be consistent with the output
- * of stetr (see etr_psc vs. etr_lpsc).
- */
-static void etr_steai_cv(struct etr_aib *aib, unsigned int func)
-{
-	BUG_ON(etr_steai(aib, func) != 0);
-	/* Convert port state to logical port state. */
-	if (aib->esw.psc0 == 1)
-		aib->esw.psc0 = 2;
-	else if (aib->esw.psc0 == 0 && aib->esw.p == 0)
-		aib->esw.psc0 = 1;
-	if (aib->esw.psc1 == 1)
-		aib->esw.psc1 = 2;
-	else if (aib->esw.psc1 == 0 && aib->esw.p == 1)
-		aib->esw.psc1 = 1;
-}
-
-/*
- * Check if the aib a2 is still connected to the same attachment as
- * aib a1, the etv values differ by one and a2 is valid.
- */
-static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p)
-{
-	int state_a1, state_a2;
-
-	/* Paranoia check: e0/e1 should better be the same. */
-	if (a1->esw.eacr.e0 != a2->esw.eacr.e0 ||
-	    a1->esw.eacr.e1 != a2->esw.eacr.e1)
-		return 0;
-
-	/* Still connected to the same etr ? */
-	state_a1 = p ? a1->esw.psc1 : a1->esw.psc0;
-	state_a2 = p ? a2->esw.psc1 : a2->esw.psc0;
-	if (state_a1 == etr_lpsc_operational_step) {
-		if (state_a2 != etr_lpsc_operational_step ||
-		    a1->edf1.net_id != a2->edf1.net_id ||
-		    a1->edf1.etr_id != a2->edf1.etr_id ||
-		    a1->edf1.etr_pn != a2->edf1.etr_pn)
-			return 0;
-	} else if (state_a2 != etr_lpsc_pps_mode)
-		return 0;
-
-	/* The ETV value of a2 needs to be ETV of a1 + 1. */
-	if (a1->edf2.etv + 1 != a2->edf2.etv)
-		return 0;
-
-	if (!etr_port_valid(a2, p))
-		return 0;
-
-	return 1;
-}
-
 struct clock_sync_data {
 	atomic_t cpus;
 	int in_sync;
@@ -747,688 +480,6 @@ static void clock_sync_cpu(struct clock_sync_data *sync)
 	fixup_clock_comparator(sync->fixup_cc);
 }
 
-/*
- * Sync the TOD clock using the port referred to by aibp. This port
- * has to be enabled and the other port has to be disabled. The
- * last eacr update has to be more than 1.6 seconds in the past.
- */
-static int etr_sync_clock(void *data)
-{
-	static int first;
-	unsigned long long clock, old_clock, clock_delta, delay, delta;
-	struct clock_sync_data *etr_sync;
-	struct etr_aib *sync_port, *aib;
-	int port;
-	int rc;
-
-	etr_sync = data;
-
-	if (xchg(&first, 1) == 1) {
-		/* Slave */
-		clock_sync_cpu(etr_sync);
-		return 0;
-	}
-
-	/* Wait until all other cpus entered the sync function. */
-	while (atomic_read(&etr_sync->cpus) != 0)
-		cpu_relax();
-
-	port = etr_sync->etr_port;
-	aib = etr_sync->etr_aib;
-	sync_port = (port == 0) ? &etr_port0 : &etr_port1;
-	enable_sync_clock();
-
-	/* Set clock to next OTE. */
-	__ctl_set_bit(14, 21);
-	__ctl_set_bit(0, 29);
-	clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32;
-	old_clock = get_tod_clock();
-	if (set_tod_clock(clock) == 0) {
-		__udelay(1);	/* Wait for the clock to start. */
-		__ctl_clear_bit(0, 29);
-		__ctl_clear_bit(14, 21);
-		etr_stetr(aib);
-		/* Adjust Linux timing variables. */
-		delay = (unsigned long long)
-			(aib->edf2.etv - sync_port->edf2.etv) << 32;
-		delta = adjust_time(old_clock, clock, delay);
-		clock_delta = clock - old_clock;
-		atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0,
-					   &clock_delta);
-		etr_sync->fixup_cc = delta;
-		fixup_clock_comparator(delta);
-		/* Verify that the clock is properly set. */
-		if (!etr_aib_follows(sync_port, aib, port)) {
-			/* Didn't work. */
-			disable_sync_clock(NULL);
-			etr_sync->in_sync = -EAGAIN;
-			rc = -EAGAIN;
-		} else {
-			etr_sync->in_sync = 1;
-			rc = 0;
-		}
-	} else {
-		/* Could not set the clock ?!? */
-		__ctl_clear_bit(0, 29);
-		__ctl_clear_bit(14, 21);
-		disable_sync_clock(NULL);
-		etr_sync->in_sync = -EAGAIN;
-		rc = -EAGAIN;
-	}
-	xchg(&first, 0);
-	return rc;
-}
-
-static int etr_sync_clock_stop(struct etr_aib *aib, int port)
-{
-	struct clock_sync_data etr_sync;
-	struct etr_aib *sync_port;
-	int follows;
-	int rc;
-
-	/* Check if the current aib is adjacent to the sync port aib. */
-	sync_port = (port == 0) ? &etr_port0 : &etr_port1;
-	follows = etr_aib_follows(sync_port, aib, port);
-	memcpy(sync_port, aib, sizeof(*aib));
-	if (!follows)
-		return -EAGAIN;
-	memset(&etr_sync, 0, sizeof(etr_sync));
-	etr_sync.etr_aib = aib;
-	etr_sync.etr_port = port;
-	get_online_cpus();
-	atomic_set(&etr_sync.cpus, num_online_cpus() - 1);
-	rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask);
-	put_online_cpus();
-	return rc;
-}
-
-/*
- * Handle the immediate effects of the different events.
- * The port change event is used for online/offline changes.
- */
-static struct etr_eacr etr_handle_events(struct etr_eacr eacr)
-{
-	if (test_and_clear_bit(ETR_EVENT_SYNC_CHECK, &etr_events))
-		eacr.es = 0;
-	if (test_and_clear_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events))
-		eacr.es = eacr.sl = 0;
-	if (test_and_clear_bit(ETR_EVENT_PORT_ALERT, &etr_events))
-		etr_port0_uptodate = etr_port1_uptodate = 0;
-
-	if (test_and_clear_bit(ETR_EVENT_PORT0_CHANGE, &etr_events)) {
-		if (eacr.e0)
-			/*
-			 * Port change of an enabled port. We have to
-			 * assume that this can have caused an stepping
-			 * port switch.
-			 */
-			etr_tolec = get_tod_clock();
-		eacr.p0 = etr_port0_online;
-		if (!eacr.p0)
-			eacr.e0 = 0;
-		etr_port0_uptodate = 0;
-	}
-	if (test_and_clear_bit(ETR_EVENT_PORT1_CHANGE, &etr_events)) {
-		if (eacr.e1)
-			/*
-			 * Port change of an enabled port. We have to
-			 * assume that this can have caused an stepping
-			 * port switch.
-			 */
-			etr_tolec = get_tod_clock();
-		eacr.p1 = etr_port1_online;
-		if (!eacr.p1)
-			eacr.e1 = 0;
-		etr_port1_uptodate = 0;
-	}
-	clear_bit(ETR_EVENT_UPDATE, &etr_events);
-	return eacr;
-}
-
-/*
- * Set up a timer that expires after the etr_tolec + 1.6 seconds if
- * one of the ports needs an update.
- */
-static void etr_set_tolec_timeout(unsigned long long now)
-{
-	unsigned long micros;
-
-	if ((!etr_eacr.p0 || etr_port0_uptodate) &&
-	    (!etr_eacr.p1 || etr_port1_uptodate))
-		return;
-	micros = (now > etr_tolec) ? ((now - etr_tolec) >> 12) : 0;
-	micros = (micros > 1600000) ? 0 : 1600000 - micros;
-	mod_timer(&etr_timer, jiffies + (micros * HZ) / 1000000 + 1);
-}
-
-/*
- * Set up a time that expires after 1/2 second.
- */
-static void etr_set_sync_timeout(void)
-{
-	mod_timer(&etr_timer, jiffies + HZ/2);
-}
-
-/*
- * Update the aib information for one or both ports.
- */
-static struct etr_eacr etr_handle_update(struct etr_aib *aib,
-					 struct etr_eacr eacr)
-{
-	/* With both ports disabled the aib information is useless. */
-	if (!eacr.e0 && !eacr.e1)
-		return eacr;
-
-	/* Update port0 or port1 with aib stored in etr_work_fn. */
-	if (aib->esw.q == 0) {
-		/* Information for port 0 stored. */
-		if (eacr.p0 && !etr_port0_uptodate) {
-			etr_port0 = *aib;
-			if (etr_port0_online)
-				etr_port0_uptodate = 1;
-		}
-	} else {
-		/* Information for port 1 stored. */
-		if (eacr.p1 && !etr_port1_uptodate) {
-			etr_port1 = *aib;
-			if (etr_port0_online)
-				etr_port1_uptodate = 1;
-		}
-	}
-
-	/*
-	 * Do not try to get the alternate port aib if the clock
-	 * is not in sync yet.
-	 */
-	if (!eacr.es || !check_sync_clock())
-		return eacr;
-
-	/*
-	 * If steai is available we can get the information about
-	 * the other port immediately. If only stetr is available the
-	 * data-port bit toggle has to be used.
-	 */
-	if (etr_steai_available) {
-		if (eacr.p0 && !etr_port0_uptodate) {
-			etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0);
-			etr_port0_uptodate = 1;
-		}
-		if (eacr.p1 && !etr_port1_uptodate) {
-			etr_steai_cv(&etr_port1, ETR_STEAI_PORT_1);
-			etr_port1_uptodate = 1;
-		}
-	} else {
-		/*
-		 * One port was updated above, if the other
-		 * port is not uptodate toggle dp bit.
-		 */
-		if ((eacr.p0 && !etr_port0_uptodate) ||
-		    (eacr.p1 && !etr_port1_uptodate))
-			eacr.dp ^= 1;
-		else
-			eacr.dp = 0;
-	}
-	return eacr;
-}
-
-/*
- * Write new etr control register if it differs from the current one.
- * Return 1 if etr_tolec has been updated as well.
- */
-static void etr_update_eacr(struct etr_eacr eacr)
-{
-	int dp_changed;
-
-	if (memcmp(&etr_eacr, &eacr, sizeof(eacr)) == 0)
-		/* No change, return. */
-		return;
-	/*
-	 * The disable of an active port of the change of the data port
-	 * bit can/will cause a change in the data port.
-	 */
-	dp_changed = etr_eacr.e0 > eacr.e0 || etr_eacr.e1 > eacr.e1 ||
-		(etr_eacr.dp ^ eacr.dp) != 0;
-	etr_eacr = eacr;
-	etr_setr(&etr_eacr);
-	if (dp_changed)
-		etr_tolec = get_tod_clock();
-}
-
-/*
- * ETR work. In this function you'll find the main logic. In
- * particular this is the only function that calls etr_update_eacr(),
- * it "controls" the etr control register.
- */
-static void etr_work_fn(struct work_struct *work)
-{
-	unsigned long long now;
-	struct etr_eacr eacr;
-	struct etr_aib aib;
-	int sync_port;
-
-	/* prevent multiple execution. */
-	mutex_lock(&etr_work_mutex);
-
-	/* Create working copy of etr_eacr. */
-	eacr = etr_eacr;
-
-	/* Check for the different events and their immediate effects. */
-	eacr = etr_handle_events(eacr);
-
-	/* Check if ETR is supposed to be active. */
-	eacr.ea = eacr.p0 || eacr.p1;
-	if (!eacr.ea) {
-		/* Both ports offline. Reset everything. */
-		eacr.dp = eacr.es = eacr.sl = 0;
-		on_each_cpu(disable_sync_clock, NULL, 1);
-		del_timer_sync(&etr_timer);
-		etr_update_eacr(eacr);
-		goto out_unlock;
-	}
-
-	/* Store aib to get the current ETR status word. */
-	BUG_ON(etr_stetr(&aib) != 0);
-	etr_port0.esw = etr_port1.esw = aib.esw;	/* Copy status word. */
-	now = get_tod_clock();
-
-	/*
-	 * Update the port information if the last stepping port change
-	 * or data port change is older than 1.6 seconds.
-	 */
-	if (now >= etr_tolec + (1600000 << 12))
-		eacr = etr_handle_update(&aib, eacr);
-
-	/*
-	 * Select ports to enable. The preferred synchronization mode is PPS.
-	 * If a port can be enabled depends on a number of things:
-	 * 1) The port needs to be online and uptodate. A port is not
-	 *    disabled just because it is not uptodate, but it is only
-	 *    enabled if it is uptodate.
-	 * 2) The port needs to have the same mode (pps / etr).
-	 * 3) The port needs to be usable -> etr_port_valid() == 1
-	 * 4) To enable the second port the clock needs to be in sync.
-	 * 5) If both ports are useable and are ETR ports, the network id
-	 *    has to be the same.
-	 * The eacr.sl bit is used to indicate etr mode vs. pps mode.
-	 */
-	if (eacr.p0 && aib.esw.psc0 == etr_lpsc_pps_mode) {
-		eacr.sl = 0;
-		eacr.e0 = 1;
-		if (!etr_mode_is_pps(etr_eacr))
-			eacr.es = 0;
-		if (!eacr.es || !eacr.p1 || aib.esw.psc1 != etr_lpsc_pps_mode)
-			eacr.e1 = 0;
-		// FIXME: uptodate checks ?
-		else if (etr_port0_uptodate && etr_port1_uptodate)
-			eacr.e1 = 1;
-		sync_port = (etr_port0_uptodate &&
-			     etr_port_valid(&etr_port0, 0)) ? 0 : -1;
-	} else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) {
-		eacr.sl = 0;
-		eacr.e0 = 0;
-		eacr.e1 = 1;
-		if (!etr_mode_is_pps(etr_eacr))
-			eacr.es = 0;
-		sync_port = (etr_port1_uptodate &&
-			     etr_port_valid(&etr_port1, 1)) ? 1 : -1;
-	} else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) {
-		eacr.sl = 1;
-		eacr.e0 = 1;
-		if (!etr_mode_is_etr(etr_eacr))
-			eacr.es = 0;
-		if (!eacr.es || !eacr.p1 ||
-		    aib.esw.psc1 != etr_lpsc_operational_alt)
-			eacr.e1 = 0;
-		else if (etr_port0_uptodate && etr_port1_uptodate &&
-			 etr_compare_network(&etr_port0, &etr_port1))
-			eacr.e1 = 1;
-		sync_port = (etr_port0_uptodate &&
-			     etr_port_valid(&etr_port0, 0)) ? 0 : -1;
-	} else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) {
-		eacr.sl = 1;
-		eacr.e0 = 0;
-		eacr.e1 = 1;
-		if (!etr_mode_is_etr(etr_eacr))
-			eacr.es = 0;
-		sync_port = (etr_port1_uptodate &&
-			     etr_port_valid(&etr_port1, 1)) ? 1 : -1;
-	} else {
-		/* Both ports not usable. */
-		eacr.es = eacr.sl = 0;
-		sync_port = -1;
-	}
-
-	/*
-	 * If the clock is in sync just update the eacr and return.
-	 * If there is no valid sync port wait for a port update.
-	 */
-	if ((eacr.es && check_sync_clock()) || sync_port < 0) {
-		etr_update_eacr(eacr);
-		etr_set_tolec_timeout(now);
-		goto out_unlock;
-	}
-
-	/*
-	 * Prepare control register for clock syncing
-	 * (reset data port bit, set sync check control.
-	 */
-	eacr.dp = 0;
-	eacr.es = 1;
-
-	/*
-	 * Update eacr and try to synchronize the clock. If the update
-	 * of eacr caused a stepping port switch (or if we have to
-	 * assume that a stepping port switch has occurred) or the
-	 * clock syncing failed, reset the sync check control bit
-	 * and set up a timer to try again after 0.5 seconds
-	 */
-	etr_update_eacr(eacr);
-	if (now < etr_tolec + (1600000 << 12) ||
-	    etr_sync_clock_stop(&aib, sync_port) != 0) {
-		/* Sync failed. Try again in 1/2 second. */
-		eacr.es = 0;
-		etr_update_eacr(eacr);
-		etr_set_sync_timeout();
-	} else
-		etr_set_tolec_timeout(now);
-out_unlock:
-	mutex_unlock(&etr_work_mutex);
-}
-
-/*
- * Sysfs interface functions
- */
-static struct bus_type etr_subsys = {
-	.name		= "etr",
-	.dev_name	= "etr",
-};
-
-static struct device etr_port0_dev = {
-	.id	= 0,
-	.bus	= &etr_subsys,
-};
-
-static struct device etr_port1_dev = {
-	.id	= 1,
-	.bus	= &etr_subsys,
-};
-
-/*
- * ETR subsys attributes
- */
-static ssize_t etr_stepping_port_show(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	return sprintf(buf, "%i\n", etr_port0.esw.p);
-}
-
-static DEVICE_ATTR(stepping_port, 0400, etr_stepping_port_show, NULL);
-
-static ssize_t etr_stepping_mode_show(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	char *mode_str;
-
-	if (etr_mode_is_pps(etr_eacr))
-		mode_str = "pps";
-	else if (etr_mode_is_etr(etr_eacr))
-		mode_str = "etr";
-	else
-		mode_str = "local";
-	return sprintf(buf, "%s\n", mode_str);
-}
-
-static DEVICE_ATTR(stepping_mode, 0400, etr_stepping_mode_show, NULL);
-
-/*
- * ETR port attributes
- */
-static inline struct etr_aib *etr_aib_from_dev(struct device *dev)
-{
-	if (dev == &etr_port0_dev)
-		return etr_port0_online ? &etr_port0 : NULL;
-	else
-		return etr_port1_online ? &etr_port1 : NULL;
-}
-
-static ssize_t etr_online_show(struct device *dev,
-				struct device_attribute *attr,
-				char *buf)
-{
-	unsigned int online;
-
-	online = (dev == &etr_port0_dev) ? etr_port0_online : etr_port1_online;
-	return sprintf(buf, "%i\n", online);
-}
-
-static ssize_t etr_online_store(struct device *dev,
-				struct device_attribute *attr,
-				const char *buf, size_t count)
-{
-	unsigned int value;
-
-	value = simple_strtoul(buf, NULL, 0);
-	if (value != 0 && value != 1)
-		return -EINVAL;
-	if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
-		return -EOPNOTSUPP;
-	mutex_lock(&clock_sync_mutex);
-	if (dev == &etr_port0_dev) {
-		if (etr_port0_online == value)
-			goto out;	/* Nothing to do. */
-		etr_port0_online = value;
-		if (etr_port0_online && etr_port1_online)
-			set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
-		else
-			clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
-		set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
-		queue_work(time_sync_wq, &etr_work);
-	} else {
-		if (etr_port1_online == value)
-			goto out;	/* Nothing to do. */
-		etr_port1_online = value;
-		if (etr_port0_online && etr_port1_online)
-			set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
-		else
-			clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
-		set_bit(ETR_EVENT_PORT1_CHANGE, &etr_events);
-		queue_work(time_sync_wq, &etr_work);
-	}
-out:
-	mutex_unlock(&clock_sync_mutex);
-	return count;
-}
-
-static DEVICE_ATTR(online, 0600, etr_online_show, etr_online_store);
-
-static ssize_t etr_stepping_control_show(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ?
-		       etr_eacr.e0 : etr_eacr.e1);
-}
-
-static DEVICE_ATTR(stepping_control, 0400, etr_stepping_control_show, NULL);
-
-static ssize_t etr_mode_code_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	if (!etr_port0_online && !etr_port1_online)
-		/* Status word is not uptodate if both ports are offline. */
-		return -ENODATA;
-	return sprintf(buf, "%i\n", (dev == &etr_port0_dev) ?
-		       etr_port0.esw.psc0 : etr_port0.esw.psc1);
-}
-
-static DEVICE_ATTR(state_code, 0400, etr_mode_code_show, NULL);
-
-static ssize_t etr_untuned_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct etr_aib *aib = etr_aib_from_dev(dev);
-
-	if (!aib || !aib->slsw.v1)
-		return -ENODATA;
-	return sprintf(buf, "%i\n", aib->edf1.u);
-}
-
-static DEVICE_ATTR(untuned, 0400, etr_untuned_show, NULL);
-
-static ssize_t etr_network_id_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct etr_aib *aib = etr_aib_from_dev(dev);
-
-	if (!aib || !aib->slsw.v1)
-		return -ENODATA;
-	return sprintf(buf, "%i\n", aib->edf1.net_id);
-}
-
-static DEVICE_ATTR(network, 0400, etr_network_id_show, NULL);
-
-static ssize_t etr_id_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct etr_aib *aib = etr_aib_from_dev(dev);
-
-	if (!aib || !aib->slsw.v1)
-		return -ENODATA;
-	return sprintf(buf, "%i\n", aib->edf1.etr_id);
-}
-
-static DEVICE_ATTR(id, 0400, etr_id_show, NULL);
-
-static ssize_t etr_port_number_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct etr_aib *aib = etr_aib_from_dev(dev);
-
-	if (!aib || !aib->slsw.v1)
-		return -ENODATA;
-	return sprintf(buf, "%i\n", aib->edf1.etr_pn);
-}
-
-static DEVICE_ATTR(port, 0400, etr_port_number_show, NULL);
-
-static ssize_t etr_coupled_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct etr_aib *aib = etr_aib_from_dev(dev);
-
-	if (!aib || !aib->slsw.v3)
-		return -ENODATA;
-	return sprintf(buf, "%i\n", aib->edf3.c);
-}
-
-static DEVICE_ATTR(coupled, 0400, etr_coupled_show, NULL);
-
-static ssize_t etr_local_time_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct etr_aib *aib = etr_aib_from_dev(dev);
-
-	if (!aib || !aib->slsw.v3)
-		return -ENODATA;
-	return sprintf(buf, "%i\n", aib->edf3.blto);
-}
-
-static DEVICE_ATTR(local_time, 0400, etr_local_time_show, NULL);
-
-static ssize_t etr_utc_offset_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct etr_aib *aib = etr_aib_from_dev(dev);
-
-	if (!aib || !aib->slsw.v3)
-		return -ENODATA;
-	return sprintf(buf, "%i\n", aib->edf3.buo);
-}
-
-static DEVICE_ATTR(utc_offset, 0400, etr_utc_offset_show, NULL);
-
-static struct device_attribute *etr_port_attributes[] = {
-	&dev_attr_online,
-	&dev_attr_stepping_control,
-	&dev_attr_state_code,
-	&dev_attr_untuned,
-	&dev_attr_network,
-	&dev_attr_id,
-	&dev_attr_port,
-	&dev_attr_coupled,
-	&dev_attr_local_time,
-	&dev_attr_utc_offset,
-	NULL
-};
-
-static int __init etr_register_port(struct device *dev)
-{
-	struct device_attribute **attr;
-	int rc;
-
-	rc = device_register(dev);
-	if (rc)
-		goto out;
-	for (attr = etr_port_attributes; *attr; attr++) {
-		rc = device_create_file(dev, *attr);
-		if (rc)
-			goto out_unreg;
-	}
-	return 0;
-out_unreg:
-	for (; attr >= etr_port_attributes; attr--)
-		device_remove_file(dev, *attr);
-	device_unregister(dev);
-out:
-	return rc;
-}
-
-static void __init etr_unregister_port(struct device *dev)
-{
-	struct device_attribute **attr;
-
-	for (attr = etr_port_attributes; *attr; attr++)
-		device_remove_file(dev, *attr);
-	device_unregister(dev);
-}
-
-static int __init etr_init_sysfs(void)
-{
-	int rc;
-
-	rc = subsys_system_register(&etr_subsys, NULL);
-	if (rc)
-		goto out;
-	rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_port);
-	if (rc)
-		goto out_unreg_subsys;
-	rc = device_create_file(etr_subsys.dev_root, &dev_attr_stepping_mode);
-	if (rc)
-		goto out_remove_stepping_port;
-	rc = etr_register_port(&etr_port0_dev);
-	if (rc)
-		goto out_remove_stepping_mode;
-	rc = etr_register_port(&etr_port1_dev);
-	if (rc)
-		goto out_remove_port0;
-	return 0;
-
-out_remove_port0:
-	etr_unregister_port(&etr_port0_dev);
-out_remove_stepping_mode:
-	device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_mode);
-out_remove_stepping_port:
-	device_remove_file(etr_subsys.dev_root, &dev_attr_stepping_port);
-out_unreg_subsys:
-	bus_unregister(&etr_subsys);
-out:
-	return rc;
-}
-
-device_initcall(etr_init_sysfs);
-
 /*
  * Server Time Protocol (STP) code.
  */
@@ -1455,7 +506,7 @@ static void __init stp_reset(void)
 	int rc;
 
 	stp_page = (void *) get_zeroed_page(GFP_ATOMIC);
-	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
 	if (rc == 0)
 		set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
 	else if (stp_online) {
@@ -1533,6 +584,7 @@ static int stp_sync_clock(void *data)
 	static int first;
 	unsigned long long old_clock, delta, new_clock, clock_delta;
 	struct clock_sync_data *stp_sync;
+	struct ptff_qto qto;
 	int rc;
 
 	stp_sync = data;
@@ -1554,11 +606,14 @@ static int stp_sync_clock(void *data)
 	    stp_info.todoff[2] || stp_info.todoff[3] ||
 	    stp_info.tmd != 2) {
 		old_clock = get_tod_clock();
-		rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
+		rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, &clock_delta);
 		if (rc == 0) {
-			new_clock = get_tod_clock();
+			new_clock = old_clock + clock_delta;
 			delta = adjust_time(old_clock, new_clock, 0);
-			clock_delta = new_clock - old_clock;
+			if (ptff_query(PTFF_QTO) &&
+			    ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
+				/* Update LPAR offset */
+				lpar_offset = qto.tod_epoch_difference;
 			atomic_notifier_call_chain(&s390_epoch_delta_notifier,
 						   0, &clock_delta);
 			fixup_clock_comparator(delta);
@@ -1590,12 +645,12 @@ static void stp_work_fn(struct work_struct *work)
 	mutex_lock(&stp_work_mutex);
 
 	if (!stp_online) {
-		chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+		chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
 		del_timer_sync(&stp_timer);
 		goto out_unlock;
 	}
 
-	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
+	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0, NULL);
 	if (rc)
 		goto out_unlock;
 
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 64298a867..e959c02e0 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -46,6 +46,7 @@ static DECLARE_WORK(topology_work, topology_work_fn);
  */
 static struct mask_info socket_info;
 static struct mask_info book_info;
+static struct mask_info drawer_info;
 
 DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology);
 EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology);
@@ -79,10 +80,10 @@ static cpumask_t cpu_thread_map(unsigned int cpu)
 	return mask;
 }
 
-static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
-					  struct mask_info *book,
-					  struct mask_info *socket,
-					  int one_socket_per_cpu)
+static void add_cpus_to_mask(struct topology_core *tl_core,
+			     struct mask_info *drawer,
+			     struct mask_info *book,
+			     struct mask_info *socket)
 {
 	struct cpu_topology_s390 *topo;
 	unsigned int core;
@@ -97,21 +98,17 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
 			continue;
 		for (i = 0; i <= smp_cpu_mtid; i++) {
 			topo = &per_cpu(cpu_topology, lcpu + i);
+			topo->drawer_id = drawer->id;
 			topo->book_id = book->id;
+			topo->socket_id = socket->id;
 			topo->core_id = rcore;
 			topo->thread_id = lcpu + i;
+			cpumask_set_cpu(lcpu + i, &drawer->mask);
 			cpumask_set_cpu(lcpu + i, &book->mask);
 			cpumask_set_cpu(lcpu + i, &socket->mask);
-			if (one_socket_per_cpu)
-				topo->socket_id = rcore;
-			else
-				topo->socket_id = socket->id;
 			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
 		}
-		if (one_socket_per_cpu)
-			socket = socket->next;
 	}
-	return socket;
 }
 
 static void clear_masks(void)
@@ -128,6 +125,11 @@ static void clear_masks(void)
 		cpumask_clear(&info->mask);
 		info = info->next;
 	}
+	info = &drawer_info;
+	while (info) {
+		cpumask_clear(&info->mask);
+		info = info->next;
+	}
 }
 
 static union topology_entry *next_tle(union topology_entry *tle)
@@ -137,16 +139,22 @@ static union topology_entry *next_tle(union topology_entry *tle)
 	return (union topology_entry *)((struct topology_container *)tle + 1);
 }
 
-static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
+static void tl_to_masks(struct sysinfo_15_1_x *info)
 {
 	struct mask_info *socket = &socket_info;
 	struct mask_info *book = &book_info;
+	struct mask_info *drawer = &drawer_info;
 	union topology_entry *tle, *end;
 
+	clear_masks();
 	tle = info->tle;
 	end = (union topology_entry *)((unsigned long)info + info->length);
 	while (tle < end) {
 		switch (tle->nl) {
+		case 3:
+			drawer = drawer->next;
+			drawer->id = tle->container.id;
+			break;
 		case 2:
 			book = book->next;
 			book->id = tle->container.id;
@@ -156,32 +164,7 @@ static void __tl_to_masks_generic(struct sysinfo_15_1_x *info)
 			socket->id = tle->container.id;
 			break;
 		case 0:
-			add_cpus_to_mask(&tle->cpu, book, socket, 0);
-			break;
-		default:
-			clear_masks();
-			return;
-		}
-		tle = next_tle(tle);
-	}
-}
-
-static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
-{
-	struct mask_info *socket = &socket_info;
-	struct mask_info *book = &book_info;
-	union topology_entry *tle, *end;
-
-	tle = info->tle;
-	end = (union topology_entry *)((unsigned long)info + info->length);
-	while (tle < end) {
-		switch (tle->nl) {
-		case 1:
-			book = book->next;
-			book->id = tle->container.id;
-			break;
-		case 0:
-			socket = add_cpus_to_mask(&tle->cpu, book, socket, 1);
+			add_cpus_to_mask(&tle->cpu, drawer, book, socket);
 			break;
 		default:
 			clear_masks();
@@ -191,22 +174,6 @@ static void __tl_to_masks_z10(struct sysinfo_15_1_x *info)
 	}
 }
 
-static void tl_to_masks(struct sysinfo_15_1_x *info)
-{
-	struct cpuid cpu_id;
-
-	get_cpu_id(&cpu_id);
-	clear_masks();
-	switch (cpu_id.machine) {
-	case 0x2097:
-	case 0x2098:
-		__tl_to_masks_z10(info);
-		break;
-	default:
-		__tl_to_masks_generic(info);
-	}
-}
-
 static void topology_update_polarization_simple(void)
 {
 	int cpu;
@@ -257,11 +224,13 @@ static void update_cpu_masks(void)
 		topo->thread_mask = cpu_thread_map(cpu);
 		topo->core_mask = cpu_group_map(&socket_info, cpu);
 		topo->book_mask = cpu_group_map(&book_info, cpu);
+		topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
 		if (!MACHINE_HAS_TOPOLOGY) {
 			topo->thread_id = cpu;
 			topo->core_id = cpu;
 			topo->socket_id = cpu;
 			topo->book_id = cpu;
+			topo->drawer_id = cpu;
 		}
 	}
 	numa_update_cpu_topology();
@@ -269,10 +238,7 @@ static void update_cpu_masks(void)
 
 void store_topology(struct sysinfo_15_1_x *info)
 {
-	if (topology_max_mnest >= 3)
-		stsi(info, 15, 1, 3);
-	else
-		stsi(info, 15, 1, 2);
+	stsi(info, 15, 1, min(topology_max_mnest, 4));
 }
 
 int arch_update_cpu_topology(void)
@@ -442,6 +408,11 @@ static const struct cpumask *cpu_book_mask(int cpu)
 	return &per_cpu(cpu_topology, cpu).book_mask;
 }
 
+static const struct cpumask *cpu_drawer_mask(int cpu)
+{
+	return &per_cpu(cpu_topology, cpu).drawer_mask;
+}
+
 static int __init early_parse_topology(char *p)
 {
 	return kstrtobool(p, &topology_enabled);
@@ -452,6 +423,7 @@ static struct sched_domain_topology_level s390_topology[] = {
 	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
 	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
 	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
+	{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
 	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
 	{ NULL, },
 };
@@ -487,6 +459,7 @@ static int __init s390_topology_init(void)
 	printk(KERN_CONT " / %d\n", info->mnest);
 	alloc_masks(info, &socket_info, 1);
 	alloc_masks(info, &book_info, 2);
+	alloc_masks(info, &drawer_info, 3);
 	set_sched_topology(s390_topology);
 	return 0;
 }
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index f9c459586..68145456f 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -1,5 +1,7 @@
 # List of files in the vdso, has to be asm only for now
 
+KCOV_INSTRUMENT := n
+
 obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
 
 # Build rules
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 058659c1b..0b0fd22c8 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -1,5 +1,7 @@
 # List of files in the vdso, has to be asm only for now
 
+KCOV_INSTRUMENT := n
+
 obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
 
 # Build rules
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 0f41a8286..429bfd111 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -4,6 +4,16 @@
 
 #include <asm/thread_info.h>
 #include <asm/page.h>
+
+/*
+ * Put .bss..swapper_pg_dir as the first thing in .bss. This will
+ * make sure it has 16k alignment.
+ */
+#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir)
+
+/* Handle ro_after_init data on our own. */
+#define RO_AFTER_INIT_DATA
+
 #include <asm-generic/vmlinux.lds.h>
 
 OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
@@ -49,7 +59,14 @@ SECTIONS
 	_eshared = .;		/* End of shareable data */
 	_sdata = .;		/* Start of data section */
 
-	EXCEPTION_TABLE(16) :data
+	. = ALIGN(PAGE_SIZE);
+	__start_ro_after_init = .;
+	.data..ro_after_init : {
+		 *(.data..ro_after_init)
+	}
+	EXCEPTION_TABLE(16)
+	. = ALIGN(PAGE_SIZE);
+	__end_ro_after_init = .;
 
 	RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
 
@@ -81,7 +98,7 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	__init_end = .;		/* freed after init ends here */
 
-	BSS_SECTION(0, 2, 0)
+	BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE)
 
 	_end = . ;
 
-- 
cgit v1.2.3-54-g00ecf