From d0b2f91bede3bd5e3d24dd6803e56eee959c1797 Mon Sep 17 00:00:00 2001
From: André Fabian Silva Delgado <emulatorman@parabola.nu>
Date: Thu, 20 Oct 2016 00:10:27 -0300
Subject: Linux-libre 4.8.2-gnu

---
 lib/Kconfig.debug       |  67 ++-----
 lib/Kconfig.kasan       |   4 +-
 lib/Makefile            |  10 +-
 lib/atomic64.c          |  32 +++-
 lib/atomic64_test.c     |  34 ++++
 lib/bitmap.c            |   2 +-
 lib/chacha20.c          |  79 ++++++++
 lib/crc32.c             |  16 +-
 lib/digsig.c            |  16 +-
 lib/dma-debug.c         |   2 +
 lib/dma-noop.c          |   9 +-
 lib/dynamic_debug.c     |   7 +
 lib/earlycpio.c         |   5 +-
 lib/hweight.c           |   4 +
 lib/iommu-helper.c      |   3 +-
 lib/iov_iter.c          |  53 +++---
 lib/mpi/mpicoder.c      | 247 ++++++++-----------------
 lib/radix-tree.c        |  92 +++++++++-
 lib/random32.c          |   1 -
 lib/ratelimit.c         |  10 +-
 lib/rbtree.c            |  26 ++-
 lib/rhashtable.c        |  20 +-
 lib/sradix-tree.c       | 476 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/stackdepot.c        |   1 +
 lib/strncpy_from_user.c |   8 +-
 lib/strnlen_user.c      |   7 +-
 lib/swiotlb.c           |  13 +-
 lib/test_hash.c         |  30 +--
 lib/test_rhashtable.c   |   2 +-
 lib/ubsan.c             |   2 +-
 lib/wbt.c               | 288 +++++++++++++++++++++--------
 31 files changed, 1148 insertions(+), 418 deletions(-)
 create mode 100644 lib/chacha20.c
 create mode 100644 lib/sradix-tree.c

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 42d3d798c..896a3d0b1 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -244,6 +244,7 @@ config PAGE_OWNER
 	depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
 	select DEBUG_FS
 	select STACKTRACE
+	select STACKDEPOT
 	select PAGE_EXTENSION
 	help
 	  This keeps track of what call chain is the owner of a page, may
@@ -708,6 +709,8 @@ config KCOV
 	bool "Code coverage for fuzzing"
 	depends on ARCH_HAS_KCOV
 	select DEBUG_FS
+	select GCC_PLUGINS if !COMPILE_TEST
+	select GCC_PLUGIN_SANCOV if !COMPILE_TEST
 	help
 	  KCOV exposes kernel code coverage information in a form suitable
 	  for coverage-guided fuzzing (randomized testing).
@@ -718,6 +721,17 @@ config KCOV
 
 	  For more details, see Documentation/kcov.txt.
 
+config KCOV_INSTRUMENT_ALL
+	bool "Instrument all code by default"
+	depends on KCOV
+	default y if KCOV
+	help
+	  If you are doing generic system call fuzzing (like e.g. syzkaller),
+	  then you will want to instrument the whole kernel and you should
+	  say y here. If you are doing more targeted fuzzing (like e.g.
+	  filesystem fuzzing with AFL) then you will want to enable coverage
+	  for more specific subsets of files, and should say n here.
+
 config DEBUG_SHIRQ
 	bool "Debug shared IRQ handlers"
 	depends on DEBUG_KERNEL
@@ -807,7 +821,7 @@ config DETECT_HUNG_TASK
 	help
 	  Say Y here to enable the kernel to detect "hung tasks",
 	  which are bugs that cause the task to be stuck in
-	  uninterruptible "D" state indefinitiley.
+	  uninterruptible "D" state indefinitely.
 
 	  When a hung task is detected, the kernel will print the
 	  current stack trace (which you should report), but the
@@ -1307,22 +1321,6 @@ config RCU_PERF_TEST
 	  Say M if you want the RCU performance tests to build as a module.
 	  Say N if you are unsure.
 
-config RCU_PERF_TEST_RUNNABLE
-	bool "performance tests for RCU runnable by default"
-	depends on RCU_PERF_TEST = y
-	default n
-	help
-	  This option provides a way to build the RCU performance tests
-	  directly into the kernel without them starting up at boot time.
-	  You can use /sys/module to manually override this setting.
-	  This /proc file is available only when the RCU performance
-	  tests have been built into the kernel.
-
-	  Say Y here if you want the RCU performance tests to start during
-	  boot (you probably don't).
-	  Say N here if you want the RCU performance tests to start only
-	  after being manually enabled via /sys/module.
-
 config RCU_TORTURE_TEST
 	tristate "torture tests for RCU"
 	depends on DEBUG_KERNEL && !SCHED_BFS
@@ -1340,23 +1338,6 @@ config RCU_TORTURE_TEST
 	  Say M if you want the RCU torture tests to build as a module.
 	  Say N if you are unsure.
 
-config RCU_TORTURE_TEST_RUNNABLE
-	bool "torture tests for RCU runnable by default"
-	depends on RCU_TORTURE_TEST = y
-	default n
-	help
-	  This option provides a way to build the RCU torture tests
-	  directly into the kernel without them starting up at boot
-	  time.  You can use /proc/sys/kernel/rcutorture_runnable
-	  to manually override this setting.  This /proc file is
-	  available only when the RCU torture tests have been built
-	  into the kernel.
-
-	  Say Y here if you want the RCU torture tests to start during
-	  boot (you probably don't).
-	  Say N here if you want the RCU torture tests to start only
-	  after being manually enabled via /proc.
-
 config RCU_TORTURE_TEST_SLOW_PREINIT
 	bool "Slow down RCU grace-period pre-initialization to expose races"
 	depends on RCU_TORTURE_TEST
@@ -1706,24 +1687,6 @@ config LATENCYTOP
 	  Enable this option if you want to use the LatencyTOP tool
 	  to find out which userspace is blocking on what kernel operations.
 
-config ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
-	bool
-
-config DEBUG_STRICT_USER_COPY_CHECKS
-	bool "Strict user copy size checks"
-	depends on ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
-	depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING
-	help
-	  Enabling this option turns a certain set of sanity checks for user
-	  copy operations into compile time failures.
-
-	  The copy_from_user() etc checks are there to help test if there
-	  are sufficient security checks on the length argument of
-	  the copy operation, by having gcc prove that the argument is
-	  within bounds.
-
-	  If unsure, say N.
-
 source kernel/trace/Kconfig
 
 menu "Runtime Testing"
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 67d8c6838..bd38aab05 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -5,9 +5,9 @@ if HAVE_ARCH_KASAN
 
 config KASAN
 	bool "KASan: runtime memory debugger"
-	depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB)
+	depends on SLUB || (SLAB && !DEBUG_SLAB)
 	select CONSTRUCTORS
-	select STACKDEPOT if SLAB
+	select STACKDEPOT
 	help
 	  Enables kernel address sanitizer - runtime memory debugger,
 	  designed to find out-of-bounds accesses and use-after-free bugs.
diff --git a/lib/Makefile b/lib/Makefile
index 5b5506e3b..5bd016bd3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -15,19 +15,15 @@ KCOV_INSTRUMENT_rbtree.o := n
 KCOV_INSTRUMENT_list_debug.o := n
 KCOV_INSTRUMENT_debugobjects.o := n
 KCOV_INSTRUMENT_dynamic_debug.o := n
-# Kernel does not boot if we instrument this file as it uses custom calling
-# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
-KCOV_INSTRUMENT_hweight.o := n
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
-	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
+	 rbtree.o radix-tree.o sradix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o \
-	 sha1.o md5.o irq_regs.o argv_split.o \
+	 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
 	 earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o
 
-obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 lib-$(CONFIG_HAS_DMA) += dma-noop.o
@@ -74,8 +70,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
 obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
 
-GCOV_PROFILE_hweight.o := n
-CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 2886ebac6..53c2d5edc 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -96,17 +96,41 @@ long long atomic64_##op##_return(long long a, atomic64_t *v)		\
 }									\
 EXPORT_SYMBOL(atomic64_##op##_return);
 
+#define ATOMIC64_FETCH_OP(op, c_op)					\
+long long atomic64_fetch_##op(long long a, atomic64_t *v)		\
+{									\
+	unsigned long flags;						\
+	raw_spinlock_t *lock = lock_addr(v);				\
+	long long val;							\
+									\
+	raw_spin_lock_irqsave(lock, flags);				\
+	val = v->counter;						\
+	v->counter c_op a;						\
+	raw_spin_unlock_irqrestore(lock, flags);			\
+	return val;							\
+}									\
+EXPORT_SYMBOL(atomic64_fetch_##op);
+
 #define ATOMIC64_OPS(op, c_op)						\
 	ATOMIC64_OP(op, c_op)						\
-	ATOMIC64_OP_RETURN(op, c_op)
+	ATOMIC64_OP_RETURN(op, c_op)					\
+	ATOMIC64_FETCH_OP(op, c_op)
 
 ATOMIC64_OPS(add, +=)
 ATOMIC64_OPS(sub, -=)
-ATOMIC64_OP(and, &=)
-ATOMIC64_OP(or, |=)
-ATOMIC64_OP(xor, ^=)
 
 #undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, c_op)						\
+	ATOMIC64_OP(op, c_op)						\
+	ATOMIC64_OP_RETURN(op, c_op)					\
+	ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &=)
+ATOMIC64_OPS(or, |=)
+ATOMIC64_OPS(xor, ^=)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 123481814..dbb369145 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -53,11 +53,25 @@ do {								\
 	BUG_ON(atomic##bit##_read(&v) != r);			\
 } while (0)
 
+#define TEST_FETCH(bit, op, c_op, val)				\
+do {								\
+	atomic##bit##_set(&v, v0);				\
+	r = v0;							\
+	r c_op val;						\
+	BUG_ON(atomic##bit##_##op(val, &v) != v0);		\
+	BUG_ON(atomic##bit##_read(&v) != r);			\
+} while (0)
+
 #define RETURN_FAMILY_TEST(bit, op, c_op, val)			\
 do {								\
 	FAMILY_TEST(TEST_RETURN, bit, op, c_op, val);		\
 } while (0)
 
+#define FETCH_FAMILY_TEST(bit, op, c_op, val)			\
+do {								\
+	FAMILY_TEST(TEST_FETCH, bit, op, c_op, val);		\
+} while (0)
+
 #define TEST_ARGS(bit, op, init, ret, expect, args...)		\
 do {								\
 	atomic##bit##_set(&v, init);				\
@@ -114,6 +128,16 @@ static __init void test_atomic(void)
 	RETURN_FAMILY_TEST(, sub_return, -=, onestwos);
 	RETURN_FAMILY_TEST(, sub_return, -=, -one);
 
+	FETCH_FAMILY_TEST(, fetch_add, +=, onestwos);
+	FETCH_FAMILY_TEST(, fetch_add, +=, -one);
+	FETCH_FAMILY_TEST(, fetch_sub, -=, onestwos);
+	FETCH_FAMILY_TEST(, fetch_sub, -=, -one);
+
+	FETCH_FAMILY_TEST(, fetch_or,  |=, v1);
+	FETCH_FAMILY_TEST(, fetch_and, &=, v1);
+	FETCH_FAMILY_TEST(, fetch_andnot, &= ~, v1);
+	FETCH_FAMILY_TEST(, fetch_xor, ^=, v1);
+
 	INC_RETURN_FAMILY_TEST(, v0);
 	DEC_RETURN_FAMILY_TEST(, v0);
 
@@ -154,6 +178,16 @@ static __init void test_atomic64(void)
 	RETURN_FAMILY_TEST(64, sub_return, -=, onestwos);
 	RETURN_FAMILY_TEST(64, sub_return, -=, -one);
 
+	FETCH_FAMILY_TEST(64, fetch_add, +=, onestwos);
+	FETCH_FAMILY_TEST(64, fetch_add, +=, -one);
+	FETCH_FAMILY_TEST(64, fetch_sub, -=, onestwos);
+	FETCH_FAMILY_TEST(64, fetch_sub, -=, -one);
+
+	FETCH_FAMILY_TEST(64, fetch_or,  |=, v1);
+	FETCH_FAMILY_TEST(64, fetch_and, &=, v1);
+	FETCH_FAMILY_TEST(64, fetch_andnot, &= ~, v1);
+	FETCH_FAMILY_TEST(64, fetch_xor, ^=, v1);
+
 	INIT(v0);
 	atomic64_inc(&v);
 	r += one;
diff --git a/lib/bitmap.c b/lib/bitmap.c
index c66da508c..eca88087f 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -14,9 +14,9 @@
 #include <linux/bug.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
+#include <linux/uaccess.h>
 
 #include <asm/page.h>
-#include <asm/uaccess.h>
 
 /*
  * bitmaps provide an array of bits, implemented using an an
diff --git a/lib/chacha20.c b/lib/chacha20.c
new file mode 100644
index 000000000..250ceed9e
--- /dev/null
+++ b/lib/chacha20.c
@@ -0,0 +1,79 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/bitops.h>
+#include <linux/cryptohash.h>
+#include <asm/unaligned.h>
+#include <crypto/chacha20.h>
+
+static inline u32 rotl32(u32 v, u8 n)
+{
+	return (v << n) | (v >> (sizeof(v) * 8 - n));
+}
+
+extern void chacha20_block(u32 *state, void *stream)
+{
+	u32 x[16], *out = stream;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(x); i++)
+		x[i] = state[i];
+
+	for (i = 0; i < 20; i += 2) {
+		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],  16);
+		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],  16);
+		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],  16);
+		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],  16);
+
+		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],  12);
+		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],  12);
+		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10], 12);
+		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11], 12);
+
+		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],   8);
+		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],   8);
+		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],   8);
+		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],   8);
+
+		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],   7);
+		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],   7);
+		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10],  7);
+		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11],  7);
+
+		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],  16);
+		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],  16);
+		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],  16);
+		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],  16);
+
+		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10], 12);
+		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11], 12);
+		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],  12);
+		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],  12);
+
+		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],   8);
+		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],   8);
+		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],   8);
+		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],   8);
+
+		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10],  7);
+		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11],  7);
+		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],   7);
+		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],   7);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(x); i++)
+		out[i] = cpu_to_le32(x[i] + state[i]);
+
+	state[12]++;
+}
+EXPORT_SYMBOL(chacha20_block);
diff --git a/lib/crc32.c b/lib/crc32.c
index 9a907d489..7fbd1a112 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -979,7 +979,6 @@ static int __init crc32c_test(void)
 	int i;
 	int errors = 0;
 	int bytes = 0;
-	struct timespec start, stop;
 	u64 nsec;
 	unsigned long flags;
 
@@ -999,20 +998,17 @@ static int __init crc32c_test(void)
 	local_irq_save(flags);
 	local_irq_disable();
 
-	getnstimeofday(&start);
+	nsec = ktime_get_ns();
 	for (i = 0; i < 100; i++) {
 		if (test[i].crc32c_le != __crc32c_le(test[i].crc, test_buf +
 		    test[i].start, test[i].length))
 			errors++;
 	}
-	getnstimeofday(&stop);
+	nsec = ktime_get_ns() - nsec;
 
 	local_irq_restore(flags);
 	local_irq_enable();
 
-	nsec = stop.tv_nsec - start.tv_nsec +
-		1000000000 * (stop.tv_sec - start.tv_sec);
-
 	pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS);
 
 	if (errors)
@@ -1065,7 +1061,6 @@ static int __init crc32_test(void)
 	int i;
 	int errors = 0;
 	int bytes = 0;
-	struct timespec start, stop;
 	u64 nsec;
 	unsigned long flags;
 
@@ -1088,7 +1083,7 @@ static int __init crc32_test(void)
 	local_irq_save(flags);
 	local_irq_disable();
 
-	getnstimeofday(&start);
+	nsec = ktime_get_ns();
 	for (i = 0; i < 100; i++) {
 		if (test[i].crc_le != crc32_le(test[i].crc, test_buf +
 		    test[i].start, test[i].length))
@@ -1098,14 +1093,11 @@ static int __init crc32_test(void)
 		    test[i].start, test[i].length))
 			errors++;
 	}
-	getnstimeofday(&stop);
+	nsec = ktime_get_ns() - nsec;
 
 	local_irq_restore(flags);
 	local_irq_enable();
 
-	nsec = stop.tv_nsec - start.tv_nsec +
-		1000000000 * (stop.tv_sec - start.tv_sec);
-
 	pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n",
 		 CRC_LE_BITS, CRC_BE_BITS);
 
diff --git a/lib/digsig.c b/lib/digsig.c
index 07be6c1ef..55b8b2f41 100644
--- a/lib/digsig.c
+++ b/lib/digsig.c
@@ -104,21 +104,25 @@ static int digsig_verify_rsa(struct key *key,
 	datap = pkh->mpi;
 	endp = ukp->data + ukp->datalen;
 
-	err = -ENOMEM;
-
 	for (i = 0; i < pkh->nmpi; i++) {
 		unsigned int remaining = endp - datap;
 		pkey[i] = mpi_read_from_buffer(datap, &remaining);
-		if (!pkey[i])
+		if (IS_ERR(pkey[i])) {
+			err = PTR_ERR(pkey[i]);
 			goto err;
+		}
 		datap += remaining;
 	}
 
 	mblen = mpi_get_nbits(pkey[0]);
 	mlen = DIV_ROUND_UP(mblen, 8);
 
-	if (mlen == 0)
+	if (mlen == 0) {
+		err = -EINVAL;
 		goto err;
+	}
+
+	err = -ENOMEM;
 
 	out1 = kzalloc(mlen, GFP_KERNEL);
 	if (!out1)
@@ -126,8 +130,10 @@ static int digsig_verify_rsa(struct key *key,
 
 	nret = siglen;
 	in = mpi_read_from_buffer(sig, &nret);
-	if (!in)
+	if (IS_ERR(in)) {
+		err = PTR_ERR(in);
 		goto err;
+	}
 
 	res = mpi_alloc(mpi_get_nlimbs(in) * 2);
 	if (!res)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 51a76af25..fcfa1939a 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -253,6 +253,7 @@ static int hash_fn(struct dma_debug_entry *entry)
  */
 static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
 					   unsigned long *flags)
+	__acquires(&dma_entry_hash[idx].lock)
 {
 	int idx = hash_fn(entry);
 	unsigned long __flags;
@@ -267,6 +268,7 @@ static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
  */
 static void put_hash_bucket(struct hash_bucket *bucket,
 			    unsigned long *flags)
+	__releases(&bucket->lock)
 {
 	unsigned long __flags = *flags;
 
diff --git a/lib/dma-noop.c b/lib/dma-noop.c
index 721456468..3d766e78f 100644
--- a/lib/dma-noop.c
+++ b/lib/dma-noop.c
@@ -10,7 +10,7 @@
 
 static void *dma_noop_alloc(struct device *dev, size_t size,
 			    dma_addr_t *dma_handle, gfp_t gfp,
-			    struct dma_attrs *attrs)
+			    unsigned long attrs)
 {
 	void *ret;
 
@@ -22,7 +22,7 @@ static void *dma_noop_alloc(struct device *dev, size_t size,
 
 static void dma_noop_free(struct device *dev, size_t size,
 			  void *cpu_addr, dma_addr_t dma_addr,
-			  struct dma_attrs *attrs)
+			  unsigned long attrs)
 {
 	free_pages((unsigned long)cpu_addr, get_order(size));
 }
@@ -30,13 +30,14 @@ static void dma_noop_free(struct device *dev, size_t size,
 static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page,
 				      unsigned long offset, size_t size,
 				      enum dma_data_direction dir,
-				      struct dma_attrs *attrs)
+				      unsigned long attrs)
 {
 	return page_to_phys(page) + offset;
 }
 
 static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
-			     enum dma_data_direction dir, struct dma_attrs *attrs)
+			     enum dma_data_direction dir,
+			     unsigned long attrs)
 {
 	int i;
 	struct scatterlist *sg;
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index fe42b6ec3..da796e2dc 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -188,6 +188,13 @@ static int ddebug_change(const struct ddebug_query *query,
 			newflags = (dp->flags & mask) | flags;
 			if (newflags == dp->flags)
 				continue;
+#ifdef HAVE_JUMP_LABEL
+			if (dp->flags & _DPRINTK_FLAGS_PRINT) {
+				if (!(flags & _DPRINTK_FLAGS_PRINT))
+					static_branch_disable(&dp->key.dd_key_true);
+			} else if (flags & _DPRINTK_FLAGS_PRINT)
+				static_branch_enable(&dp->key.dd_key_true);
+#endif
 			dp->flags = newflags;
 			vpr_info("changed %s:%d [%s]%s =%s\n",
 				 trim_prefix(dp->filename), dp->lineno,
diff --git a/lib/earlycpio.c b/lib/earlycpio.c
index 3eb3e4722..db283ba4d 100644
--- a/lib/earlycpio.c
+++ b/lib/earlycpio.c
@@ -125,7 +125,10 @@ struct cpio_data find_cpio_data(const char *path, void *data,
 		if ((ch[C_MODE] & 0170000) == 0100000 &&
 		    ch[C_NAMESIZE] >= mypathsize &&
 		    !memcmp(p, path, mypathsize)) {
-			*nextoff = (long)nptr - (long)data;
+
+			if (nextoff)
+				*nextoff = (long)nptr - (long)data;
+
 			if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) {
 				pr_warn(
 				"File %s exceeding MAX_CPIO_FILE_NAME [%d]\n",
diff --git a/lib/hweight.c b/lib/hweight.c
index 9a5c1f221..43273a7d8 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,6 +9,7 @@
  * The Hamming Weight of a number is the total number of bits set in it.
  */
 
+#ifndef __HAVE_ARCH_SW_HWEIGHT
 unsigned int __sw_hweight32(unsigned int w)
 {
 #ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
@@ -25,6 +26,7 @@ unsigned int __sw_hweight32(unsigned int w)
 #endif
 }
 EXPORT_SYMBOL(__sw_hweight32);
+#endif
 
 unsigned int __sw_hweight16(unsigned int w)
 {
@@ -43,6 +45,7 @@ unsigned int __sw_hweight8(unsigned int w)
 }
 EXPORT_SYMBOL(__sw_hweight8);
 
+#ifndef __HAVE_ARCH_SW_HWEIGHT
 unsigned long __sw_hweight64(__u64 w)
 {
 #if BITS_PER_LONG == 32
@@ -65,3 +68,4 @@ unsigned long __sw_hweight64(__u64 w)
 #endif
 }
 EXPORT_SYMBOL(__sw_hweight64);
+#endif
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index c27e26921..a816f3a80 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -29,8 +29,7 @@ again:
 	index = bitmap_find_next_zero_area(map, size, start, nr, align_mask);
 	if (index < size) {
 		if (iommu_is_span_boundary(index, nr, shift, boundary_size)) {
-			/* we could do more effectively */
-			start = index + 1;
+			start = ALIGN(shift + index, boundary_size) - shift;
 			goto again;
 		}
 		bitmap_set(map, index, nr);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index eaaf73032..7e3138cfc 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -56,37 +56,24 @@
 	n = wanted;					\
 }
 
-#define iterate_bvec(i, n, __v, __p, skip, STEP) {	\
-	size_t wanted = n;				\
-	__p = i->bvec;					\
-	__v.bv_len = min_t(size_t, n, __p->bv_len - skip);	\
-	if (likely(__v.bv_len)) {			\
-		__v.bv_page = __p->bv_page;		\
-		__v.bv_offset = __p->bv_offset + skip; 	\
-		(void)(STEP);				\
-		skip += __v.bv_len;			\
-		n -= __v.bv_len;			\
-	}						\
-	while (unlikely(n)) {				\
-		__p++;					\
-		__v.bv_len = min_t(size_t, n, __p->bv_len);	\
-		if (unlikely(!__v.bv_len))		\
+#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
+	struct bvec_iter __start;			\
+	__start.bi_size = n;				\
+	__start.bi_bvec_done = skip;			\
+	__start.bi_idx = 0;				\
+	for_each_bvec(__v, i->bvec, __bi, __start) {	\
+		if (!__v.bv_len)			\
 			continue;			\
-		__v.bv_page = __p->bv_page;		\
-		__v.bv_offset = __p->bv_offset;		\
 		(void)(STEP);				\
-		skip = __v.bv_len;			\
-		n -= __v.bv_len;			\
 	}						\
-	n = wanted;					\
 }
 
 #define iterate_all_kinds(i, n, v, I, B, K) {			\
 	size_t skip = i->iov_offset;				\
 	if (unlikely(i->type & ITER_BVEC)) {			\
-		const struct bio_vec *bvec;			\
 		struct bio_vec v;				\
-		iterate_bvec(i, n, v, bvec, skip, (B))		\
+		struct bvec_iter __bi;				\
+		iterate_bvec(i, n, v, __bi, skip, (B))		\
 	} else if (unlikely(i->type & ITER_KVEC)) {		\
 		const struct kvec *kvec;			\
 		struct kvec v;					\
@@ -104,15 +91,13 @@
 	if (i->count) {						\
 		size_t skip = i->iov_offset;			\
 		if (unlikely(i->type & ITER_BVEC)) {		\
-			const struct bio_vec *bvec;		\
+			const struct bio_vec *bvec = i->bvec;	\
 			struct bio_vec v;			\
-			iterate_bvec(i, n, v, bvec, skip, (B))	\
-			if (skip == bvec->bv_len) {		\
-				bvec++;				\
-				skip = 0;			\
-			}					\
-			i->nr_segs -= bvec - i->bvec;		\
-			i->bvec = bvec;				\
+			struct bvec_iter __bi;			\
+			iterate_bvec(i, n, v, __bi, skip, (B))	\
+			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
+			i->nr_segs -= i->bvec - bvec;		\
+			skip = __bi.bi_bvec_done;		\
 		} else if (unlikely(i->type & ITER_KVEC)) {	\
 			const struct kvec *kvec;		\
 			struct kvec v;				\
@@ -159,7 +144,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
 	buf = iov->iov_base + skip;
 	copy = min(bytes, iov->iov_len - skip);
 
-	if (!fault_in_pages_writeable(buf, copy)) {
+	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
 		kaddr = kmap_atomic(page);
 		from = kaddr + offset;
 
@@ -190,6 +175,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
 		copy = min(bytes, iov->iov_len - skip);
 	}
 	/* Too bad - revert to non-atomic kmap */
+
 	kaddr = kmap(page);
 	from = kaddr + offset;
 	left = __copy_to_user(buf, from, copy);
@@ -208,6 +194,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
 		bytes -= copy;
 	}
 	kunmap(page);
+
 done:
 	if (skip == iov->iov_len) {
 		iov++;
@@ -240,7 +227,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
 	buf = iov->iov_base + skip;
 	copy = min(bytes, iov->iov_len - skip);
 
-	if (!fault_in_pages_readable(buf, copy)) {
+	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
 		kaddr = kmap_atomic(page);
 		to = kaddr + offset;
 
@@ -271,6 +258,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
 		copy = min(bytes, iov->iov_len - skip);
 	}
 	/* Too bad - revert to non-atomic kmap */
+
 	kaddr = kmap(page);
 	to = kaddr + offset;
 	left = __copy_from_user(to, buf, copy);
@@ -289,6 +277,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
 		bytes -= copy;
 	}
 	kunmap(page);
+
 done:
 	if (skip == iov->iov_len) {
 		iov++;
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 747606f9e..5a0f75a3b 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -21,6 +21,7 @@
 #include <linux/bitops.h>
 #include <linux/count_zeros.h>
 #include <linux/byteorder/generic.h>
+#include <linux/scatterlist.h>
 #include <linux/string.h>
 #include "mpi-internal.h"
 
@@ -50,9 +51,7 @@ MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes)
 		return NULL;
 	}
 	if (nbytes > 0)
-		nbits -= count_leading_zeros(buffer[0]);
-	else
-		nbits = 0;
+		nbits -= count_leading_zeros(buffer[0]) - (BITS_PER_LONG - 8);
 
 	nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
 	val = mpi_alloc(nlimbs);
@@ -82,50 +81,30 @@ EXPORT_SYMBOL_GPL(mpi_read_raw_data);
 MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
 {
 	const uint8_t *buffer = xbuffer;
-	int i, j;
-	unsigned nbits, nbytes, nlimbs, nread = 0;
-	mpi_limb_t a;
-	MPI val = NULL;
+	unsigned int nbits, nbytes;
+	MPI val;
 
 	if (*ret_nread < 2)
-		goto leave;
+		return ERR_PTR(-EINVAL);
 	nbits = buffer[0] << 8 | buffer[1];
 
 	if (nbits > MAX_EXTERN_MPI_BITS) {
 		pr_info("MPI: mpi too large (%u bits)\n", nbits);
-		goto leave;
+		return ERR_PTR(-EINVAL);
 	}
-	buffer += 2;
-	nread = 2;
 
 	nbytes = DIV_ROUND_UP(nbits, 8);
-	nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
-	val = mpi_alloc(nlimbs);
-	if (!val)
-		return NULL;
-	i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
-	i %= BYTES_PER_MPI_LIMB;
-	val->nbits = nbits;
-	j = val->nlimbs = nlimbs;
-	val->sign = 0;
-	for (; j > 0; j--) {
-		a = 0;
-		for (; i < BYTES_PER_MPI_LIMB; i++) {
-			if (++nread > *ret_nread) {
-				printk
-				    ("MPI: mpi larger than buffer nread=%d ret_nread=%d\n",
-				     nread, *ret_nread);
-				goto leave;
-			}
-			a <<= 8;
-			a |= *buffer++;
-		}
-		i = 0;
-		val->d[j - 1] = a;
+	if (nbytes + 2 > *ret_nread) {
+		pr_info("MPI: mpi larger than buffer nbytes=%u ret_nread=%u\n",
+				nbytes, *ret_nread);
+		return ERR_PTR(-EINVAL);
 	}
 
-leave:
-	*ret_nread = nread;
+	val = mpi_read_raw_data(buffer + 2, nbytes);
+	if (!val)
+		return ERR_PTR(-ENOMEM);
+
+	*ret_nread = nbytes + 2;
 	return val;
 }
 EXPORT_SYMBOL_GPL(mpi_read_from_buffer);
@@ -250,82 +229,6 @@ void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign)
 }
 EXPORT_SYMBOL_GPL(mpi_get_buffer);
 
-/****************
- * Use BUFFER to update MPI.
- */
-int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign)
-{
-	const uint8_t *buffer = xbuffer, *p;
-	mpi_limb_t alimb;
-	int nlimbs;
-	int i;
-
-	nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
-	if (RESIZE_IF_NEEDED(a, nlimbs) < 0)
-		return -ENOMEM;
-	a->sign = sign;
-
-	for (i = 0, p = buffer + nbytes - 1; p >= buffer + BYTES_PER_MPI_LIMB;) {
-#if BYTES_PER_MPI_LIMB == 4
-		alimb = (mpi_limb_t) *p--;
-		alimb |= (mpi_limb_t) *p-- << 8;
-		alimb |= (mpi_limb_t) *p-- << 16;
-		alimb |= (mpi_limb_t) *p-- << 24;
-#elif BYTES_PER_MPI_LIMB == 8
-		alimb = (mpi_limb_t) *p--;
-		alimb |= (mpi_limb_t) *p-- << 8;
-		alimb |= (mpi_limb_t) *p-- << 16;
-		alimb |= (mpi_limb_t) *p-- << 24;
-		alimb |= (mpi_limb_t) *p-- << 32;
-		alimb |= (mpi_limb_t) *p-- << 40;
-		alimb |= (mpi_limb_t) *p-- << 48;
-		alimb |= (mpi_limb_t) *p-- << 56;
-#else
-#error please implement for this limb size.
-#endif
-		a->d[i++] = alimb;
-	}
-	if (p >= buffer) {
-#if BYTES_PER_MPI_LIMB == 4
-		alimb = *p--;
-		if (p >= buffer)
-			alimb |= (mpi_limb_t) *p-- << 8;
-		if (p >= buffer)
-			alimb |= (mpi_limb_t) *p-- << 16;
-		if (p >= buffer)
-			alimb |= (mpi_limb_t) *p-- << 24;
-#elif BYTES_PER_MPI_LIMB == 8
-		alimb = (mpi_limb_t) *p--;
-		if (p >= buffer)
-			alimb |= (mpi_limb_t) *p-- << 8;
-		if (p >= buffer)
-			alimb |= (mpi_limb_t) *p-- << 16;
-		if (p >= buffer)
-			alimb |= (mpi_limb_t) *p-- << 24;
-		if (p >= buffer)
-			alimb |= (mpi_limb_t) *p-- << 32;
-		if (p >= buffer)
-			alimb |= (mpi_limb_t) *p-- << 40;
-		if (p >= buffer)
-			alimb |= (mpi_limb_t) *p-- << 48;
-		if (p >= buffer)
-			alimb |= (mpi_limb_t) *p-- << 56;
-#else
-#error please implement for this limb size.
-#endif
-		a->d[i++] = alimb;
-	}
-	a->nlimbs = i;
-
-	if (i != nlimbs) {
-		pr_emerg("MPI: mpi_set_buffer: Assertion failed (%d != %d)", i,
-		       nlimbs);
-		BUG();
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(mpi_set_buffer);
-
 /**
  * mpi_write_to_sgl() - Funnction exports MPI to an sgl (msb first)
  *
@@ -335,16 +238,13 @@ EXPORT_SYMBOL_GPL(mpi_set_buffer);
  * @a:		a multi precision integer
  * @sgl:	scatterlist to write to. Needs to be at least
  *		mpi_get_size(a) long.
- * @nbytes:	in/out param - it has the be set to the maximum number of
- *		bytes that can be written to sgl. This has to be at least
- *		the size of the integer a. On return it receives the actual
- *		length of the data written on success or the data that would
- *		be written if buffer was too small.
+ * @nbytes:	the number of bytes to write.  Leading bytes will be
+ *		filled with zero.
  * @sign:	if not NULL, it will be set to the sign of a.
  *
  * Return:	0 on success or error code in case of error
  */
-int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
+int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned nbytes,
 		     int *sign)
 {
 	u8 *p, *p2;
@@ -356,55 +256,60 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
 #error please implement for this limb size.
 #endif
 	unsigned int n = mpi_get_size(a);
-	int i, x, y = 0, lzeros, buf_len;
-
-	if (!nbytes)
-		return -EINVAL;
+	struct sg_mapping_iter miter;
+	int i, x, buf_len;
+	int nents;
 
 	if (sign)
 		*sign = a->sign;
 
-	lzeros = count_lzeros(a);
-
-	if (*nbytes < n - lzeros) {
-		*nbytes = n - lzeros;
+	if (nbytes < n)
 		return -EOVERFLOW;
-	}
 
-	*nbytes = n - lzeros;
-	buf_len = sgl->length;
-	p2 = sg_virt(sgl);
+	nents = sg_nents_for_len(sgl, nbytes);
+	if (nents < 0)
+		return -EINVAL;
 
-	for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB,
-			lzeros %= BYTES_PER_MPI_LIMB;
-		i >= 0; i--) {
+	sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_TO_SG);
+	sg_miter_next(&miter);
+	buf_len = miter.length;
+	p2 = miter.addr;
+
+	while (nbytes > n) {
+		i = min_t(unsigned, nbytes - n, buf_len);
+		memset(p2, 0, i);
+		p2 += i;
+		nbytes -= i;
+
+		buf_len -= i;
+		if (!buf_len) {
+			sg_miter_next(&miter);
+			buf_len = miter.length;
+			p2 = miter.addr;
+		}
+	}
+
+	for (i = a->nlimbs - 1; i >= 0; i--) {
 #if BYTES_PER_MPI_LIMB == 4
-		alimb = cpu_to_be32(a->d[i]);
+		alimb = a->d[i] ? cpu_to_be32(a->d[i]) : 0;
 #elif BYTES_PER_MPI_LIMB == 8
-		alimb = cpu_to_be64(a->d[i]);
+		alimb = a->d[i] ? cpu_to_be64(a->d[i]) : 0;
 #else
 #error please implement for this limb size.
 #endif
-		if (lzeros) {
-			y = lzeros;
-			lzeros = 0;
-		}
-
-		p = (u8 *)&alimb + y;
+		p = (u8 *)&alimb;
 
-		for (x = 0; x < sizeof(alimb) - y; x++) {
-			if (!buf_len) {
-				sgl = sg_next(sgl);
-				if (!sgl)
-					return -EINVAL;
-				buf_len = sgl->length;
-				p2 = sg_virt(sgl);
-			}
+		for (x = 0; x < sizeof(alimb); x++) {
 			*p2++ = *p++;
-			buf_len--;
+			if (!--buf_len) {
+				sg_miter_next(&miter);
+				buf_len = miter.length;
+				p2 = miter.addr;
+			}
 		}
-		y = 0;
 	}
+
+	sg_miter_stop(&miter);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mpi_write_to_sgl);
@@ -424,19 +329,23 @@ EXPORT_SYMBOL_GPL(mpi_write_to_sgl);
  */
 MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
 {
-	struct scatterlist *sg;
-	int x, i, j, z, lzeros, ents;
+	struct sg_mapping_iter miter;
 	unsigned int nbits, nlimbs;
+	int x, j, z, lzeros, ents;
+	unsigned int len;
+	const u8 *buff;
 	mpi_limb_t a;
 	MPI val = NULL;
 
-	lzeros = 0;
-	ents = sg_nents(sgl);
+	ents = sg_nents_for_len(sgl, nbytes);
+	if (ents < 0)
+		return NULL;
 
-	for_each_sg(sgl, sg, ents, i) {
-		const u8 *buff = sg_virt(sg);
-		int len = sg->length;
+	sg_miter_start(&miter, sgl, ents, SG_MITER_ATOMIC | SG_MITER_FROM_SG);
 
+	lzeros = 0;
+	len = 0;
+	while (nbytes > 0) {
 		while (len && !*buff) {
 			lzeros++;
 			len--;
@@ -446,12 +355,17 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
 		if (len && *buff)
 			break;
 
-		ents--;
+		sg_miter_next(&miter);
+		buff = miter.addr;
+		len = miter.length;
+
 		nbytes -= lzeros;
 		lzeros = 0;
 	}
 
-	sgl = sg;
+	miter.consumed = lzeros;
+	sg_miter_stop(&miter);
+
 	nbytes -= lzeros;
 	nbits = nbytes * 8;
 	if (nbits > MAX_EXTERN_MPI_BITS) {
@@ -460,8 +374,7 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
 	}
 
 	if (nbytes > 0)
-		nbits -= count_leading_zeros(*(u8 *)(sg_virt(sgl) + lzeros)) -
-			(BITS_PER_LONG - 8);
+		nbits -= count_leading_zeros(*buff) - (BITS_PER_LONG - 8);
 
 	nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
 	val = mpi_alloc(nlimbs);
@@ -480,21 +393,21 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
 	z = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
 	z %= BYTES_PER_MPI_LIMB;
 
-	for_each_sg(sgl, sg, ents, i) {
-		const u8 *buffer = sg_virt(sg) + lzeros;
-		int len = sg->length - lzeros;
+	while (sg_miter_next(&miter)) {
+		buff = miter.addr;
+		len = miter.length;
 
 		for (x = 0; x < len; x++) {
 			a <<= 8;
-			a |= *buffer++;
+			a |= *buff++;
 			if (((z + x + 1) % BYTES_PER_MPI_LIMB) == 0) {
 				val->d[j--] = a;
 				a = 0;
 			}
 		}
 		z += x;
-		lzeros = 0;
 	}
+
 	return val;
 }
 EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index bc7852f95..91f0727e3 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -38,6 +38,9 @@
 #include <linux/preempt.h>		/* in_interrupt() */
 
 
+/* Number of nodes in fully populated tree of given height */
+static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
+
 /*
  * Radix tree node cache.
  */
@@ -102,10 +105,10 @@ static unsigned int radix_tree_descend(struct radix_tree_node *parent,
 
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
 	if (radix_tree_is_internal_node(entry)) {
-		unsigned long siboff = get_slot_offset(parent, entry);
-		if (siboff < RADIX_TREE_MAP_SIZE) {
-			offset = siboff;
-			entry = rcu_dereference_raw(parent->slots[offset]);
+		if (is_sibling_entry(parent, entry)) {
+			void **sibentry = (void **) entry_to_node(entry);
+			offset = get_slot_offset(parent, sibentry);
+			entry = rcu_dereference_raw(*sibentry);
 		}
 	}
 #endif
@@ -342,7 +345,7 @@ radix_tree_node_free(struct radix_tree_node *node)
  * To make use of this facility, the radix tree must be initialised without
  * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
  */
-static int __radix_tree_preload(gfp_t gfp_mask)
+static int __radix_tree_preload(gfp_t gfp_mask, int nr)
 {
 	struct radix_tree_preload *rtp;
 	struct radix_tree_node *node;
@@ -356,14 +359,14 @@ static int __radix_tree_preload(gfp_t gfp_mask)
 
 	preempt_disable();
 	rtp = this_cpu_ptr(&radix_tree_preloads);
-	while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) {
+	while (rtp->nr < nr) {
 		preempt_enable();
 		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
 		if (node == NULL)
 			goto out;
 		preempt_disable();
 		rtp = this_cpu_ptr(&radix_tree_preloads);
-		if (rtp->nr < RADIX_TREE_PRELOAD_SIZE) {
+		if (rtp->nr < nr) {
 			node->private_data = rtp->nodes;
 			rtp->nodes = node;
 			rtp->nr++;
@@ -389,7 +392,7 @@ int radix_tree_preload(gfp_t gfp_mask)
 {
 	/* Warn on non-sensical use... */
 	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
-	return __radix_tree_preload(gfp_mask);
+	return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
 }
 EXPORT_SYMBOL(radix_tree_preload);
 
@@ -401,13 +404,58 @@ EXPORT_SYMBOL(radix_tree_preload);
 int radix_tree_maybe_preload(gfp_t gfp_mask)
 {
 	if (gfpflags_allow_blocking(gfp_mask))
-		return __radix_tree_preload(gfp_mask);
+		return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
 	/* Preloading doesn't help anything with this gfp mask, skip it */
 	preempt_disable();
 	return 0;
 }
 EXPORT_SYMBOL(radix_tree_maybe_preload);
 
+/*
+ * The same as function above, but preload number of nodes required to insert
+ * (1 << order) continuous naturally-aligned elements.
+ */
+int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
+{
+	unsigned long nr_subtrees;
+	int nr_nodes, subtree_height;
+
+	/* Preloading doesn't help anything with this gfp mask, skip it */
+	if (!gfpflags_allow_blocking(gfp_mask)) {
+		preempt_disable();
+		return 0;
+	}
+
+	/*
+	 * Calculate number and height of fully populated subtrees it takes to
+	 * store (1 << order) elements.
+	 */
+	nr_subtrees = 1 << order;
+	for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE;
+			subtree_height++)
+		nr_subtrees >>= RADIX_TREE_MAP_SHIFT;
+
+	/*
+	 * The worst case is zero height tree with a single item at index 0 and
+	 * then inserting items starting at ULONG_MAX - (1 << order).
+	 *
+	 * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to
+	 * 0-index item.
+	 */
+	nr_nodes = RADIX_TREE_MAX_PATH;
+
+	/* Plus branch to fully populated subtrees. */
+	nr_nodes += RADIX_TREE_MAX_PATH - subtree_height;
+
+	/* Root node is shared. */
+	nr_nodes--;
+
+	/* Plus nodes required to build subtrees. */
+	nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height];
+
+	return __radix_tree_preload(gfp_mask, nr_nodes);
+}
+
 /*
  * The maximum index which can be stored in a radix tree
  */
@@ -1577,6 +1625,31 @@ radix_tree_node_ctor(void *arg)
 	INIT_LIST_HEAD(&node->private_list);
 }
 
+static __init unsigned long __maxindex(unsigned int height)
+{
+	unsigned int width = height * RADIX_TREE_MAP_SHIFT;
+	int shift = RADIX_TREE_INDEX_BITS - width;
+
+	if (shift < 0)
+		return ~0UL;
+	if (shift >= BITS_PER_LONG)
+		return 0UL;
+	return ~0UL >> shift;
+}
+
+static __init void radix_tree_init_maxnodes(void)
+{
+	unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1];
+	unsigned int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
+		height_to_maxindex[i] = __maxindex(i);
+	for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) {
+		for (j = i; j > 0; j--)
+			height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1;
+	}
+}
+
 static int radix_tree_callback(struct notifier_block *nfb,
 				unsigned long action, void *hcpu)
 {
@@ -1603,5 +1676,6 @@ void __init radix_tree_init(void)
 			sizeof(struct radix_tree_node), 0,
 			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
 			radix_tree_node_ctor);
+	radix_tree_init_maxnodes();
 	hotcpu_notifier(radix_tree_callback, 0);
 }
diff --git a/lib/random32.c b/lib/random32.c
index 510d1ce7d..69ed593aa 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -233,7 +233,6 @@ static void __prandom_timer(unsigned long dontcare)
 
 static void __init __prandom_start_seed_timer(void)
 {
-	set_timer_slack(&seed_timer, HZ);
 	seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC);
 	add_timer(&seed_timer);
 }
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 2c5de8646..08f8043ca 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -46,12 +46,14 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
 		rs->begin = jiffies;
 
 	if (time_is_before_jiffies(rs->begin + rs->interval)) {
-		if (rs->missed)
-			printk(KERN_WARNING "%s: %d callbacks suppressed\n",
-				func, rs->missed);
+		if (rs->missed) {
+			if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
+				pr_warn("%s: %d callbacks suppressed\n", func, rs->missed);
+				rs->missed = 0;
+			}
+		}
 		rs->begin   = jiffies;
 		rs->printed = 0;
-		rs->missed  = 0;
 	}
 	if (rs->burst && rs->burst > rs->printed) {
 		rs->printed++;
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 1356454e3..eb8a19fee 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -539,17 +539,39 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new,
 {
 	struct rb_node *parent = rb_parent(victim);
 
+	/* Copy the pointers/colour from the victim to the replacement */
+	*new = *victim;
+
 	/* Set the surrounding nodes to point to the replacement */
-	__rb_change_child(victim, new, parent, root);
 	if (victim->rb_left)
 		rb_set_parent(victim->rb_left, new);
 	if (victim->rb_right)
 		rb_set_parent(victim->rb_right, new);
+	__rb_change_child(victim, new, parent, root);
+}
+EXPORT_SYMBOL(rb_replace_node);
+
+void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
+			 struct rb_root *root)
+{
+	struct rb_node *parent = rb_parent(victim);
 
 	/* Copy the pointers/colour from the victim to the replacement */
 	*new = *victim;
+
+	/* Set the surrounding nodes to point to the replacement */
+	if (victim->rb_left)
+		rb_set_parent(victim->rb_left, new);
+	if (victim->rb_right)
+		rb_set_parent(victim->rb_right, new);
+
+	/* Set the parent's pointer to the new node last after an RCU barrier
+	 * so that the pointers onwards are seen to be set correctly when doing
+	 * an RCU walk over the tree.
+	 */
+	__rb_change_child_rcu(victim, new, parent, root);
 }
-EXPORT_SYMBOL(rb_replace_node);
+EXPORT_SYMBOL(rb_replace_node_rcu);
 
 static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
 {
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 5d845ffd7..56054e541 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -30,7 +30,7 @@
 
 #define HASH_DEFAULT_SIZE	64UL
 #define HASH_MIN_SIZE		4U
-#define BUCKET_LOCKS_PER_CPU   128UL
+#define BUCKET_LOCKS_PER_CPU	32UL
 
 static u32 head_hashfn(struct rhashtable *ht,
 		       const struct bucket_table *tbl,
@@ -70,21 +70,25 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
 	unsigned int nr_pcpus = num_possible_cpus();
 #endif
 
-	nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
+	nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
 	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
 
 	/* Never allocate more than 0.5 locks per bucket */
 	size = min_t(unsigned int, size, tbl->size >> 1);
 
 	if (sizeof(spinlock_t) != 0) {
+		tbl->locks = NULL;
 #ifdef CONFIG_NUMA
 		if (size * sizeof(spinlock_t) > PAGE_SIZE &&
 		    gfp == GFP_KERNEL)
 			tbl->locks = vmalloc(size * sizeof(spinlock_t));
-		else
 #endif
-		tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
-					   gfp);
+		if (gfp != GFP_KERNEL)
+			gfp |= __GFP_NOWARN | __GFP_NORETRY;
+
+		if (!tbl->locks)
+			tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
+						   gfp);
 		if (!tbl->locks)
 			return -ENOMEM;
 		for (i = 0; i < size; i++)
@@ -321,12 +325,14 @@ static int rhashtable_expand(struct rhashtable *ht)
 static int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
-	unsigned int size;
+	unsigned int nelems = atomic_read(&ht->nelems);
+	unsigned int size = 0;
 	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
-	size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+	if (nelems)
+		size = roundup_pow_of_two(nelems * 3 / 2);
 	if (size < ht->p.min_size)
 		size = ht->p.min_size;
 
diff --git a/lib/sradix-tree.c b/lib/sradix-tree.c
new file mode 100644
index 000000000..8d0632917
--- /dev/null
+++ b/lib/sradix-tree.c
@@ -0,0 +1,476 @@
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/gcd.h>
+#include <linux/sradix-tree.h>
+
+static inline int sradix_node_full(struct sradix_tree_root *root, struct sradix_tree_node *node)
+{
+	return node->fulls == root->stores_size || 
+		(node->height == 1 && node->count == root->stores_size);
+}
+
+/*
+ *	Extend a sradix tree so it can store key @index.
+ */
+static int sradix_tree_extend(struct sradix_tree_root *root, unsigned long index)
+{
+	struct sradix_tree_node *node;
+	unsigned int height;
+
+	if (unlikely(root->rnode == NULL)) {
+		if (!(node = root->alloc()))
+			return -ENOMEM;
+
+		node->height = 1;
+		root->rnode = node;
+		root->height = 1;
+	}
+
+	/* Figure out what the height should be.  */
+	height = root->height;
+	index >>= root->shift * height;
+
+	while (index) {
+		index >>= root->shift;
+		height++;
+	}
+
+	while (height > root->height) {
+		unsigned int newheight;
+		if (!(node = root->alloc()))
+			return -ENOMEM;
+
+		/* Increase the height.  */
+		node->stores[0] = root->rnode;
+		root->rnode->parent = node;
+		if (root->extend)
+			root->extend(node, root->rnode);
+
+		newheight = root->height + 1;
+		node->height = newheight;
+		node->count = 1;
+		if (sradix_node_full(root, root->rnode))
+			node->fulls = 1;
+
+		root->rnode = node;
+		root->height = newheight;
+	}
+
+	return 0;
+}
+
+/*
+ * Search the next item from the current node, that is not NULL
+ * and can satify root->iter().
+ */
+void *sradix_tree_next(struct sradix_tree_root *root,
+		       struct sradix_tree_node *node, unsigned long index,
+		       int (*iter)(void *item, unsigned long height))
+{
+	unsigned long offset;
+	void *item;
+
+	if (unlikely(node == NULL)) {
+		node = root->rnode;
+		for (offset = 0; offset < root->stores_size; offset++) {
+			item = node->stores[offset];
+			if (item && (!iter || iter(item, node->height)))
+				break;
+		}
+
+		if (unlikely(offset >= root->stores_size))
+			return NULL;
+
+		if (node->height == 1)
+			return item;
+		else
+			goto go_down;
+	}
+
+	while (node) {
+		offset = (index & root->mask) + 1;					
+		for (;offset < root->stores_size; offset++) {
+			item = node->stores[offset];
+			if (item && (!iter || iter(item, node->height)))
+				break;
+		}
+
+		if (offset < root->stores_size)
+			break;
+
+		node = node->parent;
+		index >>= root->shift;
+	}
+
+	if (!node)
+		return NULL;
+
+	while (node->height > 1) {
+go_down:
+		node = item;
+		for (offset = 0; offset < root->stores_size; offset++) {
+			item = node->stores[offset];
+			if (item && (!iter || iter(item, node->height)))
+				break;
+		}
+
+		if (unlikely(offset >= root->stores_size))
+			return NULL;
+	}
+
+	BUG_ON(offset > root->stores_size);
+
+	return item;
+}
+
+/*
+ * Blindly insert the item to the tree. Typically, we reuse the
+ * first empty store item.
+ */
+int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num)
+{
+	unsigned long index;
+	unsigned int height;
+	struct sradix_tree_node *node, *tmp = NULL;
+	int offset, offset_saved;
+	void **store = NULL;
+	int error, i, j, shift;
+
+go_on:
+	index = root->min;
+
+	if (root->enter_node && !sradix_node_full(root, root->enter_node)) {
+		node = root->enter_node;
+		BUG_ON((index >> (root->shift * root->height)));
+	} else {
+		node = root->rnode;
+		if (node == NULL || (index >> (root->shift * root->height))
+		    || sradix_node_full(root, node)) {
+			error = sradix_tree_extend(root, index);
+			if (error)
+				return error;
+
+			node = root->rnode;
+		}
+	}
+
+
+	height = node->height;
+	shift = (height - 1) * root->shift;
+	offset = (index >> shift) & root->mask;
+	while (shift > 0) {
+		offset_saved = offset;
+		for (; offset < root->stores_size; offset++) {
+			store = &node->stores[offset];
+			tmp = *store;
+
+			if (!tmp || !sradix_node_full(root, tmp))
+				break;
+		}
+		BUG_ON(offset >= root->stores_size);
+
+		if (offset != offset_saved) {
+			index += (offset - offset_saved) << shift;
+			index &= ~((1UL << shift) - 1);
+		}
+
+		if (!tmp) {
+			if (!(tmp = root->alloc()))
+				return -ENOMEM;
+
+			tmp->height = shift / root->shift;
+			*store = tmp;
+			tmp->parent = node;
+			node->count++;
+//			if (root->extend)
+//				root->extend(node, tmp);
+		}
+
+		node = tmp;
+		shift -= root->shift;
+		offset = (index >> shift) & root->mask;
+	}
+
+	BUG_ON(node->height != 1);
+
+
+	store = &node->stores[offset];
+	for (i = 0, j = 0;
+	      j < root->stores_size - node->count && 
+	      i < root->stores_size - offset && j < num; i++) {
+		if (!store[i]) {
+			store[i] = item[j];
+			if (root->assign)
+				root->assign(node, index + i, item[j]);
+			j++;
+		}
+	}
+
+	node->count += j;
+	root->num += j;
+	num -= j;
+
+	while (sradix_node_full(root, node)) {
+		node = node->parent;
+		if (!node)
+			break;
+
+		node->fulls++;
+	}
+
+	if (unlikely(!node)) {
+		/* All nodes are full */
+		root->min = 1 << (root->height * root->shift);
+		root->enter_node = NULL;
+	} else {
+		root->min = index + i - 1;
+		root->min |= (1UL << (node->height - 1)) - 1;
+		root->min++;
+		root->enter_node = node;
+	}
+
+	if (num) {
+		item += j;
+		goto go_on;
+	}
+
+	return 0;
+}
+
+
+/**
+ *	sradix_tree_shrink    -    shrink height of a sradix tree to minimal
+ *      @root		sradix tree root
+ *  
+ */
+static inline void sradix_tree_shrink(struct sradix_tree_root *root)
+{
+	/* try to shrink tree height */
+	while (root->height > 1) {
+		struct sradix_tree_node *to_free = root->rnode;
+
+		/*
+		 * The candidate node has more than one child, or its child
+		 * is not at the leftmost store, we cannot shrink.
+		 */
+		if (to_free->count != 1 || !to_free->stores[0])
+			break;
+
+		root->rnode = to_free->stores[0];
+		root->rnode->parent = NULL;
+		root->height--;
+		if (unlikely(root->enter_node == to_free)) {
+			root->enter_node = NULL;
+		}
+		root->free(to_free);
+	}
+}
+
+/*
+ * Del the item on the known leaf node and index
+ */
+void sradix_tree_delete_from_leaf(struct sradix_tree_root *root, 
+				  struct sradix_tree_node *node, unsigned long index)
+{
+	unsigned int offset;
+	struct sradix_tree_node *start, *end;
+
+	BUG_ON(node->height != 1);
+
+	start = node;
+	while (node && !(--node->count))
+		node = node->parent;
+
+	end = node;
+	if (!node) {
+		root->rnode = NULL;
+		root->height = 0;
+		root->min = 0;
+		root->num = 0;
+		root->enter_node = NULL;
+	} else {
+		offset = (index >> (root->shift * (node->height - 1))) & root->mask;
+		if (root->rm)
+			root->rm(node, offset);
+		node->stores[offset] = NULL;
+		root->num--;
+		if (root->min > index) {
+			root->min = index;
+			root->enter_node = node;
+		}
+	}
+
+	if (start != end) {
+		do {
+			node = start;
+			start = start->parent;
+			if (unlikely(root->enter_node == node))
+				root->enter_node = end;
+			root->free(node);
+		} while (start != end);
+
+		/*
+		 * Note that shrink may free "end", so enter_node still need to
+		 * be checked inside.
+		 */
+		sradix_tree_shrink(root);
+	} else if (node->count == root->stores_size - 1) {
+		/* It WAS a full leaf node. Update the ancestors */
+		node = node->parent;
+		while (node) {
+			node->fulls--;
+			if (node->fulls != root->stores_size - 1)
+				break;
+
+			node = node->parent;
+		}
+	}
+}
+
+void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index)
+{
+	unsigned int height, offset;
+	struct sradix_tree_node *node;
+	int shift;
+
+	node = root->rnode;
+	if (node == NULL || (index >> (root->shift * root->height)))
+		return NULL;
+
+	height = root->height;
+	shift = (height - 1) * root->shift;
+
+	do {
+		offset = (index >> shift) & root->mask;
+		node = node->stores[offset];
+		if (!node)
+			return NULL;
+
+		shift -= root->shift;
+	} while (shift >= 0);
+
+	return node;
+}
+
+/*
+ * Return the item if it exists, otherwise create it in place
+ * and return the created item.
+ */
+void *sradix_tree_lookup_create(struct sradix_tree_root *root, 
+			unsigned long index, void *(*item_alloc)(void))
+{
+	unsigned int height, offset;
+	struct sradix_tree_node *node, *tmp;
+	void *item;
+	int shift, error;
+
+	if (root->rnode == NULL || (index >> (root->shift * root->height))) {
+		if (item_alloc) {
+			error = sradix_tree_extend(root, index);
+			if (error)
+				return NULL;
+		} else {
+			return NULL;
+		}
+	}
+
+	node = root->rnode;
+	height = root->height;
+	shift = (height - 1) * root->shift;
+
+	do {
+		offset = (index >> shift) & root->mask;
+		if (!node->stores[offset]) {
+			if (!(tmp = root->alloc()))
+				return NULL;
+
+			tmp->height = shift / root->shift;
+			node->stores[offset] = tmp;
+			tmp->parent = node;
+			node->count++;
+			node = tmp;
+		} else {
+			node = node->stores[offset];
+		}
+
+		shift -= root->shift;
+	} while (shift > 0);
+
+	BUG_ON(node->height != 1);
+	offset = index & root->mask;
+	if (node->stores[offset]) {
+		return node->stores[offset];
+	} else if (item_alloc) {
+		if (!(item = item_alloc()))
+			return NULL;
+
+		node->stores[offset] = item;
+
+		/*
+		 * NOTE: we do NOT call root->assign here, since this item is
+		 * newly created by us having no meaning. Caller can call this
+		 * if it's necessary to do so.
+		 */
+
+		node->count++;
+		root->num++;
+
+		while (sradix_node_full(root, node)) {
+			node = node->parent;
+			if (!node)
+				break;
+
+			node->fulls++;
+		}
+
+		if (unlikely(!node)) {
+			/* All nodes are full */
+			root->min = 1 << (root->height * root->shift);
+		} else {
+			if (root->min == index) {
+				root->min |= (1UL << (node->height - 1)) - 1;
+				root->min++;
+				root->enter_node = node;
+			}
+		}
+
+		return item;
+	} else {
+		return NULL;
+	}
+
+}
+
+int sradix_tree_delete(struct sradix_tree_root *root, unsigned long index)
+{
+	unsigned int height, offset;
+	struct sradix_tree_node *node;
+	int shift;
+
+	node = root->rnode;
+	if (node == NULL || (index >> (root->shift * root->height)))
+		return -ENOENT;
+
+	height = root->height;
+	shift = (height - 1) * root->shift;
+
+	do {
+		offset = (index >> shift) & root->mask;
+		node = node->stores[offset];
+		if (!node)
+			return -ENOENT;
+
+		shift -= root->shift;
+	} while (shift > 0);
+
+	offset = index & root->mask;
+	if (!node->stores[offset])
+		return -ENOENT;
+
+	sradix_tree_delete_from_leaf(root, node, index);
+
+	return 0;
+}
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 53ad6c083..60f77f1d4 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -242,6 +242,7 @@ depot_stack_handle_t depot_save_stack(struct stack_trace *trace,
 		 */
 		alloc_flags &= ~GFP_ZONEMASK;
 		alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
+		alloc_flags |= __GFP_NOWARN;
 		page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
 		if (page)
 			prealloc = page_address(page);
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 33f655ef4..9c5fe8110 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -40,8 +40,8 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long
 		unsigned long c, data;
 
 		/* Fall back to byte-at-a-time if we get a page fault */
-		if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
-			break;
+		unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time);
+
 		*(unsigned long *)(dst+res) = c;
 		if (has_zero(c, &data, &constants)) {
 			data = prep_zero_mask(c, data, &constants);
@@ -56,8 +56,7 @@ byte_at_a_time:
 	while (max) {
 		char c;
 
-		if (unlikely(unsafe_get_user(c,src+res)))
-			return -EFAULT;
+		unsafe_get_user(c,src+res, efault);
 		dst[res] = c;
 		if (!c)
 			return res;
@@ -76,6 +75,7 @@ byte_at_a_time:
 	 * Nope: we hit the address space limit, and we still had more
 	 * characters the caller would have wanted. That's an EFAULT.
 	 */
+efault:
 	return -EFAULT;
 }
 
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 262594362..8e105ed4d 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -45,8 +45,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
 	src -= align;
 	max += align;
 
-	if (unlikely(unsafe_get_user(c,(unsigned long __user *)src)))
-		return 0;
+	unsafe_get_user(c, (unsigned long __user *)src, efault);
 	c |= aligned_byte_mask(align);
 
 	for (;;) {
@@ -61,8 +60,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
 		if (unlikely(max <= sizeof(unsigned long)))
 			break;
 		max -= sizeof(unsigned long);
-		if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
-			return 0;
+		unsafe_get_user(c, (unsigned long __user *)(src+res), efault);
 	}
 	res -= align;
 
@@ -77,6 +75,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
 	 * Nope: we hit the address space limit, and we still had more
 	 * characters the caller would have wanted. That's 0.
 	 */
+efault:
 	return 0;
 }
 
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 76f29ecba..22e13a0e1 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -738,7 +738,7 @@ swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
 dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 			    unsigned long offset, size_t size,
 			    enum dma_data_direction dir,
-			    struct dma_attrs *attrs)
+			    unsigned long attrs)
 {
 	phys_addr_t map, phys = page_to_phys(page) + offset;
 	dma_addr_t dev_addr = phys_to_dma(dev, phys);
@@ -807,7 +807,7 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 
 void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
 			size_t size, enum dma_data_direction dir,
-			struct dma_attrs *attrs)
+			unsigned long attrs)
 {
 	unmap_single(hwdev, dev_addr, size, dir);
 }
@@ -877,7 +877,7 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_device);
  */
 int
 swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
-		     enum dma_data_direction dir, struct dma_attrs *attrs)
+		     enum dma_data_direction dir, unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -914,7 +914,7 @@ int
 swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
 	       enum dma_data_direction dir)
 {
-	return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
+	return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, 0);
 }
 EXPORT_SYMBOL(swiotlb_map_sg);
 
@@ -924,7 +924,8 @@ EXPORT_SYMBOL(swiotlb_map_sg);
  */
 void
 swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
-		       int nelems, enum dma_data_direction dir, struct dma_attrs *attrs)
+		       int nelems, enum dma_data_direction dir,
+		       unsigned long attrs)
 {
 	struct scatterlist *sg;
 	int i;
@@ -941,7 +942,7 @@ void
 swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
 		 enum dma_data_direction dir)
 {
-	return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
+	return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, 0);
 }
 EXPORT_SYMBOL(swiotlb_unmap_sg);
 
diff --git a/lib/test_hash.c b/lib/test_hash.c
index c9549c8b4..cac20c5fb 100644
--- a/lib/test_hash.c
+++ b/lib/test_hash.c
@@ -143,7 +143,7 @@ static int __init
 test_hash_init(void)
 {
 	char buf[SIZE+1];
-	u32 string_or = 0, hash_or[2][33] = { 0 };
+	u32 string_or = 0, hash_or[2][33] = { { 0, } };
 	unsigned tests = 0;
 	unsigned long long h64 = 0;
 	int i, j;
@@ -155,8 +155,8 @@ test_hash_init(void)
 		buf[j] = '\0';
 
 		for (i = 0; i <= j; i++) {
-			u64 hashlen = hashlen_string(buf+i);
-			u32 h0 = full_name_hash(buf+i, j-i);
+			u64 hashlen = hashlen_string(buf+i, buf+i);
+			u32 h0 = full_name_hash(buf+i, buf+i, j-i);
 
 			/* Check that hashlen_string gets the length right */
 			if (hashlen_len(hashlen) != j-i) {
@@ -219,21 +219,27 @@ test_hash_init(void)
 	}
 
 	/* Issue notices about skipped tests. */
-#ifndef HAVE_ARCH__HASH_32
-	pr_info("__hash_32() has no arch implementation to test.");
-#elif HAVE_ARCH__HASH_32 != 1
+#ifdef HAVE_ARCH__HASH_32
+#if HAVE_ARCH__HASH_32 != 1
 	pr_info("__hash_32() is arch-specific; not compared to generic.");
 #endif
-#ifndef HAVE_ARCH_HASH_32
-	pr_info("hash_32() has no arch implementation to test.");
-#elif HAVE_ARCH_HASH_32 != 1
+#else
+	pr_info("__hash_32() has no arch implementation to test.");
+#endif
+#ifdef HAVE_ARCH_HASH_32
+#if HAVE_ARCH_HASH_32 != 1
 	pr_info("hash_32() is arch-specific; not compared to generic.");
 #endif
-#ifndef HAVE_ARCH_HASH_64
-	pr_info("hash_64() has no arch implementation to test.");
-#elif HAVE_ARCH_HASH_64 != 1
+#else
+	pr_info("hash_32() has no arch implementation to test.");
+#endif
+#ifdef HAVE_ARCH_HASH_64
+#if HAVE_ARCH_HASH_64 != 1
 	pr_info("hash_64() is arch-specific; not compared to generic.");
 #endif
+#else
+	pr_info("hash_64() has no arch implementation to test.");
+#endif
 
 	pr_notice("%u tests passed.", tests);
 
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 297fdb5e7..64e899b63 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -38,7 +38,7 @@ MODULE_PARM_DESC(runs, "Number of test runs per variant (default: 4)");
 
 static int max_size = 0;
 module_param(max_size, int, 0);
-MODULE_PARM_DESC(runs, "Maximum table size (default: calculated)");
+MODULE_PARM_DESC(max_size, "Maximum table size (default: calculated)");
 
 static bool shrinking = false;
 module_param(shrinking, bool, 0);
diff --git a/lib/ubsan.c b/lib/ubsan.c
index 8799ae5e2..fb0409df1 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -308,7 +308,7 @@ static void handle_object_size_mismatch(struct type_mismatch_data *data,
 		return;
 
 	ubsan_prologue(&data->location, &flags);
-	pr_err("%s address %pk with insufficient space\n",
+	pr_err("%s address %p with insufficient space\n",
 		type_check_kinds[data->type_check_kind],
 		(void *) ptr);
 	pr_err("for an object of type %s\n", data->type->type_name);
diff --git a/lib/wbt.c b/lib/wbt.c
index cc5a24270..257c7b099 100644
--- a/lib/wbt.c
+++ b/lib/wbt.c
@@ -1,5 +1,5 @@
 /*
- * buffered writeback throttling. losely based on CoDel. We can't drop
+ * buffered writeback throttling. loosely based on CoDel. We can't drop
  * packets for IO scheduling, so the logic is something like this:
  *
  * - Monitor latencies in a defined window of time.
@@ -9,30 +9,31 @@
  * - For any window where we don't have solid data on what the latencies
  *   look like, retain status quo.
  * - If latencies look good, decrement scaling step.
+ * - If we're only doing writes, allow the scaling step to go negative. This
+ *   will temporarily boost write performance, snapping back to a stable
+ *   scaling step of 0 if reads show up or the heavy writers finish. Unlike
+ *   positive scaling steps where we shrink the monitoring window, a negative
+ *   scaling step retains the default step==0 window size.
  *
  * Copyright (C) 2016 Jens Axboe
  *
- * Things that (may) need changing:
- *
- *	- Different scaling of background/normal/high priority writeback.
- *	  We may have to violate guarantees for max.
- *	- We can have mismatches between the stat window and our window.
- *
  */
 #include <linux/kernel.h>
 #include <linux/blk_types.h>
 #include <linux/slab.h>
 #include <linux/backing-dev.h>
 #include <linux/wbt.h>
+#include <linux/swap.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/wbt.h>
 
 enum {
 	/*
-	 * Might need to be higher
+	 * Default setting, we'll scale up (to 75% of QD max) or down (min 1)
+	 * from here depending on device stats
 	 */
-	RWB_MAX_DEPTH	= 64,
+	RWB_DEF_DEPTH	= 16,
 
 	/*
 	 * 100msec window
@@ -40,10 +41,9 @@ enum {
 	RWB_WINDOW_NSEC		= 100 * 1000 * 1000ULL,
 
 	/*
-	 * Disregard stats, if we don't meet these minimums
+	 * Disregard stats, if we don't meet this minimum
 	 */
 	RWB_MIN_WRITE_SAMPLES	= 3,
-	RWB_MIN_READ_SAMPLES	= 1,
 
 	/*
 	 * If we have this number of consecutive windows with not enough
@@ -89,18 +89,51 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
 	}
 }
 
-void __wbt_done(struct rq_wb *rwb)
+/*
+ * If a task was rate throttled in balance_dirty_pages() within the last
+ * second or so, use that to indicate a higher cleaning rate.
+ */
+static bool wb_recent_wait(struct rq_wb *rwb)
+{
+	struct bdi_writeback *wb = &rwb->bdi->wb;
+
+	return time_before(jiffies, wb->dirty_sleep + HZ);
+}
+
+static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, bool is_kswapd)
+{
+	return &rwb->rq_wait[is_kswapd];
+}
+
+static void rwb_wake_all(struct rq_wb *rwb)
+{
+	int i;
+
+	for (i = 0; i < WBT_NUM_RWQ; i++) {
+		struct rq_wait *rqw = &rwb->rq_wait[i];
+
+		if (waitqueue_active(&rqw->wait))
+			wake_up_all(&rqw->wait);
+	}
+}
+
+void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
 {
+	struct rq_wait *rqw;
 	int inflight, limit;
 
-	inflight = atomic_dec_return(&rwb->inflight);
+	if (!(wb_acct & WBT_TRACKED))
+		return;
+
+	rqw = get_rq_wait(rwb, wb_acct & WBT_KSWAPD);
+	inflight = atomic_dec_return(&rqw->inflight);
 
 	/*
 	 * wbt got disabled with IO in flight. Wake up any potential
 	 * waiters, we don't have to do more than that.
 	 */
 	if (unlikely(!rwb_enabled(rwb))) {
-		wake_up_all(&rwb->wait);
+		rwb_wake_all(rwb);
 		return;
 	}
 
@@ -108,7 +141,7 @@ void __wbt_done(struct rq_wb *rwb)
 	 * If the device does write back caching, drop further down
 	 * before we wake people up.
 	 */
-	if (rwb->wc && !atomic_read(&rwb->bdi->wb.dirty_sleeping))
+	if (rwb->wc && !wb_recent_wait(rwb))
 		limit = 0;
 	else
 		limit = rwb->wb_normal;
@@ -119,11 +152,11 @@ void __wbt_done(struct rq_wb *rwb)
 	if (inflight && inflight >= limit)
 		return;
 
-	if (waitqueue_active(&rwb->wait)) {
+	if (waitqueue_active(&rqw->wait)) {
 		int diff = limit - inflight;
 
 		if (!inflight || diff >= rwb->wb_background / 2)
-			wake_up_nr(&rwb->wait, 1);
+			wake_up(&rqw->wait);
 	}
 }
 
@@ -136,27 +169,33 @@ void wbt_done(struct rq_wb *rwb, struct wb_issue_stat *stat)
 	if (!rwb)
 		return;
 
-	if (!wbt_tracked(stat)) {
+	if (!wbt_is_tracked(stat)) {
 		if (rwb->sync_cookie == stat) {
 			rwb->sync_issue = 0;
 			rwb->sync_cookie = NULL;
 		}
 
-		wb_timestamp(rwb, &rwb->last_comp);
+		if (wbt_is_read(stat))
+			wb_timestamp(rwb, &rwb->last_comp);
+		wbt_clear_state(stat);
 	} else {
 		WARN_ON_ONCE(stat == rwb->sync_cookie);
-		__wbt_done(rwb);
-		wbt_clear_tracked(stat);
+		__wbt_done(rwb, wbt_stat_to_mask(stat));
+		wbt_clear_state(stat);
 	}
 }
 
-static void calc_wb_limits(struct rq_wb *rwb)
+/*
+ * Return true, if we can't increase the depth further by scaling
+ */
+static bool calc_wb_limits(struct rq_wb *rwb)
 {
 	unsigned int depth;
+	bool ret = false;
 
 	if (!rwb->min_lat_nsec) {
 		rwb->wb_max = rwb->wb_normal = rwb->wb_background = 0;
-		return;
+		return false;
 	}
 
 	/*
@@ -167,22 +206,44 @@ static void calc_wb_limits(struct rq_wb *rwb)
 	 * scaling down, then keep a setting of 1/1/1.
 	 */
 	if (rwb->queue_depth == 1) {
-		if (rwb->scale_step)
+		if (rwb->scale_step > 0)
 			rwb->wb_max = rwb->wb_normal = 1;
-		else
+		else {
 			rwb->wb_max = rwb->wb_normal = 2;
+			ret = true;
+		}
 		rwb->wb_background = 1;
 	} else {
-		depth = min_t(unsigned int, RWB_MAX_DEPTH, rwb->queue_depth);
+		/*
+		 * scale_step == 0 is our default state. If we have suffered
+		 * latency spikes, step will be > 0, and we shrink the
+		 * allowed write depths. If step is < 0, we're only doing
+		 * writes, and we allow a temporarily higher depth to
+		 * increase performance.
+		 */
+		depth = min_t(unsigned int, RWB_DEF_DEPTH, rwb->queue_depth);
+		if (rwb->scale_step > 0)
+			depth = 1 + ((depth - 1) >> min(31, rwb->scale_step));
+		else if (rwb->scale_step < 0) {
+			unsigned int maxd = 3 * rwb->queue_depth / 4;
+
+			depth = 1 + ((depth - 1) << -rwb->scale_step);
+			if (depth > maxd) {
+				depth = maxd;
+				ret = true;
+			}
+		}
 
 		/*
 		 * Set our max/normal/bg queue depths based on how far
 		 * we have scaled down (->scale_step).
 		 */
-		rwb->wb_max = 1 + ((depth - 1) >> min(31U, rwb->scale_step));
+		rwb->wb_max = depth;
 		rwb->wb_normal = (rwb->wb_max + 1) / 2;
 		rwb->wb_background = (rwb->wb_max + 3) / 4;
 	}
+
+	return ret;
 }
 
 static bool inline stat_sample_valid(struct blk_rq_stat *stat)
@@ -209,8 +270,9 @@ static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
 }
 
 enum {
-	LAT_OK,
+	LAT_OK = 1,
 	LAT_UNKNOWN,
+	LAT_UNKNOWN_WRITES,
 	LAT_EXCEEDED,
 };
 
@@ -234,8 +296,21 @@ static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
 		return LAT_EXCEEDED;
 	}
 
-	if (!stat_sample_valid(stat))
+	/*
+	 * No read/write mix, if stat isn't valid
+	 */
+	if (!stat_sample_valid(stat)) {
+		/*
+		 * If we had writes in this stat window and the window is
+		 * current, we're only doing writes. If a task recently
+		 * waited or still has writes in flights, consider us doing
+		 * just writes as well.
+		 */
+		if ((stat[1].nr_samples && rwb->stat_ops->is_current(stat)) ||
+		    wb_recent_wait(rwb) || wbt_inflight(rwb))
+			return LAT_UNKNOWN_WRITES;
 		return LAT_UNKNOWN;
+	}
 
 	/*
 	 * If the 'min' latency exceeds our target, step down.
@@ -269,23 +344,27 @@ static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
 static void scale_up(struct rq_wb *rwb)
 {
 	/*
-	 * If we're at 0, we can't go lower.
+	 * Hit max in previous round, stop here
 	 */
-	if (!rwb->scale_step)
+	if (rwb->scaled_max)
 		return;
 
 	rwb->scale_step--;
 	rwb->unknown_cnt = 0;
 	rwb->stat_ops->clear(rwb->ops_data);
-	calc_wb_limits(rwb);
 
-	if (waitqueue_active(&rwb->wait))
-		wake_up_all(&rwb->wait);
+	rwb->scaled_max = calc_wb_limits(rwb);
+
+	rwb_wake_all(rwb);
 
 	rwb_trace_step(rwb, "step up");
 }
 
-static void scale_down(struct rq_wb *rwb)
+/*
+ * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
+ * had a latency violation.
+ */
+static void scale_down(struct rq_wb *rwb, bool hard_throttle)
 {
 	/*
 	 * Stop scaling down when we've hit the limit. This also prevents
@@ -295,7 +374,12 @@ static void scale_down(struct rq_wb *rwb)
 	if (rwb->wb_max == 1)
 		return;
 
-	rwb->scale_step++;
+	if (rwb->scale_step < 0 && hard_throttle)
+		rwb->scale_step = 0;
+	else
+		rwb->scale_step++;
+
+	rwb->scaled_max = false;
 	rwb->unknown_cnt = 0;
 	rwb->stat_ops->clear(rwb->ops_data);
 	calc_wb_limits(rwb);
@@ -306,13 +390,23 @@ static void rwb_arm_timer(struct rq_wb *rwb)
 {
 	unsigned long expires;
 
-	/*
-	 * We should speed this up, using some variant of a fast integer
-	 * inverse square root calculation. Since we only do this for
-	 * every window expiration, it's not a huge deal, though.
-	 */
-	rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
+	if (rwb->scale_step > 0) {
+		/*
+		 * We should speed this up, using some variant of a fast
+		 * integer inverse square root calculation. Since we only do
+		 * this for every window expiration, it's not a huge deal,
+		 * though.
+		 */
+		rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
 					int_sqrt((rwb->scale_step + 1) << 8));
+	} else {
+		/*
+		 * For step < 0, we don't want to increase/decrease the
+		 * window size.
+		 */
+		rwb->cur_win_nsec = rwb->win_nsec;
+	}
+
 	expires = jiffies + nsecs_to_jiffies(rwb->cur_win_nsec);
 	mod_timer(&rwb->window_timer, expires);
 }
@@ -320,28 +414,45 @@ static void rwb_arm_timer(struct rq_wb *rwb)
 static void wb_timer_fn(unsigned long data)
 {
 	struct rq_wb *rwb = (struct rq_wb *) data;
+	unsigned int inflight = wbt_inflight(rwb);
 	int status;
 
+	status = latency_exceeded(rwb);
+
+	trace_wbt_timer(rwb->bdi, status, rwb->scale_step, inflight);
+
 	/*
 	 * If we exceeded the latency target, step down. If we did not,
 	 * step one level up. If we don't know enough to say either exceeded
 	 * or ok, then don't do anything.
 	 */
-	status = latency_exceeded(rwb);
 	switch (status) {
 	case LAT_EXCEEDED:
-		scale_down(rwb);
+		scale_down(rwb, true);
 		break;
 	case LAT_OK:
 		scale_up(rwb);
 		break;
+	case LAT_UNKNOWN_WRITES:
+		scale_up(rwb);
+		break;
 	case LAT_UNKNOWN:
+		if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP)
+			break;
 		/*
-		 * We had no read samples, start bumping up the write
-		 * depth slowly
+		 * We get here for two reasons:
+		 *
+		 * 1) We previously scaled reduced depth, and we currently
+		 *    don't have a valid read/write sample. For that case,
+		 *    slowly return to center state (step == 0).
+		 * 2) We started a the center step, but don't have a valid
+		 *    read/write sample, but we do have writes going on.
+		 *    Allow step to go negative, to increase write perf.
 		 */
-		if (++rwb->unknown_cnt >= RWB_UNKNOWN_BUMP)
+		if (rwb->scale_step > 0)
 			scale_up(rwb);
+		else if (rwb->scale_step < 0)
+			scale_down(rwb, false);
 		break;
 	default:
 		break;
@@ -350,17 +461,17 @@ static void wb_timer_fn(unsigned long data)
 	/*
 	 * Re-arm timer, if we have IO in flight
 	 */
-	if (rwb->scale_step || atomic_read(&rwb->inflight))
+	if (rwb->scale_step || inflight)
 		rwb_arm_timer(rwb);
 }
 
 void wbt_update_limits(struct rq_wb *rwb)
 {
 	rwb->scale_step = 0;
+	rwb->scaled_max = false;
 	calc_wb_limits(rwb);
 
-	if (waitqueue_active(&rwb->wait))
-		wake_up_all(&rwb->wait);
+	rwb_wake_all(rwb);
 }
 
 static bool close_io(struct rq_wb *rwb)
@@ -378,13 +489,14 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 	unsigned int limit;
 
 	/*
-	 * At this point we know it's a buffered write. If REQ_SYNC is
-	 * set, then it's WB_SYNC_ALL writeback, and we'll use the max
-	 * limit for that. If the write is marked as a background write,
-	 * then use the idle limit, or go to normal if we haven't had
-	 * competing IO for a bit.
+	 * At this point we know it's a buffered write. If this is
+	 * kswapd trying to free memory, or REQ_SYNC is set, set, then
+	 * it's WB_SYNC_ALL writeback, and we'll use the max limit for
+	 * that. If the write is marked as a background write, then use
+	 * the idle limit, or go to normal if we haven't had competing
+	 * IO for a bit.
 	 */
-	if ((rw & REQ_HIPRIO) || atomic_read(&rwb->bdi->wb.dirty_sleeping))
+	if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
 		limit = rwb->wb_max;
 	else if ((rw & REQ_BG) || close_io(rwb)) {
 		/*
@@ -398,7 +510,8 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 	return limit;
 }
 
-static inline bool may_queue(struct rq_wb *rwb, unsigned long rw)
+static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw,
+			     unsigned long rw)
 {
 	/*
 	 * inc it here even if disabled, since we'll dec it at completion.
@@ -406,11 +519,11 @@ static inline bool may_queue(struct rq_wb *rwb, unsigned long rw)
 	 * and someone turned it off at the same time.
 	 */
 	if (!rwb_enabled(rwb)) {
-		atomic_inc(&rwb->inflight);
+		atomic_inc(&rqw->inflight);
 		return true;
 	}
 
-	return atomic_inc_below(&rwb->inflight, get_limit(rwb, rw));
+	return atomic_inc_below(&rqw->inflight, get_limit(rwb, rw));
 }
 
 /*
@@ -419,16 +532,17 @@ static inline bool may_queue(struct rq_wb *rwb, unsigned long rw)
  */
 static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
 {
+	struct rq_wait *rqw = get_rq_wait(rwb, current_is_kswapd());
 	DEFINE_WAIT(wait);
 
-	if (may_queue(rwb, rw))
+	if (may_queue(rwb, rqw, rw))
 		return;
 
 	do {
-		prepare_to_wait_exclusive(&rwb->wait, &wait,
+		prepare_to_wait_exclusive(&rqw->wait, &wait,
 						TASK_UNINTERRUPTIBLE);
 
-		if (may_queue(rwb, rw))
+		if (may_queue(rwb, rqw, rw))
 			break;
 
 		if (lock)
@@ -440,15 +554,17 @@ static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
 			spin_lock_irq(lock);
 	} while (1);
 
-	finish_wait(&rwb->wait, &wait);
+	finish_wait(&rqw->wait, &wait);
 }
 
 static inline bool wbt_should_throttle(struct rq_wb *rwb, unsigned int rw)
 {
+	const int op = rw >> BIO_OP_SHIFT;
+
 	/*
 	 * If not a WRITE (or a discard), do nothing
 	 */
-	if (!(rw & REQ_WRITE) || (rw & REQ_DISCARD))
+	if (!(op == REQ_OP_WRITE || op == REQ_OP_DISCARD))
 		return false;
 
 	/*
@@ -466,14 +582,20 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, unsigned int rw)
  * in an irq held spinlock, if it holds one when calling this function.
  * If we do sleep, we'll release and re-grab it.
  */
-bool wbt_wait(struct rq_wb *rwb, unsigned int rw, spinlock_t *lock)
+unsigned int wbt_wait(struct rq_wb *rwb, unsigned int rw, spinlock_t *lock)
 {
+	unsigned int ret = 0;
+
 	if (!rwb_enabled(rwb))
-		return false;
+		return 0;
+
+	if ((rw >> BIO_OP_SHIFT) == REQ_OP_READ)
+		ret = WBT_READ;
 
 	if (!wbt_should_throttle(rwb, rw)) {
-		wb_timestamp(rwb, &rwb->last_issue);
-		return false;
+		if (ret & WBT_READ)
+			wb_timestamp(rwb, &rwb->last_issue);
+		return ret;
 	}
 
 	__wbt_wait(rwb, rw, lock);
@@ -481,7 +603,10 @@ bool wbt_wait(struct rq_wb *rwb, unsigned int rw, spinlock_t *lock)
 	if (!timer_pending(&rwb->window_timer))
 		rwb_arm_timer(rwb);
 
-	return true;
+	if (current_is_kswapd())
+		ret |= WBT_KSWAPD;
+
+	return ret | WBT_TRACKED;
 }
 
 void wbt_issue(struct rq_wb *rwb, struct wb_issue_stat *stat)
@@ -499,7 +624,7 @@ void wbt_issue(struct rq_wb *rwb, struct wb_issue_stat *stat)
 	 * only use the address to compare with, which is why we store the
 	 * sync_issue time locally.
 	 */
-	if (!wbt_tracked(stat) && !rwb->sync_issue) {
+	if (wbt_is_read(stat) && !rwb->sync_issue) {
 		rwb->sync_cookie = stat;
 		rwb->sync_issue = wbt_issue_stat_get_time(stat);
 	}
@@ -531,9 +656,11 @@ void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on)
 
 void wbt_disable(struct rq_wb *rwb)
 {
-	del_timer_sync(&rwb->window_timer);
-	rwb->win_nsec = rwb->min_lat_nsec = 0;
-	wbt_update_limits(rwb);
+	if (rwb) {
+		del_timer_sync(&rwb->window_timer);
+		rwb->win_nsec = rwb->min_lat_nsec = 0;
+		wbt_update_limits(rwb);
+	}
 }
 EXPORT_SYMBOL_GPL(wbt_disable);
 
@@ -541,20 +668,27 @@ struct rq_wb *wbt_init(struct backing_dev_info *bdi, struct wb_stat_ops *ops,
 		       void *ops_data)
 {
 	struct rq_wb *rwb;
+	int i;
+
+	if (!ops->get || !ops->is_current || !ops->clear)
+		return ERR_PTR(-EINVAL);
 
 	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
 	if (!rwb)
 		return ERR_PTR(-ENOMEM);
 
-	atomic_set(&rwb->inflight, 0);
-	init_waitqueue_head(&rwb->wait);
+	for (i = 0; i < WBT_NUM_RWQ; i++) {
+		atomic_set(&rwb->rq_wait[i].inflight, 0);
+		init_waitqueue_head(&rwb->rq_wait[i].wait);
+	}
+
 	setup_timer(&rwb->window_timer, wb_timer_fn, (unsigned long) rwb);
 	rwb->wc = 1;
-	rwb->queue_depth = RWB_MAX_DEPTH;
+	rwb->queue_depth = RWB_DEF_DEPTH;
 	rwb->last_comp = rwb->last_issue = jiffies;
 	rwb->bdi = bdi;
 	rwb->win_nsec = RWB_WINDOW_NSEC;
-	rwb->stat_ops = ops,
+	rwb->stat_ops = ops;
 	rwb->ops_data = ops_data;
 	wbt_update_limits(rwb);
 	return rwb;
-- 
cgit v1.2.3-54-g00ecf