summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author André Fabian Silva Delgado <emulatorman@parabola.nu> 2016-10-20 00:10:27 -0300
committer André Fabian Silva Delgado <emulatorman@parabola.nu> 2016-10-20 00:10:27 -0300
commit d0b2f91bede3bd5e3d24dd6803e56eee959c1797 (patch)
tree 7fee4ab0509879c373c4f2cbd5b8a5be5b4041ee /lib
parent e914f8eb445e8f74b00303c19c2ffceaedd16a05 (diff)
Linux-libre 4.8.2-gnu (tag: pck-4.8.2-gnu)
Diffstat (limited to 'lib')
-rw-r--r-- lib/Kconfig.debug | 67
-rw-r--r-- lib/Kconfig.kasan | 4
-rw-r--r-- lib/Makefile | 10
-rw-r--r-- lib/atomic64.c | 32
-rw-r--r-- lib/atomic64_test.c | 34
-rw-r--r-- lib/bitmap.c | 2
-rw-r--r-- lib/chacha20.c | 79
-rw-r--r-- lib/crc32.c | 16
-rw-r--r-- lib/digsig.c | 16
-rw-r--r-- lib/dma-debug.c | 2
-rw-r--r-- lib/dma-noop.c | 9
-rw-r--r-- lib/dynamic_debug.c | 7
-rw-r--r-- lib/earlycpio.c | 5
-rw-r--r-- lib/hweight.c | 4
-rw-r--r-- lib/iommu-helper.c | 3
-rw-r--r-- lib/iov_iter.c | 53
-rw-r--r-- lib/mpi/mpicoder.c | 247
-rw-r--r-- lib/radix-tree.c | 92
-rw-r--r-- lib/random32.c | 1
-rw-r--r-- lib/ratelimit.c | 10
-rw-r--r-- lib/rbtree.c | 26
-rw-r--r-- lib/rhashtable.c | 20
-rw-r--r-- lib/sradix-tree.c | 476
-rw-r--r-- lib/stackdepot.c | 1
-rw-r--r-- lib/strncpy_from_user.c | 8
-rw-r--r-- lib/strnlen_user.c | 7
-rw-r--r-- lib/swiotlb.c | 13
-rw-r--r-- lib/test_hash.c | 30
-rw-r--r-- lib/test_rhashtable.c | 2
-rw-r--r-- lib/ubsan.c | 2
-rw-r--r-- lib/wbt.c | 288
31 files changed, 1148 insertions, 418 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 42d3d798c..896a3d0b1 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -244,6 +244,7 @@ config PAGE_OWNER
depends on DEBUG_KERNEL && STACKTRACE_SUPPORT
select DEBUG_FS
select STACKTRACE
+ select STACKDEPOT
select PAGE_EXTENSION
help
This keeps track of what call chain is the owner of a page, may
@@ -708,6 +709,8 @@ config KCOV
bool "Code coverage for fuzzing"
depends on ARCH_HAS_KCOV
select DEBUG_FS
+ select GCC_PLUGINS if !COMPILE_TEST
+ select GCC_PLUGIN_SANCOV if !COMPILE_TEST
help
KCOV exposes kernel code coverage information in a form suitable
for coverage-guided fuzzing (randomized testing).
@@ -718,6 +721,17 @@ config KCOV
For more details, see Documentation/kcov.txt.
+config KCOV_INSTRUMENT_ALL
+ bool "Instrument all code by default"
+ depends on KCOV
+ default y if KCOV
+ help
+ If you are doing generic system call fuzzing (like e.g. syzkaller),
+ then you will want to instrument the whole kernel and you should
+ say y here. If you are doing more targeted fuzzing (like e.g.
+ filesystem fuzzing with AFL) then you will want to enable coverage
+ for more specific subsets of files, and should say n here.
+
config DEBUG_SHIRQ
bool "Debug shared IRQ handlers"
depends on DEBUG_KERNEL
@@ -807,7 +821,7 @@ config DETECT_HUNG_TASK
help
Say Y here to enable the kernel to detect "hung tasks",
which are bugs that cause the task to be stuck in
- uninterruptible "D" state indefinitiley.
+ uninterruptible "D" state indefinitely.
When a hung task is detected, the kernel will print the
current stack trace (which you should report), but the
@@ -1307,22 +1321,6 @@ config RCU_PERF_TEST
Say M if you want the RCU performance tests to build as a module.
Say N if you are unsure.
-config RCU_PERF_TEST_RUNNABLE
- bool "performance tests for RCU runnable by default"
- depends on RCU_PERF_TEST = y
- default n
- help
- This option provides a way to build the RCU performance tests
- directly into the kernel without them starting up at boot time.
- You can use /sys/module to manually override this setting.
- This /proc file is available only when the RCU performance
- tests have been built into the kernel.
-
- Say Y here if you want the RCU performance tests to start during
- boot (you probably don't).
- Say N here if you want the RCU performance tests to start only
- after being manually enabled via /sys/module.
-
config RCU_TORTURE_TEST
tristate "torture tests for RCU"
depends on DEBUG_KERNEL && !SCHED_BFS
@@ -1340,23 +1338,6 @@ config RCU_TORTURE_TEST
Say M if you want the RCU torture tests to build as a module.
Say N if you are unsure.
-config RCU_TORTURE_TEST_RUNNABLE
- bool "torture tests for RCU runnable by default"
- depends on RCU_TORTURE_TEST = y
- default n
- help
- This option provides a way to build the RCU torture tests
- directly into the kernel without them starting up at boot
- time. You can use /proc/sys/kernel/rcutorture_runnable
- to manually override this setting. This /proc file is
- available only when the RCU torture tests have been built
- into the kernel.
-
- Say Y here if you want the RCU torture tests to start during
- boot (you probably don't).
- Say N here if you want the RCU torture tests to start only
- after being manually enabled via /proc.
-
config RCU_TORTURE_TEST_SLOW_PREINIT
bool "Slow down RCU grace-period pre-initialization to expose races"
depends on RCU_TORTURE_TEST
@@ -1706,24 +1687,6 @@ config LATENCYTOP
Enable this option if you want to use the LatencyTOP tool
to find out which userspace is blocking on what kernel operations.
-config ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
- bool
-
-config DEBUG_STRICT_USER_COPY_CHECKS
- bool "Strict user copy size checks"
- depends on ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
- depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING
- help
- Enabling this option turns a certain set of sanity checks for user
- copy operations into compile time failures.
-
- The copy_from_user() etc checks are there to help test if there
- are sufficient security checks on the length argument of
- the copy operation, by having gcc prove that the argument is
- within bounds.
-
- If unsure, say N.
-
source kernel/trace/Kconfig
menu "Runtime Testing"
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 67d8c6838..bd38aab05 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -5,9 +5,9 @@ if HAVE_ARCH_KASAN
config KASAN
bool "KASan: runtime memory debugger"
- depends on SLUB_DEBUG || (SLAB && !DEBUG_SLAB)
+ depends on SLUB || (SLAB && !DEBUG_SLAB)
select CONSTRUCTORS
- select STACKDEPOT if SLAB
+ select STACKDEPOT
help
Enables kernel address sanitizer - runtime memory debugger,
designed to find out-of-bounds accesses and use-after-free bugs.
diff --git a/lib/Makefile b/lib/Makefile
index 5b5506e3b..5bd016bd3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -15,19 +15,15 @@ KCOV_INSTRUMENT_rbtree.o := n
KCOV_INSTRUMENT_list_debug.o := n
KCOV_INSTRUMENT_debugobjects.o := n
KCOV_INSTRUMENT_dynamic_debug.o := n
-# Kernel does not boot if we instrument this file as it uses custom calling
-# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
-KCOV_INSTRUMENT_hweight.o := n
lib-y := ctype.o string.o vsprintf.o cmdline.o \
- rbtree.o radix-tree.o dump_stack.o timerqueue.o\
+ rbtree.o radix-tree.o sradix-tree.o dump_stack.o timerqueue.o\
idr.o int_sqrt.o extable.o \
- sha1.o md5.o irq_regs.o argv_split.o \
+ sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o
-obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
lib-$(CONFIG_HAS_DMA) += dma-noop.o
@@ -74,8 +70,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
-GCOV_PROFILE_hweight.o := n
-CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
obj-$(CONFIG_BTREE) += btree.o
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 2886ebac6..53c2d5edc 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -96,17 +96,41 @@ long long atomic64_##op##_return(long long a, atomic64_t *v) \
} \
EXPORT_SYMBOL(atomic64_##op##_return);
+#define ATOMIC64_FETCH_OP(op, c_op) \
+long long atomic64_fetch_##op(long long a, atomic64_t *v) \
+{ \
+ unsigned long flags; \
+ raw_spinlock_t *lock = lock_addr(v); \
+ long long val; \
+ \
+ raw_spin_lock_irqsave(lock, flags); \
+ val = v->counter; \
+ v->counter c_op a; \
+ raw_spin_unlock_irqrestore(lock, flags); \
+ return val; \
+} \
+EXPORT_SYMBOL(atomic64_fetch_##op);
+
#define ATOMIC64_OPS(op, c_op) \
ATOMIC64_OP(op, c_op) \
- ATOMIC64_OP_RETURN(op, c_op)
+ ATOMIC64_OP_RETURN(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
ATOMIC64_OPS(add, +=)
ATOMIC64_OPS(sub, -=)
-ATOMIC64_OP(and, &=)
-ATOMIC64_OP(or, |=)
-ATOMIC64_OP(xor, ^=)
#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, c_op) \
+ ATOMIC64_OP(op, c_op) \
+ ATOMIC64_OP_RETURN(op, c_op) \
+ ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &=)
+ATOMIC64_OPS(or, |=)
+ATOMIC64_OPS(xor, ^=)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 123481814..dbb369145 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -53,11 +53,25 @@ do { \
BUG_ON(atomic##bit##_read(&v) != r); \
} while (0)
+#define TEST_FETCH(bit, op, c_op, val) \
+do { \
+ atomic##bit##_set(&v, v0); \
+ r = v0; \
+ r c_op val; \
+ BUG_ON(atomic##bit##_##op(val, &v) != v0); \
+ BUG_ON(atomic##bit##_read(&v) != r); \
+} while (0)
+
#define RETURN_FAMILY_TEST(bit, op, c_op, val) \
do { \
FAMILY_TEST(TEST_RETURN, bit, op, c_op, val); \
} while (0)
+#define FETCH_FAMILY_TEST(bit, op, c_op, val) \
+do { \
+ FAMILY_TEST(TEST_FETCH, bit, op, c_op, val); \
+} while (0)
+
#define TEST_ARGS(bit, op, init, ret, expect, args...) \
do { \
atomic##bit##_set(&v, init); \
@@ -114,6 +128,16 @@ static __init void test_atomic(void)
RETURN_FAMILY_TEST(, sub_return, -=, onestwos);
RETURN_FAMILY_TEST(, sub_return, -=, -one);
+ FETCH_FAMILY_TEST(, fetch_add, +=, onestwos);
+ FETCH_FAMILY_TEST(, fetch_add, +=, -one);
+ FETCH_FAMILY_TEST(, fetch_sub, -=, onestwos);
+ FETCH_FAMILY_TEST(, fetch_sub, -=, -one);
+
+ FETCH_FAMILY_TEST(, fetch_or, |=, v1);
+ FETCH_FAMILY_TEST(, fetch_and, &=, v1);
+ FETCH_FAMILY_TEST(, fetch_andnot, &= ~, v1);
+ FETCH_FAMILY_TEST(, fetch_xor, ^=, v1);
+
INC_RETURN_FAMILY_TEST(, v0);
DEC_RETURN_FAMILY_TEST(, v0);
@@ -154,6 +178,16 @@ static __init void test_atomic64(void)
RETURN_FAMILY_TEST(64, sub_return, -=, onestwos);
RETURN_FAMILY_TEST(64, sub_return, -=, -one);
+ FETCH_FAMILY_TEST(64, fetch_add, +=, onestwos);
+ FETCH_FAMILY_TEST(64, fetch_add, +=, -one);
+ FETCH_FAMILY_TEST(64, fetch_sub, -=, onestwos);
+ FETCH_FAMILY_TEST(64, fetch_sub, -=, -one);
+
+ FETCH_FAMILY_TEST(64, fetch_or, |=, v1);
+ FETCH_FAMILY_TEST(64, fetch_and, &=, v1);
+ FETCH_FAMILY_TEST(64, fetch_andnot, &= ~, v1);
+ FETCH_FAMILY_TEST(64, fetch_xor, ^=, v1);
+
INIT(v0);
atomic64_inc(&v);
r += one;
diff --git a/lib/bitmap.c b/lib/bitmap.c
index c66da508c..eca88087f 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -14,9 +14,9 @@
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/string.h>
+#include <linux/uaccess.h>
#include <asm/page.h>
-#include <asm/uaccess.h>
/*
* bitmaps provide an array of bits, implemented using an an
diff --git a/lib/chacha20.c b/lib/chacha20.c
new file mode 100644
index 000000000..250ceed9e
--- /dev/null
+++ b/lib/chacha20.c
@@ -0,0 +1,79 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/bitops.h>
+#include <linux/cryptohash.h>
+#include <asm/unaligned.h>
+#include <crypto/chacha20.h>
+
+static inline u32 rotl32(u32 v, u8 n)
+{
+ return (v << n) | (v >> (sizeof(v) * 8 - n));
+}
+
+extern void chacha20_block(u32 *state, void *stream)
+{
+ u32 x[16], *out = stream;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(x); i++)
+ x[i] = state[i];
+
+ for (i = 0; i < 20; i += 2) {
+ x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 16);
+ x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 16);
+ x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 16);
+ x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 16);
+
+ x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 12);
+ x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 12);
+ x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 12);
+ x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 12);
+
+ x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 8);
+ x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 8);
+ x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 8);
+ x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 8);
+
+ x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 7);
+ x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 7);
+ x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 7);
+ x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 7);
+
+ x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 16);
+ x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 16);
+ x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 16);
+ x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 16);
+
+ x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 12);
+ x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 12);
+ x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 12);
+ x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 12);
+
+ x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 8);
+ x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 8);
+ x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 8);
+ x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 8);
+
+ x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 7);
+ x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 7);
+ x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 7);
+ x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 7);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(x); i++)
+ out[i] = cpu_to_le32(x[i] + state[i]);
+
+ state[12]++;
+}
+EXPORT_SYMBOL(chacha20_block);
diff --git a/lib/crc32.c b/lib/crc32.c
index 9a907d489..7fbd1a112 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -979,7 +979,6 @@ static int __init crc32c_test(void)
int i;
int errors = 0;
int bytes = 0;
- struct timespec start, stop;
u64 nsec;
unsigned long flags;
@@ -999,20 +998,17 @@ static int __init crc32c_test(void)
local_irq_save(flags);
local_irq_disable();
- getnstimeofday(&start);
+ nsec = ktime_get_ns();
for (i = 0; i < 100; i++) {
if (test[i].crc32c_le != __crc32c_le(test[i].crc, test_buf +
test[i].start, test[i].length))
errors++;
}
- getnstimeofday(&stop);
+ nsec = ktime_get_ns() - nsec;
local_irq_restore(flags);
local_irq_enable();
- nsec = stop.tv_nsec - start.tv_nsec +
- 1000000000 * (stop.tv_sec - start.tv_sec);
-
pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS);
if (errors)
@@ -1065,7 +1061,6 @@ static int __init crc32_test(void)
int i;
int errors = 0;
int bytes = 0;
- struct timespec start, stop;
u64 nsec;
unsigned long flags;
@@ -1088,7 +1083,7 @@ static int __init crc32_test(void)
local_irq_save(flags);
local_irq_disable();
- getnstimeofday(&start);
+ nsec = ktime_get_ns();
for (i = 0; i < 100; i++) {
if (test[i].crc_le != crc32_le(test[i].crc, test_buf +
test[i].start, test[i].length))
@@ -1098,14 +1093,11 @@ static int __init crc32_test(void)
test[i].start, test[i].length))
errors++;
}
- getnstimeofday(&stop);
+ nsec = ktime_get_ns() - nsec;
local_irq_restore(flags);
local_irq_enable();
- nsec = stop.tv_nsec - start.tv_nsec +
- 1000000000 * (stop.tv_sec - start.tv_sec);
-
pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n",
CRC_LE_BITS, CRC_BE_BITS);
diff --git a/lib/digsig.c b/lib/digsig.c
index 07be6c1ef..55b8b2f41 100644
--- a/lib/digsig.c
+++ b/lib/digsig.c
@@ -104,21 +104,25 @@ static int digsig_verify_rsa(struct key *key,
datap = pkh->mpi;
endp = ukp->data + ukp->datalen;
- err = -ENOMEM;
-
for (i = 0; i < pkh->nmpi; i++) {
unsigned int remaining = endp - datap;
pkey[i] = mpi_read_from_buffer(datap, &remaining);
- if (!pkey[i])
+ if (IS_ERR(pkey[i])) {
+ err = PTR_ERR(pkey[i]);
goto err;
+ }
datap += remaining;
}
mblen = mpi_get_nbits(pkey[0]);
mlen = DIV_ROUND_UP(mblen, 8);
- if (mlen == 0)
+ if (mlen == 0) {
+ err = -EINVAL;
goto err;
+ }
+
+ err = -ENOMEM;
out1 = kzalloc(mlen, GFP_KERNEL);
if (!out1)
@@ -126,8 +130,10 @@ static int digsig_verify_rsa(struct key *key,
nret = siglen;
in = mpi_read_from_buffer(sig, &nret);
- if (!in)
+ if (IS_ERR(in)) {
+ err = PTR_ERR(in);
goto err;
+ }
res = mpi_alloc(mpi_get_nlimbs(in) * 2);
if (!res)
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 51a76af25..fcfa1939a 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -253,6 +253,7 @@ static int hash_fn(struct dma_debug_entry *entry)
*/
static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
unsigned long *flags)
+ __acquires(&dma_entry_hash[idx].lock)
{
int idx = hash_fn(entry);
unsigned long __flags;
@@ -267,6 +268,7 @@ static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
*/
static void put_hash_bucket(struct hash_bucket *bucket,
unsigned long *flags)
+ __releases(&bucket->lock)
{
unsigned long __flags = *flags;
diff --git a/lib/dma-noop.c b/lib/dma-noop.c
index 721456468..3d766e78f 100644
--- a/lib/dma-noop.c
+++ b/lib/dma-noop.c
@@ -10,7 +10,7 @@
static void *dma_noop_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
void *ret;
@@ -22,7 +22,7 @@ static void *dma_noop_alloc(struct device *dev, size_t size,
static void dma_noop_free(struct device *dev, size_t size,
void *cpu_addr, dma_addr_t dma_addr,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
free_pages((unsigned long)cpu_addr, get_order(size));
}
@@ -30,13 +30,14 @@ static void dma_noop_free(struct device *dev, size_t size,
static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
return page_to_phys(page) + offset;
}
static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir,
+ unsigned long attrs)
{
int i;
struct scatterlist *sg;
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index fe42b6ec3..da796e2dc 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -188,6 +188,13 @@ static int ddebug_change(const struct ddebug_query *query,
newflags = (dp->flags & mask) | flags;
if (newflags == dp->flags)
continue;
+#ifdef HAVE_JUMP_LABEL
+ if (dp->flags & _DPRINTK_FLAGS_PRINT) {
+ if (!(flags & _DPRINTK_FLAGS_PRINT))
+ static_branch_disable(&dp->key.dd_key_true);
+ } else if (flags & _DPRINTK_FLAGS_PRINT)
+ static_branch_enable(&dp->key.dd_key_true);
+#endif
dp->flags = newflags;
vpr_info("changed %s:%d [%s]%s =%s\n",
trim_prefix(dp->filename), dp->lineno,
diff --git a/lib/earlycpio.c b/lib/earlycpio.c
index 3eb3e4722..db283ba4d 100644
--- a/lib/earlycpio.c
+++ b/lib/earlycpio.c
@@ -125,7 +125,10 @@ struct cpio_data find_cpio_data(const char *path, void *data,
if ((ch[C_MODE] & 0170000) == 0100000 &&
ch[C_NAMESIZE] >= mypathsize &&
!memcmp(p, path, mypathsize)) {
- *nextoff = (long)nptr - (long)data;
+
+ if (nextoff)
+ *nextoff = (long)nptr - (long)data;
+
if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) {
pr_warn(
"File %s exceeding MAX_CPIO_FILE_NAME [%d]\n",
diff --git a/lib/hweight.c b/lib/hweight.c
index 9a5c1f221..43273a7d8 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,6 +9,7 @@
* The Hamming Weight of a number is the total number of bits set in it.
*/
+#ifndef __HAVE_ARCH_SW_HWEIGHT
unsigned int __sw_hweight32(unsigned int w)
{
#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
@@ -25,6 +26,7 @@ unsigned int __sw_hweight32(unsigned int w)
#endif
}
EXPORT_SYMBOL(__sw_hweight32);
+#endif
unsigned int __sw_hweight16(unsigned int w)
{
@@ -43,6 +45,7 @@ unsigned int __sw_hweight8(unsigned int w)
}
EXPORT_SYMBOL(__sw_hweight8);
+#ifndef __HAVE_ARCH_SW_HWEIGHT
unsigned long __sw_hweight64(__u64 w)
{
#if BITS_PER_LONG == 32
@@ -65,3 +68,4 @@ unsigned long __sw_hweight64(__u64 w)
#endif
}
EXPORT_SYMBOL(__sw_hweight64);
+#endif
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index c27e26921..a816f3a80 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -29,8 +29,7 @@ again:
index = bitmap_find_next_zero_area(map, size, start, nr, align_mask);
if (index < size) {
if (iommu_is_span_boundary(index, nr, shift, boundary_size)) {
- /* we could do more effectively */
- start = index + 1;
+ start = ALIGN(shift + index, boundary_size) - shift;
goto again;
}
bitmap_set(map, index, nr);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index eaaf73032..7e3138cfc 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -56,37 +56,24 @@
n = wanted; \
}
-#define iterate_bvec(i, n, __v, __p, skip, STEP) { \
- size_t wanted = n; \
- __p = i->bvec; \
- __v.bv_len = min_t(size_t, n, __p->bv_len - skip); \
- if (likely(__v.bv_len)) { \
- __v.bv_page = __p->bv_page; \
- __v.bv_offset = __p->bv_offset + skip; \
- (void)(STEP); \
- skip += __v.bv_len; \
- n -= __v.bv_len; \
- } \
- while (unlikely(n)) { \
- __p++; \
- __v.bv_len = min_t(size_t, n, __p->bv_len); \
- if (unlikely(!__v.bv_len)) \
+#define iterate_bvec(i, n, __v, __bi, skip, STEP) { \
+ struct bvec_iter __start; \
+ __start.bi_size = n; \
+ __start.bi_bvec_done = skip; \
+ __start.bi_idx = 0; \
+ for_each_bvec(__v, i->bvec, __bi, __start) { \
+ if (!__v.bv_len) \
continue; \
- __v.bv_page = __p->bv_page; \
- __v.bv_offset = __p->bv_offset; \
(void)(STEP); \
- skip = __v.bv_len; \
- n -= __v.bv_len; \
} \
- n = wanted; \
}
#define iterate_all_kinds(i, n, v, I, B, K) { \
size_t skip = i->iov_offset; \
if (unlikely(i->type & ITER_BVEC)) { \
- const struct bio_vec *bvec; \
struct bio_vec v; \
- iterate_bvec(i, n, v, bvec, skip, (B)) \
+ struct bvec_iter __bi; \
+ iterate_bvec(i, n, v, __bi, skip, (B)) \
} else if (unlikely(i->type & ITER_KVEC)) { \
const struct kvec *kvec; \
struct kvec v; \
@@ -104,15 +91,13 @@
if (i->count) { \
size_t skip = i->iov_offset; \
if (unlikely(i->type & ITER_BVEC)) { \
- const struct bio_vec *bvec; \
+ const struct bio_vec *bvec = i->bvec; \
struct bio_vec v; \
- iterate_bvec(i, n, v, bvec, skip, (B)) \
- if (skip == bvec->bv_len) { \
- bvec++; \
- skip = 0; \
- } \
- i->nr_segs -= bvec - i->bvec; \
- i->bvec = bvec; \
+ struct bvec_iter __bi; \
+ iterate_bvec(i, n, v, __bi, skip, (B)) \
+ i->bvec = __bvec_iter_bvec(i->bvec, __bi); \
+ i->nr_segs -= i->bvec - bvec; \
+ skip = __bi.bi_bvec_done; \
} else if (unlikely(i->type & ITER_KVEC)) { \
const struct kvec *kvec; \
struct kvec v; \
@@ -159,7 +144,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
buf = iov->iov_base + skip;
copy = min(bytes, iov->iov_len - skip);
- if (!fault_in_pages_writeable(buf, copy)) {
+ if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
kaddr = kmap_atomic(page);
from = kaddr + offset;
@@ -190,6 +175,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
copy = min(bytes, iov->iov_len - skip);
}
/* Too bad - revert to non-atomic kmap */
+
kaddr = kmap(page);
from = kaddr + offset;
left = __copy_to_user(buf, from, copy);
@@ -208,6 +194,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
bytes -= copy;
}
kunmap(page);
+
done:
if (skip == iov->iov_len) {
iov++;
@@ -240,7 +227,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
buf = iov->iov_base + skip;
copy = min(bytes, iov->iov_len - skip);
- if (!fault_in_pages_readable(buf, copy)) {
+ if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
kaddr = kmap_atomic(page);
to = kaddr + offset;
@@ -271,6 +258,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
copy = min(bytes, iov->iov_len - skip);
}
/* Too bad - revert to non-atomic kmap */
+
kaddr = kmap(page);
to = kaddr + offset;
left = __copy_from_user(to, buf, copy);
@@ -289,6 +277,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
bytes -= copy;
}
kunmap(page);
+
done:
if (skip == iov->iov_len) {
iov++;
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 747606f9e..5a0f75a3b 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -21,6 +21,7 @@
#include <linux/bitops.h>
#include <linux/count_zeros.h>
#include <linux/byteorder/generic.h>
+#include <linux/scatterlist.h>
#include <linux/string.h>
#include "mpi-internal.h"
@@ -50,9 +51,7 @@ MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes)
return NULL;
}
if (nbytes > 0)
- nbits -= count_leading_zeros(buffer[0]);
- else
- nbits = 0;
+ nbits -= count_leading_zeros(buffer[0]) - (BITS_PER_LONG - 8);
nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
val = mpi_alloc(nlimbs);
@@ -82,50 +81,30 @@ EXPORT_SYMBOL_GPL(mpi_read_raw_data);
MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
{
const uint8_t *buffer = xbuffer;
- int i, j;
- unsigned nbits, nbytes, nlimbs, nread = 0;
- mpi_limb_t a;
- MPI val = NULL;
+ unsigned int nbits, nbytes;
+ MPI val;
if (*ret_nread < 2)
- goto leave;
+ return ERR_PTR(-EINVAL);
nbits = buffer[0] << 8 | buffer[1];
if (nbits > MAX_EXTERN_MPI_BITS) {
pr_info("MPI: mpi too large (%u bits)\n", nbits);
- goto leave;
+ return ERR_PTR(-EINVAL);
}
- buffer += 2;
- nread = 2;
nbytes = DIV_ROUND_UP(nbits, 8);
- nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
- val = mpi_alloc(nlimbs);
- if (!val)
- return NULL;
- i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
- i %= BYTES_PER_MPI_LIMB;
- val->nbits = nbits;
- j = val->nlimbs = nlimbs;
- val->sign = 0;
- for (; j > 0; j--) {
- a = 0;
- for (; i < BYTES_PER_MPI_LIMB; i++) {
- if (++nread > *ret_nread) {
- printk
- ("MPI: mpi larger than buffer nread=%d ret_nread=%d\n",
- nread, *ret_nread);
- goto leave;
- }
- a <<= 8;
- a |= *buffer++;
- }
- i = 0;
- val->d[j - 1] = a;
+ if (nbytes + 2 > *ret_nread) {
+ pr_info("MPI: mpi larger than buffer nbytes=%u ret_nread=%u\n",
+ nbytes, *ret_nread);
+ return ERR_PTR(-EINVAL);
}
-leave:
- *ret_nread = nread;
+ val = mpi_read_raw_data(buffer + 2, nbytes);
+ if (!val)
+ return ERR_PTR(-ENOMEM);
+
+ *ret_nread = nbytes + 2;
return val;
}
EXPORT_SYMBOL_GPL(mpi_read_from_buffer);
@@ -250,82 +229,6 @@ void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign)
}
EXPORT_SYMBOL_GPL(mpi_get_buffer);
-/****************
- * Use BUFFER to update MPI.
- */
-int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign)
-{
- const uint8_t *buffer = xbuffer, *p;
- mpi_limb_t alimb;
- int nlimbs;
- int i;
-
- nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
- if (RESIZE_IF_NEEDED(a, nlimbs) < 0)
- return -ENOMEM;
- a->sign = sign;
-
- for (i = 0, p = buffer + nbytes - 1; p >= buffer + BYTES_PER_MPI_LIMB;) {
-#if BYTES_PER_MPI_LIMB == 4
- alimb = (mpi_limb_t) *p--;
- alimb |= (mpi_limb_t) *p-- << 8;
- alimb |= (mpi_limb_t) *p-- << 16;
- alimb |= (mpi_limb_t) *p-- << 24;
-#elif BYTES_PER_MPI_LIMB == 8
- alimb = (mpi_limb_t) *p--;
- alimb |= (mpi_limb_t) *p-- << 8;
- alimb |= (mpi_limb_t) *p-- << 16;
- alimb |= (mpi_limb_t) *p-- << 24;
- alimb |= (mpi_limb_t) *p-- << 32;
- alimb |= (mpi_limb_t) *p-- << 40;
- alimb |= (mpi_limb_t) *p-- << 48;
- alimb |= (mpi_limb_t) *p-- << 56;
-#else
-#error please implement for this limb size.
-#endif
- a->d[i++] = alimb;
- }
- if (p >= buffer) {
-#if BYTES_PER_MPI_LIMB == 4
- alimb = *p--;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 8;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 16;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 24;
-#elif BYTES_PER_MPI_LIMB == 8
- alimb = (mpi_limb_t) *p--;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 8;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 16;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 24;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 32;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 40;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 48;
- if (p >= buffer)
- alimb |= (mpi_limb_t) *p-- << 56;
-#else
-#error please implement for this limb size.
-#endif
- a->d[i++] = alimb;
- }
- a->nlimbs = i;
-
- if (i != nlimbs) {
- pr_emerg("MPI: mpi_set_buffer: Assertion failed (%d != %d)", i,
- nlimbs);
- BUG();
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(mpi_set_buffer);
-
/**
* mpi_write_to_sgl() - Funnction exports MPI to an sgl (msb first)
*
@@ -335,16 +238,13 @@ EXPORT_SYMBOL_GPL(mpi_set_buffer);
* @a: a multi precision integer
* @sgl: scatterlist to write to. Needs to be at least
* mpi_get_size(a) long.
- * @nbytes: in/out param - it has the be set to the maximum number of
- * bytes that can be written to sgl. This has to be at least
- * the size of the integer a. On return it receives the actual
- * length of the data written on success or the data that would
- * be written if buffer was too small.
+ * @nbytes: the number of bytes to write. Leading bytes will be
+ * filled with zero.
* @sign: if not NULL, it will be set to the sign of a.
*
* Return: 0 on success or error code in case of error
*/
-int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
+int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned nbytes,
int *sign)
{
u8 *p, *p2;
@@ -356,55 +256,60 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
#error please implement for this limb size.
#endif
unsigned int n = mpi_get_size(a);
- int i, x, y = 0, lzeros, buf_len;
-
- if (!nbytes)
- return -EINVAL;
+ struct sg_mapping_iter miter;
+ int i, x, buf_len;
+ int nents;
if (sign)
*sign = a->sign;
- lzeros = count_lzeros(a);
-
- if (*nbytes < n - lzeros) {
- *nbytes = n - lzeros;
+ if (nbytes < n)
return -EOVERFLOW;
- }
- *nbytes = n - lzeros;
- buf_len = sgl->length;
- p2 = sg_virt(sgl);
+ nents = sg_nents_for_len(sgl, nbytes);
+ if (nents < 0)
+ return -EINVAL;
- for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB,
- lzeros %= BYTES_PER_MPI_LIMB;
- i >= 0; i--) {
+ sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_TO_SG);
+ sg_miter_next(&miter);
+ buf_len = miter.length;
+ p2 = miter.addr;
+
+ while (nbytes > n) {
+ i = min_t(unsigned, nbytes - n, buf_len);
+ memset(p2, 0, i);
+ p2 += i;
+ nbytes -= i;
+
+ buf_len -= i;
+ if (!buf_len) {
+ sg_miter_next(&miter);
+ buf_len = miter.length;
+ p2 = miter.addr;
+ }
+ }
+
+ for (i = a->nlimbs - 1; i >= 0; i--) {
#if BYTES_PER_MPI_LIMB == 4
- alimb = cpu_to_be32(a->d[i]);
+ alimb = a->d[i] ? cpu_to_be32(a->d[i]) : 0;
#elif BYTES_PER_MPI_LIMB == 8
- alimb = cpu_to_be64(a->d[i]);
+ alimb = a->d[i] ? cpu_to_be64(a->d[i]) : 0;
#else
#error please implement for this limb size.
#endif
- if (lzeros) {
- y = lzeros;
- lzeros = 0;
- }
-
- p = (u8 *)&alimb + y;
+ p = (u8 *)&alimb;
- for (x = 0; x < sizeof(alimb) - y; x++) {
- if (!buf_len) {
- sgl = sg_next(sgl);
- if (!sgl)
- return -EINVAL;
- buf_len = sgl->length;
- p2 = sg_virt(sgl);
- }
+ for (x = 0; x < sizeof(alimb); x++) {
*p2++ = *p++;
- buf_len--;
+ if (!--buf_len) {
+ sg_miter_next(&miter);
+ buf_len = miter.length;
+ p2 = miter.addr;
+ }
}
- y = 0;
}
+
+ sg_miter_stop(&miter);
return 0;
}
EXPORT_SYMBOL_GPL(mpi_write_to_sgl);
@@ -424,19 +329,23 @@ EXPORT_SYMBOL_GPL(mpi_write_to_sgl);
*/
MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
{
- struct scatterlist *sg;
- int x, i, j, z, lzeros, ents;
+ struct sg_mapping_iter miter;
unsigned int nbits, nlimbs;
+ int x, j, z, lzeros, ents;
+ unsigned int len;
+ const u8 *buff;
mpi_limb_t a;
MPI val = NULL;
- lzeros = 0;
- ents = sg_nents(sgl);
+ ents = sg_nents_for_len(sgl, nbytes);
+ if (ents < 0)
+ return NULL;
- for_each_sg(sgl, sg, ents, i) {
- const u8 *buff = sg_virt(sg);
- int len = sg->length;
+ sg_miter_start(&miter, sgl, ents, SG_MITER_ATOMIC | SG_MITER_FROM_SG);
+ lzeros = 0;
+ len = 0;
+ while (nbytes > 0) {
while (len && !*buff) {
lzeros++;
len--;
@@ -446,12 +355,17 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
if (len && *buff)
break;
- ents--;
+ sg_miter_next(&miter);
+ buff = miter.addr;
+ len = miter.length;
+
nbytes -= lzeros;
lzeros = 0;
}
- sgl = sg;
+ miter.consumed = lzeros;
+ sg_miter_stop(&miter);
+
nbytes -= lzeros;
nbits = nbytes * 8;
if (nbits > MAX_EXTERN_MPI_BITS) {
@@ -460,8 +374,7 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
}
if (nbytes > 0)
- nbits -= count_leading_zeros(*(u8 *)(sg_virt(sgl) + lzeros)) -
- (BITS_PER_LONG - 8);
+ nbits -= count_leading_zeros(*buff) - (BITS_PER_LONG - 8);
nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB);
val = mpi_alloc(nlimbs);
@@ -480,21 +393,21 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
z = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
z %= BYTES_PER_MPI_LIMB;
- for_each_sg(sgl, sg, ents, i) {
- const u8 *buffer = sg_virt(sg) + lzeros;
- int len = sg->length - lzeros;
+ while (sg_miter_next(&miter)) {
+ buff = miter.addr;
+ len = miter.length;
for (x = 0; x < len; x++) {
a <<= 8;
- a |= *buffer++;
+ a |= *buff++;
if (((z + x + 1) % BYTES_PER_MPI_LIMB) == 0) {
val->d[j--] = a;
a = 0;
}
}
z += x;
- lzeros = 0;
}
+
return val;
}
EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index bc7852f95..91f0727e3 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -38,6 +38,9 @@
#include <linux/preempt.h> /* in_interrupt() */
+/* Number of nodes in fully populated tree of given height */
+static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
+
/*
* Radix tree node cache.
*/
@@ -102,10 +105,10 @@ static unsigned int radix_tree_descend(struct radix_tree_node *parent,
#ifdef CONFIG_RADIX_TREE_MULTIORDER
if (radix_tree_is_internal_node(entry)) {
- unsigned long siboff = get_slot_offset(parent, entry);
- if (siboff < RADIX_TREE_MAP_SIZE) {
- offset = siboff;
- entry = rcu_dereference_raw(parent->slots[offset]);
+ if (is_sibling_entry(parent, entry)) {
+ void **sibentry = (void **) entry_to_node(entry);
+ offset = get_slot_offset(parent, sibentry);
+ entry = rcu_dereference_raw(*sibentry);
}
}
#endif
@@ -342,7 +345,7 @@ radix_tree_node_free(struct radix_tree_node *node)
* To make use of this facility, the radix tree must be initialised without
* __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
*/
-static int __radix_tree_preload(gfp_t gfp_mask)
+static int __radix_tree_preload(gfp_t gfp_mask, int nr)
{
struct radix_tree_preload *rtp;
struct radix_tree_node *node;
@@ -356,14 +359,14 @@ static int __radix_tree_preload(gfp_t gfp_mask)
preempt_disable();
rtp = this_cpu_ptr(&radix_tree_preloads);
- while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) {
+ while (rtp->nr < nr) {
preempt_enable();
node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
if (node == NULL)
goto out;
preempt_disable();
rtp = this_cpu_ptr(&radix_tree_preloads);
- if (rtp->nr < RADIX_TREE_PRELOAD_SIZE) {
+ if (rtp->nr < nr) {
node->private_data = rtp->nodes;
rtp->nodes = node;
rtp->nr++;
@@ -389,7 +392,7 @@ int radix_tree_preload(gfp_t gfp_mask)
{
/* Warn on non-sensical use... */
WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
- return __radix_tree_preload(gfp_mask);
+ return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
}
EXPORT_SYMBOL(radix_tree_preload);
@@ -401,7 +404,7 @@ EXPORT_SYMBOL(radix_tree_preload);
int radix_tree_maybe_preload(gfp_t gfp_mask)
{
if (gfpflags_allow_blocking(gfp_mask))
- return __radix_tree_preload(gfp_mask);
+ return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
/* Preloading doesn't help anything with this gfp mask, skip it */
preempt_disable();
return 0;
@@ -409,6 +412,51 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
EXPORT_SYMBOL(radix_tree_maybe_preload);
/*
+ * Same as the function above, but preloads the number of nodes required to
+ * insert (1 << order) contiguous naturally-aligned elements.
+ */
+int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
+{
+ unsigned long nr_subtrees;
+ int nr_nodes, subtree_height;
+
+ /* Preloading doesn't help anything with this gfp mask, skip it */
+ if (!gfpflags_allow_blocking(gfp_mask)) {
+ preempt_disable();
+ return 0;
+ }
+
+ /*
+ * Calculate number and height of fully populated subtrees it takes to
+ * store (1 << order) elements.
+ */
+ nr_subtrees = 1 << order;
+ for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE;
+ subtree_height++)
+ nr_subtrees >>= RADIX_TREE_MAP_SHIFT;
+
+ /*
+ * The worst case is zero height tree with a single item at index 0 and
+ * then inserting items starting at ULONG_MAX - (1 << order).
+ *
+ * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to
+ * 0-index item.
+ */
+ nr_nodes = RADIX_TREE_MAX_PATH;
+
+ /* Plus branch to fully populated subtrees. */
+ nr_nodes += RADIX_TREE_MAX_PATH - subtree_height;
+
+ /* Root node is shared. */
+ nr_nodes--;
+
+ /* Plus nodes required to build subtrees. */
+ nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height];
+
+ return __radix_tree_preload(gfp_mask, nr_nodes);
+}
+
+/*
* The maximum index which can be stored in a radix tree
*/
static inline unsigned long shift_maxindex(unsigned int shift)
@@ -1577,6 +1625,31 @@ radix_tree_node_ctor(void *arg)
INIT_LIST_HEAD(&node->private_list);
}
+static __init unsigned long __maxindex(unsigned int height)
+{
+ unsigned int width = height * RADIX_TREE_MAP_SHIFT;
+ int shift = RADIX_TREE_INDEX_BITS - width;
+
+ if (shift < 0)
+ return ~0UL;
+ if (shift >= BITS_PER_LONG)
+ return 0UL;
+ return ~0UL >> shift;
+}
+
+static __init void radix_tree_init_maxnodes(void)
+{
+ unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1];
+ unsigned int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
+ height_to_maxindex[i] = __maxindex(i);
+ for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) {
+ for (j = i; j > 0; j--)
+ height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1;
+ }
+}
+
static int radix_tree_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
@@ -1603,5 +1676,6 @@ void __init radix_tree_init(void)
sizeof(struct radix_tree_node), 0,
SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
radix_tree_node_ctor);
+ radix_tree_init_maxnodes();
hotcpu_notifier(radix_tree_callback, 0);
}
diff --git a/lib/random32.c b/lib/random32.c
index 510d1ce7d..69ed593aa 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -233,7 +233,6 @@ static void __prandom_timer(unsigned long dontcare)
static void __init __prandom_start_seed_timer(void)
{
- set_timer_slack(&seed_timer, HZ);
seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC);
add_timer(&seed_timer);
}
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 2c5de8646..08f8043ca 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -46,12 +46,14 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
rs->begin = jiffies;
if (time_is_before_jiffies(rs->begin + rs->interval)) {
- if (rs->missed)
- printk(KERN_WARNING "%s: %d callbacks suppressed\n",
- func, rs->missed);
+ if (rs->missed) {
+ if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
+ pr_warn("%s: %d callbacks suppressed\n", func, rs->missed);
+ rs->missed = 0;
+ }
+ }
rs->begin = jiffies;
rs->printed = 0;
- rs->missed = 0;
}
if (rs->burst && rs->burst > rs->printed) {
rs->printed++;
diff --git a/lib/rbtree.c b/lib/rbtree.c
index 1356454e3..eb8a19fee 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -539,17 +539,39 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new,
{
struct rb_node *parent = rb_parent(victim);
+ /* Copy the pointers/colour from the victim to the replacement */
+ *new = *victim;
+
/* Set the surrounding nodes to point to the replacement */
- __rb_change_child(victim, new, parent, root);
if (victim->rb_left)
rb_set_parent(victim->rb_left, new);
if (victim->rb_right)
rb_set_parent(victim->rb_right, new);
+ __rb_change_child(victim, new, parent, root);
+}
+EXPORT_SYMBOL(rb_replace_node);
+
+void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new,
+ struct rb_root *root)
+{
+ struct rb_node *parent = rb_parent(victim);
/* Copy the pointers/colour from the victim to the replacement */
*new = *victim;
+
+ /* Set the surrounding nodes to point to the replacement */
+ if (victim->rb_left)
+ rb_set_parent(victim->rb_left, new);
+ if (victim->rb_right)
+ rb_set_parent(victim->rb_right, new);
+
+ /* Set the parent's pointer to the new node last after an RCU barrier
+ * so that the pointers onwards are seen to be set correctly when doing
+ * an RCU walk over the tree.
+ */
+ __rb_change_child_rcu(victim, new, parent, root);
}
-EXPORT_SYMBOL(rb_replace_node);
+EXPORT_SYMBOL(rb_replace_node_rcu);
static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
{
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 5d845ffd7..56054e541 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -30,7 +30,7 @@
#define HASH_DEFAULT_SIZE 64UL
#define HASH_MIN_SIZE 4U
-#define BUCKET_LOCKS_PER_CPU 128UL
+#define BUCKET_LOCKS_PER_CPU 32UL
static u32 head_hashfn(struct rhashtable *ht,
const struct bucket_table *tbl,
@@ -70,21 +70,25 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
unsigned int nr_pcpus = num_possible_cpus();
#endif
- nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
+ nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
/* Never allocate more than 0.5 locks per bucket */
size = min_t(unsigned int, size, tbl->size >> 1);
if (sizeof(spinlock_t) != 0) {
+ tbl->locks = NULL;
#ifdef CONFIG_NUMA
if (size * sizeof(spinlock_t) > PAGE_SIZE &&
gfp == GFP_KERNEL)
tbl->locks = vmalloc(size * sizeof(spinlock_t));
- else
#endif
- tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
- gfp);
+ if (gfp != GFP_KERNEL)
+ gfp |= __GFP_NOWARN | __GFP_NORETRY;
+
+ if (!tbl->locks)
+ tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
+ gfp);
if (!tbl->locks)
return -ENOMEM;
for (i = 0; i < size; i++)
@@ -321,12 +325,14 @@ static int rhashtable_expand(struct rhashtable *ht)
static int rhashtable_shrink(struct rhashtable *ht)
{
struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
- unsigned int size;
+ unsigned int nelems = atomic_read(&ht->nelems);
+ unsigned int size = 0;
int err;
ASSERT_RHT_MUTEX(ht);
- size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+ if (nelems)
+ size = roundup_pow_of_two(nelems * 3 / 2);
if (size < ht->p.min_size)
size = ht->p.min_size;
diff --git a/lib/sradix-tree.c b/lib/sradix-tree.c
new file mode 100644
index 000000000..8d0632917
--- /dev/null
+++ b/lib/sradix-tree.c
@@ -0,0 +1,476 @@
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/gcd.h>
+#include <linux/sradix-tree.h>
+
+static inline int sradix_node_full(struct sradix_tree_root *root, struct sradix_tree_node *node)
+{
+ return node->fulls == root->stores_size ||
+ (node->height == 1 && node->count == root->stores_size);
+}
+
+/*
+ * Extend a sradix tree so it can store key @index.
+ */
+static int sradix_tree_extend(struct sradix_tree_root *root, unsigned long index)
+{
+ struct sradix_tree_node *node;
+ unsigned int height;
+
+ if (unlikely(root->rnode == NULL)) {
+ if (!(node = root->alloc()))
+ return -ENOMEM;
+
+ node->height = 1;
+ root->rnode = node;
+ root->height = 1;
+ }
+
+ /* Figure out what the height should be. */
+ height = root->height;
+ index >>= root->shift * height;
+
+ while (index) {
+ index >>= root->shift;
+ height++;
+ }
+
+ while (height > root->height) {
+ unsigned int newheight;
+ if (!(node = root->alloc()))
+ return -ENOMEM;
+
+ /* Increase the height. */
+ node->stores[0] = root->rnode;
+ root->rnode->parent = node;
+ if (root->extend)
+ root->extend(node, root->rnode);
+
+ newheight = root->height + 1;
+ node->height = newheight;
+ node->count = 1;
+ if (sradix_node_full(root, root->rnode))
+ node->fulls = 1;
+
+ root->rnode = node;
+ root->height = newheight;
+ }
+
+ return 0;
+}
+
+/*
+ * Search the next item from the current node, that is not NULL
+ * and satisfies the optional @iter callback.
+ */
+void *sradix_tree_next(struct sradix_tree_root *root,
+ struct sradix_tree_node *node, unsigned long index,
+ int (*iter)(void *item, unsigned long height))
+{
+ unsigned long offset;
+ void *item;
+
+ if (unlikely(node == NULL)) {
+ node = root->rnode;
+ for (offset = 0; offset < root->stores_size; offset++) {
+ item = node->stores[offset];
+ if (item && (!iter || iter(item, node->height)))
+ break;
+ }
+
+ if (unlikely(offset >= root->stores_size))
+ return NULL;
+
+ if (node->height == 1)
+ return item;
+ else
+ goto go_down;
+ }
+
+ while (node) {
+ offset = (index & root->mask) + 1;
+ for (;offset < root->stores_size; offset++) {
+ item = node->stores[offset];
+ if (item && (!iter || iter(item, node->height)))
+ break;
+ }
+
+ if (offset < root->stores_size)
+ break;
+
+ node = node->parent;
+ index >>= root->shift;
+ }
+
+ if (!node)
+ return NULL;
+
+ while (node->height > 1) {
+go_down:
+ node = item;
+ for (offset = 0; offset < root->stores_size; offset++) {
+ item = node->stores[offset];
+ if (item && (!iter || iter(item, node->height)))
+ break;
+ }
+
+ if (unlikely(offset >= root->stores_size))
+ return NULL;
+ }
+
+ BUG_ON(offset > root->stores_size);
+
+ return item;
+}
+
+/*
+ * Blindly insert the item to the tree. Typically, we reuse the
+ * first empty store item.
+ */
+int sradix_tree_enter(struct sradix_tree_root *root, void **item, int num)
+{
+ unsigned long index;
+ unsigned int height;
+ struct sradix_tree_node *node, *tmp = NULL;
+ int offset, offset_saved;
+ void **store = NULL;
+ int error, i, j, shift;
+
+go_on:
+ index = root->min;
+
+ if (root->enter_node && !sradix_node_full(root, root->enter_node)) {
+ node = root->enter_node;
+ BUG_ON((index >> (root->shift * root->height)));
+ } else {
+ node = root->rnode;
+ if (node == NULL || (index >> (root->shift * root->height))
+ || sradix_node_full(root, node)) {
+ error = sradix_tree_extend(root, index);
+ if (error)
+ return error;
+
+ node = root->rnode;
+ }
+ }
+
+
+ height = node->height;
+ shift = (height - 1) * root->shift;
+ offset = (index >> shift) & root->mask;
+ while (shift > 0) {
+ offset_saved = offset;
+ for (; offset < root->stores_size; offset++) {
+ store = &node->stores[offset];
+ tmp = *store;
+
+ if (!tmp || !sradix_node_full(root, tmp))
+ break;
+ }
+ BUG_ON(offset >= root->stores_size);
+
+ if (offset != offset_saved) {
+ index += (offset - offset_saved) << shift;
+ index &= ~((1UL << shift) - 1);
+ }
+
+ if (!tmp) {
+ if (!(tmp = root->alloc()))
+ return -ENOMEM;
+
+ tmp->height = shift / root->shift;
+ *store = tmp;
+ tmp->parent = node;
+ node->count++;
+// if (root->extend)
+// root->extend(node, tmp);
+ }
+
+ node = tmp;
+ shift -= root->shift;
+ offset = (index >> shift) & root->mask;
+ }
+
+ BUG_ON(node->height != 1);
+
+
+ store = &node->stores[offset];
+ for (i = 0, j = 0;
+ j < root->stores_size - node->count &&
+ i < root->stores_size - offset && j < num; i++) {
+ if (!store[i]) {
+ store[i] = item[j];
+ if (root->assign)
+ root->assign(node, index + i, item[j]);
+ j++;
+ }
+ }
+
+ node->count += j;
+ root->num += j;
+ num -= j;
+
+ while (sradix_node_full(root, node)) {
+ node = node->parent;
+ if (!node)
+ break;
+
+ node->fulls++;
+ }
+
+ if (unlikely(!node)) {
+ /* All nodes are full */
+ root->min = 1 << (root->height * root->shift);
+ root->enter_node = NULL;
+ } else {
+ root->min = index + i - 1;
+ root->min |= (1UL << (node->height - 1)) - 1;
+ root->min++;
+ root->enter_node = node;
+ }
+
+ if (num) {
+ item += j;
+ goto go_on;
+ }
+
+ return 0;
+}
+
+
+/**
+ * sradix_tree_shrink - shrink height of a sradix tree to minimal
+ * @root: sradix tree root
+ *
+ */
+static inline void sradix_tree_shrink(struct sradix_tree_root *root)
+{
+ /* try to shrink tree height */
+ while (root->height > 1) {
+ struct sradix_tree_node *to_free = root->rnode;
+
+ /*
+ * If the candidate node has more than one child, or its child
+ * is not at the leftmost store, we cannot shrink.
+ */
+ if (to_free->count != 1 || !to_free->stores[0])
+ break;
+
+ root->rnode = to_free->stores[0];
+ root->rnode->parent = NULL;
+ root->height--;
+ if (unlikely(root->enter_node == to_free)) {
+ root->enter_node = NULL;
+ }
+ root->free(to_free);
+ }
+}
+
+/*
+ * Delete the item at the known leaf node and index
+ */
+void sradix_tree_delete_from_leaf(struct sradix_tree_root *root,
+ struct sradix_tree_node *node, unsigned long index)
+{
+ unsigned int offset;
+ struct sradix_tree_node *start, *end;
+
+ BUG_ON(node->height != 1);
+
+ start = node;
+ while (node && !(--node->count))
+ node = node->parent;
+
+ end = node;
+ if (!node) {
+ root->rnode = NULL;
+ root->height = 0;
+ root->min = 0;
+ root->num = 0;
+ root->enter_node = NULL;
+ } else {
+ offset = (index >> (root->shift * (node->height - 1))) & root->mask;
+ if (root->rm)
+ root->rm(node, offset);
+ node->stores[offset] = NULL;
+ root->num--;
+ if (root->min > index) {
+ root->min = index;
+ root->enter_node = node;
+ }
+ }
+
+ if (start != end) {
+ do {
+ node = start;
+ start = start->parent;
+ if (unlikely(root->enter_node == node))
+ root->enter_node = end;
+ root->free(node);
+ } while (start != end);
+
+ /*
+ * Note that shrink may free "end", so enter_node still needs to
+ * be checked inside.
+ */
+ sradix_tree_shrink(root);
+ } else if (node->count == root->stores_size - 1) {
+ /* It WAS a full leaf node. Update the ancestors */
+ node = node->parent;
+ while (node) {
+ node->fulls--;
+ if (node->fulls != root->stores_size - 1)
+ break;
+
+ node = node->parent;
+ }
+ }
+}
+
+void *sradix_tree_lookup(struct sradix_tree_root *root, unsigned long index)
+{
+ unsigned int height, offset;
+ struct sradix_tree_node *node;
+ int shift;
+
+ node = root->rnode;
+ if (node == NULL || (index >> (root->shift * root->height)))
+ return NULL;
+
+ height = root->height;
+ shift = (height - 1) * root->shift;
+
+ do {
+ offset = (index >> shift) & root->mask;
+ node = node->stores[offset];
+ if (!node)
+ return NULL;
+
+ shift -= root->shift;
+ } while (shift >= 0);
+
+ return node;
+}
+
+/*
+ * Return the item if it exists, otherwise create it in place
+ * and return the created item.
+ */
+void *sradix_tree_lookup_create(struct sradix_tree_root *root,
+ unsigned long index, void *(*item_alloc)(void))
+{
+ unsigned int height, offset;
+ struct sradix_tree_node *node, *tmp;
+ void *item;
+ int shift, error;
+
+ if (root->rnode == NULL || (index >> (root->shift * root->height))) {
+ if (item_alloc) {
+ error = sradix_tree_extend(root, index);
+ if (error)
+ return NULL;
+ } else {
+ return NULL;
+ }
+ }
+
+ node = root->rnode;
+ height = root->height;
+ shift = (height - 1) * root->shift;
+
+ do {
+ offset = (index >> shift) & root->mask;
+ if (!node->stores[offset]) {
+ if (!(tmp = root->alloc()))
+ return NULL;
+
+ tmp->height = shift / root->shift;
+ node->stores[offset] = tmp;
+ tmp->parent = node;
+ node->count++;
+ node = tmp;
+ } else {
+ node = node->stores[offset];
+ }
+
+ shift -= root->shift;
+ } while (shift > 0);
+
+ BUG_ON(node->height != 1);
+ offset = index & root->mask;
+ if (node->stores[offset]) {
+ return node->stores[offset];
+ } else if (item_alloc) {
+ if (!(item = item_alloc()))
+ return NULL;
+
+ node->stores[offset] = item;
+
+ /*
+ * NOTE: we do NOT call root->assign here, since this item is
+ * newly created by us having no meaning. Caller can call this
+ * if it's necessary to do so.
+ */
+
+ node->count++;
+ root->num++;
+
+ while (sradix_node_full(root, node)) {
+ node = node->parent;
+ if (!node)
+ break;
+
+ node->fulls++;
+ }
+
+ if (unlikely(!node)) {
+ /* All nodes are full */
+ root->min = 1 << (root->height * root->shift);
+ } else {
+ if (root->min == index) {
+ root->min |= (1UL << (node->height - 1)) - 1;
+ root->min++;
+ root->enter_node = node;
+ }
+ }
+
+ return item;
+ } else {
+ return NULL;
+ }
+
+}
+
+int sradix_tree_delete(struct sradix_tree_root *root, unsigned long index)
+{
+ unsigned int height, offset;
+ struct sradix_tree_node *node;
+ int shift;
+
+ node = root->rnode;
+ if (node == NULL || (index >> (root->shift * root->height)))
+ return -ENOENT;
+
+ height = root->height;
+ shift = (height - 1) * root->shift;
+
+ do {
+ offset = (index >> shift) & root->mask;
+ node = node->stores[offset];
+ if (!node)
+ return -ENOENT;
+
+ shift -= root->shift;
+ } while (shift > 0);
+
+ offset = index & root->mask;
+ if (!node->stores[offset])
+ return -ENOENT;
+
+ sradix_tree_delete_from_leaf(root, node, index);
+
+ return 0;
+}
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 53ad6c083..60f77f1d4 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -242,6 +242,7 @@ depot_stack_handle_t depot_save_stack(struct stack_trace *trace,
*/
alloc_flags &= ~GFP_ZONEMASK;
alloc_flags &= (GFP_ATOMIC | GFP_KERNEL);
+ alloc_flags |= __GFP_NOWARN;
page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER);
if (page)
prealloc = page_address(page);
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 33f655ef4..9c5fe8110 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -40,8 +40,8 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long
unsigned long c, data;
/* Fall back to byte-at-a-time if we get a page fault */
- if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
- break;
+ unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time);
+
*(unsigned long *)(dst+res) = c;
if (has_zero(c, &data, &constants)) {
data = prep_zero_mask(c, data, &constants);
@@ -56,8 +56,7 @@ byte_at_a_time:
while (max) {
char c;
- if (unlikely(unsafe_get_user(c,src+res)))
- return -EFAULT;
+ unsafe_get_user(c,src+res, efault);
dst[res] = c;
if (!c)
return res;
@@ -76,6 +75,7 @@ byte_at_a_time:
* Nope: we hit the address space limit, and we still had more
* characters the caller would have wanted. That's an EFAULT.
*/
+efault:
return -EFAULT;
}
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 262594362..8e105ed4d 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -45,8 +45,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
src -= align;
max += align;
- if (unlikely(unsafe_get_user(c,(unsigned long __user *)src)))
- return 0;
+ unsafe_get_user(c, (unsigned long __user *)src, efault);
c |= aligned_byte_mask(align);
for (;;) {
@@ -61,8 +60,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
if (unlikely(max <= sizeof(unsigned long)))
break;
max -= sizeof(unsigned long);
- if (unlikely(unsafe_get_user(c,(unsigned long __user *)(src+res))))
- return 0;
+ unsafe_get_user(c, (unsigned long __user *)(src+res), efault);
}
res -= align;
@@ -77,6 +75,7 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
* Nope: we hit the address space limit, and we still had more
* characters the caller would have wanted. That's 0.
*/
+efault:
return 0;
}
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 76f29ecba..22e13a0e1 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -738,7 +738,7 @@ swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
phys_addr_t map, phys = page_to_phys(page) + offset;
dma_addr_t dev_addr = phys_to_dma(dev, phys);
@@ -807,7 +807,7 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
unmap_single(hwdev, dev_addr, size, dir);
}
@@ -877,7 +877,7 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_device);
*/
int
swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
- enum dma_data_direction dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, unsigned long attrs)
{
struct scatterlist *sg;
int i;
@@ -914,7 +914,7 @@ int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
enum dma_data_direction dir)
{
- return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
+ return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, 0);
}
EXPORT_SYMBOL(swiotlb_map_sg);
@@ -924,7 +924,8 @@ EXPORT_SYMBOL(swiotlb_map_sg);
*/
void
swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
- int nelems, enum dma_data_direction dir, struct dma_attrs *attrs)
+ int nelems, enum dma_data_direction dir,
+ unsigned long attrs)
{
struct scatterlist *sg;
int i;
@@ -941,7 +942,7 @@ void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
enum dma_data_direction dir)
{
- return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
+ return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, 0);
}
EXPORT_SYMBOL(swiotlb_unmap_sg);
diff --git a/lib/test_hash.c b/lib/test_hash.c
index c9549c8b4..cac20c5fb 100644
--- a/lib/test_hash.c
+++ b/lib/test_hash.c
@@ -143,7 +143,7 @@ static int __init
test_hash_init(void)
{
char buf[SIZE+1];
- u32 string_or = 0, hash_or[2][33] = { 0 };
+ u32 string_or = 0, hash_or[2][33] = { { 0, } };
unsigned tests = 0;
unsigned long long h64 = 0;
int i, j;
@@ -155,8 +155,8 @@ test_hash_init(void)
buf[j] = '\0';
for (i = 0; i <= j; i++) {
- u64 hashlen = hashlen_string(buf+i);
- u32 h0 = full_name_hash(buf+i, j-i);
+ u64 hashlen = hashlen_string(buf+i, buf+i);
+ u32 h0 = full_name_hash(buf+i, buf+i, j-i);
/* Check that hashlen_string gets the length right */
if (hashlen_len(hashlen) != j-i) {
@@ -219,21 +219,27 @@ test_hash_init(void)
}
/* Issue notices about skipped tests. */
-#ifndef HAVE_ARCH__HASH_32
- pr_info("__hash_32() has no arch implementation to test.");
-#elif HAVE_ARCH__HASH_32 != 1
+#ifdef HAVE_ARCH__HASH_32
+#if HAVE_ARCH__HASH_32 != 1
pr_info("__hash_32() is arch-specific; not compared to generic.");
#endif
-#ifndef HAVE_ARCH_HASH_32
- pr_info("hash_32() has no arch implementation to test.");
-#elif HAVE_ARCH_HASH_32 != 1
+#else
+ pr_info("__hash_32() has no arch implementation to test.");
+#endif
+#ifdef HAVE_ARCH_HASH_32
+#if HAVE_ARCH_HASH_32 != 1
pr_info("hash_32() is arch-specific; not compared to generic.");
#endif
-#ifndef HAVE_ARCH_HASH_64
- pr_info("hash_64() has no arch implementation to test.");
-#elif HAVE_ARCH_HASH_64 != 1
+#else
+ pr_info("hash_32() has no arch implementation to test.");
+#endif
+#ifdef HAVE_ARCH_HASH_64
+#if HAVE_ARCH_HASH_64 != 1
pr_info("hash_64() is arch-specific; not compared to generic.");
#endif
+#else
+ pr_info("hash_64() has no arch implementation to test.");
+#endif
pr_notice("%u tests passed.", tests);
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 297fdb5e7..64e899b63 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -38,7 +38,7 @@ MODULE_PARM_DESC(runs, "Number of test runs per variant (default: 4)");
static int max_size = 0;
module_param(max_size, int, 0);
-MODULE_PARM_DESC(runs, "Maximum table size (default: calculated)");
+MODULE_PARM_DESC(max_size, "Maximum table size (default: calculated)");
static bool shrinking = false;
module_param(shrinking, bool, 0);
diff --git a/lib/ubsan.c b/lib/ubsan.c
index 8799ae5e2..fb0409df1 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -308,7 +308,7 @@ static void handle_object_size_mismatch(struct type_mismatch_data *data,
return;
ubsan_prologue(&data->location, &flags);
- pr_err("%s address %pk with insufficient space\n",
+ pr_err("%s address %p with insufficient space\n",
type_check_kinds[data->type_check_kind],
(void *) ptr);
pr_err("for an object of type %s\n", data->type->type_name);
diff --git a/lib/wbt.c b/lib/wbt.c
index cc5a24270..257c7b099 100644
--- a/lib/wbt.c
+++ b/lib/wbt.c
@@ -1,5 +1,5 @@
/*
- * buffered writeback throttling. losely based on CoDel. We can't drop
+ * buffered writeback throttling. loosely based on CoDel. We can't drop
* packets for IO scheduling, so the logic is something like this:
*
* - Monitor latencies in a defined window of time.
@@ -9,30 +9,31 @@
* - For any window where we don't have solid data on what the latencies
* look like, retain status quo.
* - If latencies look good, decrement scaling step.
+ * - If we're only doing writes, allow the scaling step to go negative. This
+ * will temporarily boost write performance, snapping back to a stable
+ * scaling step of 0 if reads show up or the heavy writers finish. Unlike
+ * positive scaling steps where we shrink the monitoring window, a negative
+ * scaling step retains the default step==0 window size.
*
* Copyright (C) 2016 Jens Axboe
*
- * Things that (may) need changing:
- *
- * - Different scaling of background/normal/high priority writeback.
- * We may have to violate guarantees for max.
- * - We can have mismatches between the stat window and our window.
- *
*/
#include <linux/kernel.h>
#include <linux/blk_types.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/wbt.h>
+#include <linux/swap.h>
#define CREATE_TRACE_POINTS
#include <trace/events/wbt.h>
enum {
/*
- * Might need to be higher
+ * Default setting, we'll scale up (to 75% of QD max) or down (min 1)
+ * from here depending on device stats
*/
- RWB_MAX_DEPTH = 64,
+ RWB_DEF_DEPTH = 16,
/*
* 100msec window
@@ -40,10 +41,9 @@ enum {
RWB_WINDOW_NSEC = 100 * 1000 * 1000ULL,
/*
- * Disregard stats, if we don't meet these minimums
+ * Disregard stats, if we don't meet this minimum
*/
RWB_MIN_WRITE_SAMPLES = 3,
- RWB_MIN_READ_SAMPLES = 1,
/*
* If we have this number of consecutive windows with not enough
@@ -89,18 +89,51 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
}
}
-void __wbt_done(struct rq_wb *rwb)
+/*
+ * If a task was rate throttled in balance_dirty_pages() within the last
+ * second or so, use that to indicate a higher cleaning rate.
+ */
+static bool wb_recent_wait(struct rq_wb *rwb)
+{
+ struct bdi_writeback *wb = &rwb->bdi->wb;
+
+ return time_before(jiffies, wb->dirty_sleep + HZ);
+}
+
+static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, bool is_kswapd)
+{
+ return &rwb->rq_wait[is_kswapd];
+}
+
+static void rwb_wake_all(struct rq_wb *rwb)
+{
+ int i;
+
+ for (i = 0; i < WBT_NUM_RWQ; i++) {
+ struct rq_wait *rqw = &rwb->rq_wait[i];
+
+ if (waitqueue_active(&rqw->wait))
+ wake_up_all(&rqw->wait);
+ }
+}
+
+void __wbt_done(struct rq_wb *rwb, enum wbt_flags wb_acct)
{
+ struct rq_wait *rqw;
int inflight, limit;
- inflight = atomic_dec_return(&rwb->inflight);
+ if (!(wb_acct & WBT_TRACKED))
+ return;
+
+ rqw = get_rq_wait(rwb, wb_acct & WBT_KSWAPD);
+ inflight = atomic_dec_return(&rqw->inflight);
/*
* wbt got disabled with IO in flight. Wake up any potential
* waiters, we don't have to do more than that.
*/
if (unlikely(!rwb_enabled(rwb))) {
- wake_up_all(&rwb->wait);
+ rwb_wake_all(rwb);
return;
}
@@ -108,7 +141,7 @@ void __wbt_done(struct rq_wb *rwb)
* If the device does write back caching, drop further down
* before we wake people up.
*/
- if (rwb->wc && !atomic_read(&rwb->bdi->wb.dirty_sleeping))
+ if (rwb->wc && !wb_recent_wait(rwb))
limit = 0;
else
limit = rwb->wb_normal;
@@ -119,11 +152,11 @@ void __wbt_done(struct rq_wb *rwb)
if (inflight && inflight >= limit)
return;
- if (waitqueue_active(&rwb->wait)) {
+ if (waitqueue_active(&rqw->wait)) {
int diff = limit - inflight;
if (!inflight || diff >= rwb->wb_background / 2)
- wake_up_nr(&rwb->wait, 1);
+ wake_up(&rqw->wait);
}
}
@@ -136,27 +169,33 @@ void wbt_done(struct rq_wb *rwb, struct wb_issue_stat *stat)
if (!rwb)
return;
- if (!wbt_tracked(stat)) {
+ if (!wbt_is_tracked(stat)) {
if (rwb->sync_cookie == stat) {
rwb->sync_issue = 0;
rwb->sync_cookie = NULL;
}
- wb_timestamp(rwb, &rwb->last_comp);
+ if (wbt_is_read(stat))
+ wb_timestamp(rwb, &rwb->last_comp);
+ wbt_clear_state(stat);
} else {
WARN_ON_ONCE(stat == rwb->sync_cookie);
- __wbt_done(rwb);
- wbt_clear_tracked(stat);
+ __wbt_done(rwb, wbt_stat_to_mask(stat));
+ wbt_clear_state(stat);
}
}
-static void calc_wb_limits(struct rq_wb *rwb)
+/*
+ * Return true, if we can't increase the depth further by scaling
+ */
+static bool calc_wb_limits(struct rq_wb *rwb)
{
unsigned int depth;
+ bool ret = false;
if (!rwb->min_lat_nsec) {
rwb->wb_max = rwb->wb_normal = rwb->wb_background = 0;
- return;
+ return false;
}
/*
@@ -167,22 +206,44 @@ static void calc_wb_limits(struct rq_wb *rwb)
* scaling down, then keep a setting of 1/1/1.
*/
if (rwb->queue_depth == 1) {
- if (rwb->scale_step)
+ if (rwb->scale_step > 0)
rwb->wb_max = rwb->wb_normal = 1;
- else
+ else {
rwb->wb_max = rwb->wb_normal = 2;
+ ret = true;
+ }
rwb->wb_background = 1;
} else {
- depth = min_t(unsigned int, RWB_MAX_DEPTH, rwb->queue_depth);
+ /*
+ * scale_step == 0 is our default state. If we have suffered
+ * latency spikes, step will be > 0, and we shrink the
+ * allowed write depths. If step is < 0, we're only doing
+ * writes, and we allow a temporarily higher depth to
+ * increase performance.
+ */
+ depth = min_t(unsigned int, RWB_DEF_DEPTH, rwb->queue_depth);
+ if (rwb->scale_step > 0)
+ depth = 1 + ((depth - 1) >> min(31, rwb->scale_step));
+ else if (rwb->scale_step < 0) {
+ unsigned int maxd = 3 * rwb->queue_depth / 4;
+
+ depth = 1 + ((depth - 1) << -rwb->scale_step);
+ if (depth > maxd) {
+ depth = maxd;
+ ret = true;
+ }
+ }
/*
* Set our max/normal/bg queue depths based on how far
* we have scaled down (->scale_step).
*/
- rwb->wb_max = 1 + ((depth - 1) >> min(31U, rwb->scale_step));
+ rwb->wb_max = depth;
rwb->wb_normal = (rwb->wb_max + 1) / 2;
rwb->wb_background = (rwb->wb_max + 3) / 4;
}
+
+ return ret;
}
static bool inline stat_sample_valid(struct blk_rq_stat *stat)
@@ -209,8 +270,9 @@ static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
}
enum {
- LAT_OK,
+ LAT_OK = 1,
LAT_UNKNOWN,
+ LAT_UNKNOWN_WRITES,
LAT_EXCEEDED,
};
@@ -234,8 +296,21 @@ static int __latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
return LAT_EXCEEDED;
}
- if (!stat_sample_valid(stat))
+ /*
+ * No read/write mix, if stat isn't valid
+ */
+ if (!stat_sample_valid(stat)) {
+ /*
+ * If we had writes in this stat window and the window is
+ * current, we're only doing writes. If a task recently
+ * waited or still has writes in flight, consider us doing
+ * just writes as well.
+ */
+ if ((stat[1].nr_samples && rwb->stat_ops->is_current(stat)) ||
+ wb_recent_wait(rwb) || wbt_inflight(rwb))
+ return LAT_UNKNOWN_WRITES;
return LAT_UNKNOWN;
+ }
/*
* If the 'min' latency exceeds our target, step down.
@@ -269,23 +344,27 @@ static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
static void scale_up(struct rq_wb *rwb)
{
/*
- * If we're at 0, we can't go lower.
+ * Hit max in previous round, stop here
*/
- if (!rwb->scale_step)
+ if (rwb->scaled_max)
return;
rwb->scale_step--;
rwb->unknown_cnt = 0;
rwb->stat_ops->clear(rwb->ops_data);
- calc_wb_limits(rwb);
- if (waitqueue_active(&rwb->wait))
- wake_up_all(&rwb->wait);
+ rwb->scaled_max = calc_wb_limits(rwb);
+
+ rwb_wake_all(rwb);
rwb_trace_step(rwb, "step up");
}
-static void scale_down(struct rq_wb *rwb)
+/*
+ * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
+ * had a latency violation.
+ */
+static void scale_down(struct rq_wb *rwb, bool hard_throttle)
{
/*
* Stop scaling down when we've hit the limit. This also prevents
@@ -295,7 +374,12 @@ static void scale_down(struct rq_wb *rwb)
if (rwb->wb_max == 1)
return;
- rwb->scale_step++;
+ if (rwb->scale_step < 0 && hard_throttle)
+ rwb->scale_step = 0;
+ else
+ rwb->scale_step++;
+
+ rwb->scaled_max = false;
rwb->unknown_cnt = 0;
rwb->stat_ops->clear(rwb->ops_data);
calc_wb_limits(rwb);
@@ -306,13 +390,23 @@ static void rwb_arm_timer(struct rq_wb *rwb)
{
unsigned long expires;
- /*
- * We should speed this up, using some variant of a fast integer
- * inverse square root calculation. Since we only do this for
- * every window expiration, it's not a huge deal, though.
- */
- rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
+ if (rwb->scale_step > 0) {
+ /*
+ * We should speed this up, using some variant of a fast
+ * integer inverse square root calculation. Since we only do
+ * this for every window expiration, it's not a huge deal,
+ * though.
+ */
+ rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4,
int_sqrt((rwb->scale_step + 1) << 8));
+ } else {
+ /*
+ * For step < 0, we don't want to increase/decrease the
+ * window size.
+ */
+ rwb->cur_win_nsec = rwb->win_nsec;
+ }
+
expires = jiffies + nsecs_to_jiffies(rwb->cur_win_nsec);
mod_timer(&rwb->window_timer, expires);
}
@@ -320,28 +414,45 @@ static void rwb_arm_timer(struct rq_wb *rwb)
static void wb_timer_fn(unsigned long data)
{
struct rq_wb *rwb = (struct rq_wb *) data;
+ unsigned int inflight = wbt_inflight(rwb);
int status;
+ status = latency_exceeded(rwb);
+
+ trace_wbt_timer(rwb->bdi, status, rwb->scale_step, inflight);
+
/*
* If we exceeded the latency target, step down. If we did not,
* step one level up. If we don't know enough to say either exceeded
* or ok, then don't do anything.
*/
- status = latency_exceeded(rwb);
switch (status) {
case LAT_EXCEEDED:
- scale_down(rwb);
+ scale_down(rwb, true);
break;
case LAT_OK:
scale_up(rwb);
break;
+ case LAT_UNKNOWN_WRITES:
+ scale_up(rwb);
+ break;
case LAT_UNKNOWN:
+ if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP)
+ break;
/*
- * We had no read samples, start bumping up the write
- * depth slowly
+ * We get here for two reasons:
+ *
+ * 1) We previously scaled to a reduced depth, and we currently
+ * don't have a valid read/write sample. For that case,
+ * slowly return to center state (step == 0).
+ * 2) We started at the center step, but don't have a valid
+ * read/write sample, but we do have writes going on.
+ * Allow step to go negative, to increase write perf.
*/
- if (++rwb->unknown_cnt >= RWB_UNKNOWN_BUMP)
+ if (rwb->scale_step > 0)
scale_up(rwb);
+ else if (rwb->scale_step < 0)
+ scale_down(rwb, false);
break;
default:
break;
@@ -350,17 +461,17 @@ static void wb_timer_fn(unsigned long data)
/*
* Re-arm timer, if we have IO in flight
*/
- if (rwb->scale_step || atomic_read(&rwb->inflight))
+ if (rwb->scale_step || inflight)
rwb_arm_timer(rwb);
}
void wbt_update_limits(struct rq_wb *rwb)
{
rwb->scale_step = 0;
+ rwb->scaled_max = false;
calc_wb_limits(rwb);
- if (waitqueue_active(&rwb->wait))
- wake_up_all(&rwb->wait);
+ rwb_wake_all(rwb);
}
static bool close_io(struct rq_wb *rwb)
@@ -378,13 +489,14 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
unsigned int limit;
/*
- * At this point we know it's a buffered write. If REQ_SYNC is
- * set, then it's WB_SYNC_ALL writeback, and we'll use the max
- * limit for that. If the write is marked as a background write,
- * then use the idle limit, or go to normal if we haven't had
- * competing IO for a bit.
+ * At this point we know it's a buffered write. If this is
+ * kswapd trying to free memory, or REQ_SYNC is set, then
+ * it's WB_SYNC_ALL writeback, and we'll use the max limit for
+ * that. If the write is marked as a background write, then use
+ * the idle limit, or go to normal if we haven't had competing
+ * IO for a bit.
*/
- if ((rw & REQ_HIPRIO) || atomic_read(&rwb->bdi->wb.dirty_sleeping))
+ if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
limit = rwb->wb_max;
else if ((rw & REQ_BG) || close_io(rwb)) {
/*
@@ -398,7 +510,8 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
return limit;
}
-static inline bool may_queue(struct rq_wb *rwb, unsigned long rw)
+static inline bool may_queue(struct rq_wb *rwb, struct rq_wait *rqw,
+ unsigned long rw)
{
/*
* inc it here even if disabled, since we'll dec it at completion.
@@ -406,11 +519,11 @@ static inline bool may_queue(struct rq_wb *rwb, unsigned long rw)
* and someone turned it off at the same time.
*/
if (!rwb_enabled(rwb)) {
- atomic_inc(&rwb->inflight);
+ atomic_inc(&rqw->inflight);
return true;
}
- return atomic_inc_below(&rwb->inflight, get_limit(rwb, rw));
+ return atomic_inc_below(&rqw->inflight, get_limit(rwb, rw));
}
/*
@@ -419,16 +532,17 @@ static inline bool may_queue(struct rq_wb *rwb, unsigned long rw)
*/
static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
{
+ struct rq_wait *rqw = get_rq_wait(rwb, current_is_kswapd());
DEFINE_WAIT(wait);
- if (may_queue(rwb, rw))
+ if (may_queue(rwb, rqw, rw))
return;
do {
- prepare_to_wait_exclusive(&rwb->wait, &wait,
+ prepare_to_wait_exclusive(&rqw->wait, &wait,
TASK_UNINTERRUPTIBLE);
- if (may_queue(rwb, rw))
+ if (may_queue(rwb, rqw, rw))
break;
if (lock)
@@ -440,15 +554,17 @@ static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
spin_lock_irq(lock);
} while (1);
- finish_wait(&rwb->wait, &wait);
+ finish_wait(&rqw->wait, &wait);
}
static inline bool wbt_should_throttle(struct rq_wb *rwb, unsigned int rw)
{
+ const int op = rw >> BIO_OP_SHIFT;
+
/*
* If not a WRITE (or a discard), do nothing
*/
- if (!(rw & REQ_WRITE) || (rw & REQ_DISCARD))
+ if (!(op == REQ_OP_WRITE || op == REQ_OP_DISCARD))
return false;
/*
@@ -466,14 +582,20 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, unsigned int rw)
* in an irq held spinlock, if it holds one when calling this function.
* If we do sleep, we'll release and re-grab it.
*/
-bool wbt_wait(struct rq_wb *rwb, unsigned int rw, spinlock_t *lock)
+unsigned int wbt_wait(struct rq_wb *rwb, unsigned int rw, spinlock_t *lock)
{
+ unsigned int ret = 0;
+
if (!rwb_enabled(rwb))
- return false;
+ return 0;
+
+ if ((rw >> BIO_OP_SHIFT) == REQ_OP_READ)
+ ret = WBT_READ;
if (!wbt_should_throttle(rwb, rw)) {
- wb_timestamp(rwb, &rwb->last_issue);
- return false;
+ if (ret & WBT_READ)
+ wb_timestamp(rwb, &rwb->last_issue);
+ return ret;
}
__wbt_wait(rwb, rw, lock);
@@ -481,7 +603,10 @@ bool wbt_wait(struct rq_wb *rwb, unsigned int rw, spinlock_t *lock)
if (!timer_pending(&rwb->window_timer))
rwb_arm_timer(rwb);
- return true;
+ if (current_is_kswapd())
+ ret |= WBT_KSWAPD;
+
+ return ret | WBT_TRACKED;
}
void wbt_issue(struct rq_wb *rwb, struct wb_issue_stat *stat)
@@ -499,7 +624,7 @@ void wbt_issue(struct rq_wb *rwb, struct wb_issue_stat *stat)
* only use the address to compare with, which is why we store the
* sync_issue time locally.
*/
- if (!wbt_tracked(stat) && !rwb->sync_issue) {
+ if (wbt_is_read(stat) && !rwb->sync_issue) {
rwb->sync_cookie = stat;
rwb->sync_issue = wbt_issue_stat_get_time(stat);
}
@@ -531,9 +656,11 @@ void wbt_set_write_cache(struct rq_wb *rwb, bool write_cache_on)
void wbt_disable(struct rq_wb *rwb)
{
- del_timer_sync(&rwb->window_timer);
- rwb->win_nsec = rwb->min_lat_nsec = 0;
- wbt_update_limits(rwb);
+ if (rwb) {
+ del_timer_sync(&rwb->window_timer);
+ rwb->win_nsec = rwb->min_lat_nsec = 0;
+ wbt_update_limits(rwb);
+ }
}
EXPORT_SYMBOL_GPL(wbt_disable);
@@ -541,20 +668,27 @@ struct rq_wb *wbt_init(struct backing_dev_info *bdi, struct wb_stat_ops *ops,
void *ops_data)
{
struct rq_wb *rwb;
+ int i;
+
+ if (!ops->get || !ops->is_current || !ops->clear)
+ return ERR_PTR(-EINVAL);
rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
if (!rwb)
return ERR_PTR(-ENOMEM);
- atomic_set(&rwb->inflight, 0);
- init_waitqueue_head(&rwb->wait);
+ for (i = 0; i < WBT_NUM_RWQ; i++) {
+ atomic_set(&rwb->rq_wait[i].inflight, 0);
+ init_waitqueue_head(&rwb->rq_wait[i].wait);
+ }
+
setup_timer(&rwb->window_timer, wb_timer_fn, (unsigned long) rwb);
rwb->wc = 1;
- rwb->queue_depth = RWB_MAX_DEPTH;
+ rwb->queue_depth = RWB_DEF_DEPTH;
rwb->last_comp = rwb->last_issue = jiffies;
rwb->bdi = bdi;
rwb->win_nsec = RWB_WINDOW_NSEC;
- rwb->stat_ops = ops,
+ rwb->stat_ops = ops;
rwb->ops_data = ops_data;
wbt_update_limits(rwb);
return rwb;