summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-10-20 00:10:27 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-10-20 00:10:27 -0300
commitd0b2f91bede3bd5e3d24dd6803e56eee959c1797 (patch)
tree7fee4ab0509879c373c4f2cbd5b8a5be5b4041ee /block
parente914f8eb445e8f74b00303c19c2ffceaedd16a05 (diff)
Linux-libre 4.8.2-gnupck-4.8.2-gnu
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig13
-rw-r--r--block/Makefile2
-rw-r--r--block/bfq-cgroup.c54
-rw-r--r--block/bfq-iosched.c305
-rw-r--r--block/bfq-sched.c28
-rw-r--r--block/bfq.h28
-rw-r--r--block/bio-integrity.c12
-rw-r--r--block/bio.c74
-rw-r--r--block/blk-cgroup.c4
-rw-r--r--block/blk-core.c220
-rw-r--r--block/blk-exec.c2
-rw-r--r--block/blk-flush.c23
-rw-r--r--block/blk-lib.c63
-rw-r--r--block/blk-map.c27
-rw-r--r--block/blk-merge.c65
-rw-r--r--block/blk-mq-sysfs.c12
-rw-r--r--block/blk-mq-tag.c26
-rw-r--r--block/blk-mq.c192
-rw-r--r--block/blk-stat.c48
-rw-r--r--block/blk-stat.h1
-rw-r--r--block/blk-sysfs.c21
-rw-r--r--block/blk-throttle.c19
-rw-r--r--block/blk.h2
-rw-r--r--block/cfq-iosched.c441
-rw-r--r--block/deadline-iosched.c7
-rw-r--r--block/elevator.c29
-rw-r--r--block/genhd.c104
-rw-r--r--block/partition-generic.c3
-rw-r--r--block/partitions/atari.c7
29 files changed, 1006 insertions, 826 deletions
diff --git a/block/Kconfig b/block/Kconfig
index d4c2ff4b9..6da79e670 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -89,19 +89,6 @@ config BLK_DEV_INTEGRITY
T10/SCSI Data Integrity Field or the T13/ATA External Path
Protection. If in doubt, say N.
-config BLK_DEV_DAX
- bool "Block device DAX support"
- depends on FS_DAX
- depends on BROKEN
- help
- When DAX support is available (CONFIG_FS_DAX) raw block
- devices can also support direct userspace access to the
- storage capacity via MMAP(2) similar to a file on a
- DAX-enabled filesystem. However, the DAX I/O-path disables
- some standard I/O-statistics, and the MMAP(2) path has some
- operational differences due to bypassing the page
- cache. If in doubt, say N.
-
config BLK_DEV_THROTTLING
bool "Block layer bio throttling support"
depends on BLK_CGROUP=y
diff --git a/block/Makefile b/block/Makefile
index 3f2bee907..b7aa613b4 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
- uuid.o genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
+ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/
obj-$(CONFIG_BOUNCE) += bounce.o
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 32cd8ab1c..569988bda 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -7,7 +7,9 @@
* Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
* Paolo Valente <paolo.valente@unimore.it>
*
- * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2016 Paolo Valente <paolo.valente@linaro.org>
*
* Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
* file.
@@ -162,6 +164,7 @@ static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
{
struct blkg_policy_data *pd = blkg_to_pd(blkg, &blkcg_policy_bfq);
+
return pd_to_bfqg(pd);
}
@@ -205,34 +208,38 @@ static void bfqg_put(struct bfq_group *bfqg)
static void bfqg_stats_update_io_add(struct bfq_group *bfqg,
struct bfq_queue *bfqq,
- int rw)
+ int op, int op_flags)
{
- blkg_rwstat_add(&bfqg->stats.queued, rw, 1);
+ blkg_rwstat_add(&bfqg->stats.queued, op, op_flags, 1);
bfqg_stats_end_empty_time(&bfqg->stats);
if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
}
-static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int rw)
+static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int op,
+ int op_flags)
{
- blkg_rwstat_add(&bfqg->stats.queued, rw, -1);
+ blkg_rwstat_add(&bfqg->stats.queued, op, op_flags, -1);
}
-static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw)
+static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int op,
+ int op_flags)
{
- blkg_rwstat_add(&bfqg->stats.merged, rw, 1);
+ blkg_rwstat_add(&bfqg->stats.merged, op, op_flags, 1);
}
static void bfqg_stats_update_completion(struct bfq_group *bfqg,
- uint64_t start_time, uint64_t io_start_time, int rw)
+ uint64_t start_time, uint64_t io_start_time, int op,
+ int op_flags)
{
struct bfqg_stats *stats = &bfqg->stats;
unsigned long long now = sched_clock();
if (time_after64(now, io_start_time))
- blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
+ blkg_rwstat_add(&stats->service_time, op, op_flags,
+ now - io_start_time);
if (time_after64(io_start_time, start_time))
- blkg_rwstat_add(&stats->wait_time, rw,
+ blkg_rwstat_add(&stats->wait_time, op, op_flags,
io_start_time - start_time);
}
@@ -600,6 +607,10 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
lockdep_assert_held(bfqd->queue->queue_lock);
bfqg = bfq_find_set_group(bfqd, blkcg);
+
+ if (unlikely(!bfqg))
+ bfqg = bfqd->root_group;
+
if (async_bfqq) {
entity = &async_bfqq->entity;
@@ -1123,13 +1134,17 @@ static struct cftype bfq_blkg_files[] = {
#else /* CONFIG_BFQ_GROUP_IOSCHED */
static inline void bfqg_stats_update_io_add(struct bfq_group *bfqg,
- struct bfq_queue *bfqq, int rw) { }
-static inline void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int rw) { }
-static inline void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw) { }
+ struct bfq_queue *bfqq, int op, int op_flags) { }
+static inline void
+bfqg_stats_update_io_remove(struct bfq_group *bfqg, int op, int op_flags) { }
+static inline void
+bfqg_stats_update_io_merged(struct bfq_group *bfqg, int op, int op_flags) { }
static inline void bfqg_stats_update_completion(struct bfq_group *bfqg,
- uint64_t start_time, uint64_t io_start_time, int rw) { }
-static inline void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
-struct bfq_group *curr_bfqg) { }
+ uint64_t start_time, uint64_t io_start_time, int op,
+ int op_flags) { }
+static inline void
+bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
+ struct bfq_group *curr_bfqg) { }
static inline void bfqg_stats_end_empty_time(struct bfqg_stats *stats) { }
static inline void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
static inline void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
@@ -1141,6 +1156,7 @@ static void bfq_init_entity(struct bfq_entity *entity,
struct bfq_group *bfqg)
{
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
entity->weight = entity->new_weight;
entity->orig_weight = entity->new_weight;
if (bfqq) {
@@ -1154,6 +1170,7 @@ static struct bfq_group *
bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
{
struct bfq_data *bfqd = bic_to_bfqd(bic);
+
return bfqd->root_group;
}
@@ -1163,7 +1180,7 @@ static void bfq_end_wr_async(struct bfq_data *bfqd)
}
static struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
- struct blkcg *blkcg)
+ struct blkcg *blkcg)
{
return bfqd->root_group;
}
@@ -1173,7 +1190,8 @@ static struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
return bfqq->bfqd->root_group;
}
-static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
+static struct bfq_group *
+bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
struct bfq_group *bfqg;
int i;
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 76a701abf..9190a5554 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7,7 +7,9 @@
* Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
* Paolo Valente <paolo.valente@unimore.it>
*
- * Copyright (C) 2016 Paolo Valente <paolo.valente@unimore.it>
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2016 Paolo Valente <paolo.valente@linaro.org>
*
* Licensed under the GPL-2 as detailed in the accompanying COPYING.BFQ
* file.
@@ -71,8 +73,8 @@
#include "bfq.h"
#include "blk.h"
-/* Expiration time of sync (0) and async (1) requests, in jiffies. */
-static const int bfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
+/* Expiration time of sync (0) and async (1) requests, in ns. */
+static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
/* Maximum backwards seek, in KiB. */
static const int bfq_back_max = 16 * 1024;
@@ -80,8 +82,8 @@ static const int bfq_back_max = 16 * 1024;
/* Penalty of a backwards seek, in number of sectors. */
static const int bfq_back_penalty = 2;
-/* Idling period duration, in jiffies. */
-static int bfq_slice_idle = HZ / 125;
+/* Idling period duration, in ns. */
+static u64 bfq_slice_idle = NSEC_PER_SEC / 125;
/* Minimum number of assigned budgets for which stats are safe to compute. */
static const int bfq_stats_min_budgets = 194;
@@ -102,13 +104,13 @@ static const int bfq_timeout = HZ / 8;
struct kmem_cache *bfq_pool;
/* Below this threshold (in ms), we consider thinktime immediate. */
-#define BFQ_MIN_TT 2
+#define BFQ_MIN_TT (2 * NSEC_PER_MSEC)
/* hw_tag detection: parallel requests threshold and min samples needed. */
#define BFQ_HW_QUEUE_THRESHOLD 4
#define BFQ_HW_QUEUE_SAMPLES 32
-#define BFQQ_SEEK_THR (sector_t)(8 * 100)
+#define BFQQ_SEEK_THR (sector_t)(8 * 100)
#define BFQQ_CLOSE_THR (sector_t)(8 * 1024)
#define BFQQ_SEEKY(bfqq) (hweight32(bfqq->seek_history) > 32/8)
@@ -191,10 +193,7 @@ static void bfq_schedule_dispatch(struct bfq_data *bfqd);
*/
static int bfq_bio_sync(struct bio *bio)
{
- if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC))
- return 1;
-
- return 0;
+ return bio_data_dir(bio) == READ || (bio->bi_opf & REQ_SYNC);
}
/*
@@ -223,7 +222,7 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd,
unsigned long back_max;
#define BFQ_RQ1_WRAP 0x01 /* request 1 wraps */
#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */
- unsigned wrap = 0; /* bit mask: requests behind the disk head? */
+ unsigned int wrap = 0; /* bit mask: requests behind the disk head? */
if (!rq1 || rq1 == rq2)
return rq2;
@@ -278,12 +277,11 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd,
return rq1;
else if (d2 < d1)
return rq2;
- else {
- if (s1 >= s2)
- return rq1;
- else
- return rq2;
- }
+
+ if (s1 >= s2)
+ return rq1;
+ else
+ return rq2;
case BFQ_RQ2_WRAP:
return rq1;
@@ -339,7 +337,7 @@ bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
*rb_link = p;
bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d",
- (long long unsigned)sector,
+ (unsigned long long) sector,
bfqq ? bfqq->pid : 0);
return bfqq;
@@ -690,7 +688,7 @@ static void bfq_add_to_burst(struct bfq_data *bfqd, struct bfq_queue *bfqq)
*/
hlist_for_each_entry(bfqq_item, &bfqd->burst_list,
burst_list_node) {
- bfq_mark_bfqq_in_large_burst(bfqq_item);
+ bfq_mark_bfqq_in_large_burst(bfqq_item);
bfq_log_bfqq(bfqd, bfqq_item, "marked in large burst");
}
bfq_mark_bfqq_in_large_burst(bfqq);
@@ -891,6 +889,7 @@ end:
static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
{
struct bfq_entity *entity = &bfqq->entity;
+
return entity->budget - entity->service;
}
@@ -1062,7 +1061,7 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
}
entity->budget = max_t(unsigned long, bfqq->max_budget,
- bfq_serv_to_charge(bfqq->next_rq,bfqq));
+ bfq_serv_to_charge(bfqq->next_rq, bfqq));
BUG_ON(entity->budget < 0);
bfq_clear_bfqq_non_blocking_wait_rq(bfqq);
@@ -1200,9 +1199,9 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
* bfq_bfqq_update_budg_for_activation for
* details on the usage of the next variable.
*/
- arrived_in_time = time_is_after_jiffies(
+ arrived_in_time = ktime_get_ns() <=
RQ_BIC(rq)->ttime.last_end_request +
- bfqd->bfq_slice_idle * 3);
+ bfqd->bfq_slice_idle * 3;
bfq_log_bfqq(bfqd, bfqq,
"bfq_add_request non-busy: "
@@ -1217,7 +1216,7 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
BUG_ON(bfqq == bfqd->in_service_queue);
bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq,
- rq->cmd_flags);
+ req_op(rq), rq->cmd_flags);
/*
* bfqq deserves to be weight-raised if:
@@ -1458,7 +1457,7 @@ static void bfq_activate_request(struct request_queue *q, struct request *rq)
bfqd->rq_in_driver++;
bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
bfq_log(bfqd, "activate_request: new bfqd->last_position %llu",
- (long long unsigned)bfqd->last_position);
+ (unsigned long long) bfqd->last_position);
}
static void bfq_deactivate_request(struct request_queue *q, struct request *rq)
@@ -1523,7 +1522,8 @@ static void bfq_remove_request(struct request *rq)
BUG_ON(bfqq->meta_pending == 0);
bfqq->meta_pending--;
}
- bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags);
+ bfqg_stats_update_io_remove(bfqq_group(bfqq), req_op(rq),
+ rq->cmd_flags);
}
static int bfq_merge(struct request_queue *q, struct request **req,
@@ -1533,7 +1533,7 @@ static int bfq_merge(struct request_queue *q, struct request **req,
struct request *__rq;
__rq = bfq_find_rq_fmerge(bfqd, bio);
- if (__rq && elv_rq_merge_ok(__rq, bio)) {
+ if (__rq && elv_bio_merge_ok(__rq, bio)) {
*req = __rq;
return ELEVATOR_FRONT_MERGE;
}
@@ -1578,7 +1578,8 @@ static void bfq_merged_request(struct request_queue *q, struct request *req,
static void bfq_bio_merged(struct request_queue *q, struct request *req,
struct bio *bio)
{
- bfqg_stats_update_io_merged(bfqq_group(RQ_BFQQ(req)), bio->bi_rw);
+ bfqg_stats_update_io_merged(bfqq_group(RQ_BFQQ(req)), bio_op(bio),
+ bio->bi_opf);
}
#endif
@@ -1598,7 +1599,7 @@ static void bfq_merged_requests(struct request_queue *q, struct request *rq,
*/
if (bfqq == next_bfqq &&
!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
- time_before(next->fifo_time, rq->fifo_time)) {
+ next->fifo_time < rq->fifo_time) {
list_del_init(&rq->queuelist);
list_replace_init(&next->queuelist, &rq->queuelist);
rq->fifo_time = next->fifo_time;
@@ -1608,7 +1609,8 @@ static void bfq_merged_requests(struct request_queue *q, struct request *rq,
bfqq->next_rq = rq;
bfq_remove_request(next);
- bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
+ bfqg_stats_update_io_merged(bfqq_group(bfqq), req_op(next),
+ next->cmd_flags);
}
/* Must be called with bfqq != NULL */
@@ -1961,7 +1963,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
{
bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu",
- (long unsigned)new_bfqq->pid);
+ (unsigned long) new_bfqq->pid);
/* Save weight raising and idle window of the merged queues */
bfq_bfqq_save_state(bfqq);
bfq_bfqq_save_state(new_bfqq);
@@ -1983,11 +1985,10 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
new_bfqq->wr_cur_max_time = bfqq->wr_cur_max_time;
new_bfqq->last_wr_start_finish = bfqq->last_wr_start_finish;
if (bfq_bfqq_busy(new_bfqq))
- bfqd->wr_busy_queues++;
+ bfqd->wr_busy_queues++;
new_bfqq->entity.prio_changed = 1;
bfq_log_bfqq(bfqd, new_bfqq,
- "wr starting after merge with %d, "
- "rais_max_time %u",
+ "wr start after merge with %d, rais_max_time %u",
bfqq->pid,
jiffies_to_msecs(bfqq->wr_cur_max_time));
}
@@ -2028,8 +2029,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
bfq_put_queue(bfqq);
}
-static int bfq_allow_merge(struct request_queue *q, struct request *rq,
- struct bio *bio)
+static int bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
+ struct bio *bio)
{
struct bfq_data *bfqd = q->elevator->elevator_data;
struct bfq_io_cq *bic;
@@ -2039,7 +2040,7 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
* Disallow merge of a sync bio into an async request.
*/
if (bfq_bio_sync(bio) && !rq_is_sync(rq))
- return 0;
+ return false;
/*
* Lookup the bfqq that this bio will be queued with. Allow
@@ -2048,7 +2049,7 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
*/
bic = bfq_bic_lookup(bfqd, current->io_context);
if (!bic)
- return 0;
+ return false;
bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
/*
@@ -2071,6 +2072,12 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
return bfqq == RQ_BFQQ(rq);
}
+static int bfq_allow_rq_merge(struct request_queue *q, struct request *rq,
+ struct request *next)
+{
+ return RQ_BFQQ(rq) == RQ_BFQQ(next);
+}
+
/*
* Set the maximum time for the in-service queue to consume its
* budget. This prevents seeky processes from lowering the throughput.
@@ -2081,6 +2088,7 @@ static void bfq_set_budget_timeout(struct bfq_data *bfqd,
struct bfq_queue *bfqq)
{
unsigned int timeout_coeff;
+
if (bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time)
timeout_coeff = 1;
else
@@ -2198,13 +2206,15 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
*/
if (BFQQ_SEEKY(bfqq) && bfqq->wr_coeff == 1 &&
bfq_symmetric_scenario(bfqd))
- sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT));
+ sl = min_t(u64, sl, BFQ_MIN_TT);
bfqd->last_idling_start = ktime_get();
- mod_timer(&bfqd->idle_slice_timer, jiffies + sl);
+ hrtimer_start(&bfqd->idle_slice_timer, ns_to_ktime(sl),
+ HRTIMER_MODE_REL);
bfqg_stats_set_start_idle_time(bfqq_group(bfqq));
- bfq_log(bfqd, "arm idle: %u/%u ms",
- jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle));
+ bfq_log(bfqd, "arm idle: %llu/%llu ms",
+ div_u64(sl, NSEC_PER_MSEC),
+ div_u64(bfqd->bfq_slice_idle, NSEC_PER_MSEC));
}
/*
@@ -2247,7 +2257,7 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
rq = rq_entry_fifo(bfqq->fifo.next);
- if (time_is_after_jiffies(rq->fifo_time))
+ if (ktime_get_ns() < rq->fifo_time)
return NULL;
return rq;
@@ -2468,7 +2478,7 @@ static unsigned long bfq_calc_max_budget(struct bfq_data *bfqd)
{
/*
* The max_budget calculated when autotuning is equal to the
- * amount of sectors transfered in timeout at the
+ * amount of sectors transferred in timeout at the
* estimated peak rate.
*/
return bfqd->peak_rate * 1000 * jiffies_to_msecs(bfqd->bfq_timeout) >>
@@ -2527,9 +2537,10 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
/* Don't trust short/unrealistic values. */
if (delta_usecs < 1000 || delta_usecs >= LONG_MAX) {
if (blk_queue_nonrot(bfqd->queue))
- *delta_ms = BFQ_MIN_TT; /* give same worst-case
- guarantees as
- idling for seeky
+ *delta_ms = BFQ_MIN_TT; /*
+ * give same worst-case
+ * guarantees as
+ * idling for seeky
*/
else /* Charge at least one seek */
*delta_ms = jiffies_to_msecs(bfq_slice_idle);
@@ -2537,7 +2548,7 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
}
delta_ms_tmp = delta_usecs;
- do_div(delta_ms_tmp, 1000);
+ do_div(delta_ms_tmp, NSEC_PER_MSEC);
*delta_ms = delta_ms_tmp;
/*
@@ -2602,6 +2613,7 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (bfqd->peak_rate_samples == BFQ_PEAK_RATE_SAMPLES &&
update) {
int dev_type = blk_queue_nonrot(bfqd->queue);
+
if (bfqd->bfq_user_max_budget == 0) {
bfqd->bfq_max_budget =
bfq_calc_max_budget(bfqd);
@@ -2619,9 +2631,9 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfqd->RT_prod = R_fast[dev_type] *
T_fast[dev_type];
}
- bfq_log(bfqd, "dev_speed_class = %d (%d sects/sec), "
- "thresh %d setcs/sec",
- bfqd->device_speed,
+ bfq_log(bfqd,
+ "dev_type %d dev_speed_class = %d (%d sects/sec), thresh %d setcs/sec",
+ dev_type, bfqd->device_speed,
bfqd->device_speed == BFQ_BFQD_FAST ?
(1000000*R_fast[dev_type])>>BFQ_RATE_SHIFT :
(1000000*R_slow[dev_type])>>BFQ_RATE_SHIFT,
@@ -2641,8 +2653,7 @@ static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
}
bfq_log_bfqq(bfqd, bfqq,
- "update_peak_rate: bw %llu sect/s, peak rate %llu, "
- "slow %d",
+ "update_peak_rate: bw %llu sect/s, peak rate %llu, slow %d",
(1000000*bw)>>BFQ_RATE_SHIFT,
(1000000*bfqd->peak_rate)>>BFQ_RATE_SHIFT,
bw < bfqd->peak_rate / 2);
@@ -2706,9 +2717,7 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
struct bfq_queue *bfqq)
{
bfq_log_bfqq(bfqd, bfqq,
- "softrt_next_start: service_blkg %lu "
- "soft_rate %u sects/sec"
- "interval %u",
+"softrt_next_start: service_blkg %lu soft_rate %u sects/sec interval %u",
bfqq->service_from_backlogged,
bfqd->bfq_wr_max_softrt_rate,
jiffies_to_msecs(HZ * bfqq->service_from_backlogged /
@@ -2717,7 +2726,7 @@ static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
return max(bfqq->last_idle_bklogged +
HZ * bfqq->service_from_backlogged /
bfqd->bfq_wr_max_softrt_rate,
- jiffies + bfqq->bfqd->bfq_slice_idle + 4);
+ jiffies + nsecs_to_jiffies(bfqq->bfqd->bfq_slice_idle) + 4);
}
/*
@@ -2816,7 +2825,7 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
BUG_ON(bfqq->entity.budget < bfqq->entity.service);
if (reason == BFQ_BFQQ_TOO_IDLE &&
- entity->service <= 2 * entity->budget / 10 )
+ entity->service <= 2 * entity->budget / 10)
bfq_clear_bfqq_IO_bound(bfqq);
if (bfqd->low_latency && bfqq->wr_coeff == 1)
@@ -3042,7 +3051,7 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* (i) each of these processes must get the same throughput as
* the others;
* (ii) all these processes have the same I/O pattern
- (either sequential or random).
+ * (either sequential or random).
* In fact, in such a scenario, the drive will tend to treat
* the requests of each of these processes in about the same
* way as the requests of the others, and thus to provide
@@ -3155,9 +3164,11 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* 2) idling either boosts the throughput (without issues), or
* is necessary to preserve service guarantees.
*/
- bfq_log_bfqq(bfqd, bfqq, "may_idle: sync %d idling_boosts_thr %d "
- "wr_busy %d boosts %d IO-bound %d guar %d",
- bfq_bfqq_sync(bfqq), idling_boosts_thr,
+ bfq_log_bfqq(bfqd, bfqq, "may_idle: sync %d idling_boosts_thr %d",
+ bfq_bfqq_sync(bfqq), idling_boosts_thr);
+
+ bfq_log_bfqq(bfqd, bfqq,
+ "may_idle: wr_busy %d boosts %d IO-bound %d guar %d",
bfqd->wr_busy_queues,
idling_boosts_thr_without_issues,
bfq_bfqq_IO_bound(bfqq),
@@ -3204,7 +3215,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue");
if (bfq_may_expire_for_budg_timeout(bfqq) &&
- !timer_pending(&bfqd->idle_slice_timer) &&
+ !hrtimer_active(&bfqd->idle_slice_timer) &&
!bfq_bfqq_must_idle(bfqq))
goto expire;
@@ -3224,7 +3235,8 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
* not disable disk idling even when a new request
* arrives.
*/
- if (timer_pending(&bfqd->idle_slice_timer)) {
+ if (bfq_bfqq_wait_request(bfqq)) {
+ BUG_ON(!hrtimer_active(&bfqd->idle_slice_timer));
/*
* If we get here: 1) at least a new request
* has arrived but we have not disabled the
@@ -3239,7 +3251,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
* So we disable idling.
*/
bfq_clear_bfqq_wait_request(bfqq);
- del_timer(&bfqd->idle_slice_timer);
+ hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
bfqg_stats_update_idle_time(bfqq_group(bfqq));
}
goto keep_queue;
@@ -3251,7 +3263,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
* for a new request, or has requests waiting for a completion and
* may idle after their completion, then keep it anyway.
*/
- if (timer_pending(&bfqd->idle_slice_timer) ||
+ if (hrtimer_active(&bfqd->idle_slice_timer) ||
(bfqq->dispatched != 0 && bfq_bfqq_may_idle(bfqq))) {
bfqq = NULL;
goto keep_queue;
@@ -3271,6 +3283,7 @@ keep_queue:
static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
struct bfq_entity *entity = &bfqq->entity;
+
if (bfqq->wr_coeff > 1) { /* queue is being weight-raised */
bfq_log_bfqq(bfqd, bfqq,
"raising period dur %u/%u msec, old coeff %u, w %d(%d)",
@@ -3376,7 +3389,7 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
bfq_log_bfqq(bfqd, bfqq,
"dispatched %u sec req (%llu), budg left %d",
blk_rq_sectors(rq),
- (long long unsigned)blk_rq_pos(rq),
+ (unsigned long long) blk_rq_pos(rq),
bfq_bfqq_budget_left(bfqq));
dispatched++;
@@ -3461,8 +3474,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
BUG_ON(bfqq->entity.budget < bfqq->entity.service);
- bfq_clear_bfqq_wait_request(bfqq);
- BUG_ON(timer_pending(&bfqd->idle_slice_timer));
+ BUG_ON(bfq_bfqq_wait_request(bfqq));
if (!bfq_dispatch_request(bfqd, bfqq))
return 0;
@@ -3554,9 +3566,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
static void bfq_init_icq(struct io_cq *icq)
{
- struct bfq_io_cq *bic = icq_to_bic(icq);
-
- bic->ttime.last_end_request = bfq_smallest_from_now();
+ icq_to_bic(icq)->ttime.last_end_request = ktime_get_ns() - (1ULL<<32);
}
static void bfq_exit_icq(struct io_cq *icq)
@@ -3620,8 +3630,8 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq,
}
if (bfqq->new_ioprio >= IOPRIO_BE_NR) {
- printk(KERN_CRIT "bfq_set_next_ioprio_data: new_ioprio %d\n",
- bfqq->new_ioprio);
+ pr_crit("bfq_set_next_ioprio_data: new_ioprio %d\n",
+ bfqq->new_ioprio);
BUG();
}
@@ -3735,7 +3745,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
rcu_read_lock();
- bfqg = bfq_find_set_group(bfqd,bio_blkcg(bio));
+ bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
if (!bfqg) {
bfqq = &bfqd->oom_bfqq;
goto out;
@@ -3784,13 +3794,15 @@ out:
static void bfq_update_io_thinktime(struct bfq_data *bfqd,
struct bfq_io_cq *bic)
{
- unsigned long elapsed = jiffies - bic->ttime.last_end_request;
- unsigned long ttime = min(elapsed, 2UL * bfqd->bfq_slice_idle);
+ struct bfq_ttime *ttime = &bic->ttime;
+ u64 elapsed = ktime_get_ns() - bic->ttime.last_end_request;
+
+ elapsed = min(elapsed, 2UL * bfqd->bfq_slice_idle);
- bic->ttime.ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8;
- bic->ttime.ttime_total = (7*bic->ttime.ttime_total + 256*ttime) / 8;
- bic->ttime.ttime_mean = (bic->ttime.ttime_total + 128) /
- bic->ttime.ttime_samples;
+ ttime->ttime_samples = (7*bic->ttime.ttime_samples + 256) / 8;
+ ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8);
+ ttime->ttime_mean = div64_ul(ttime->ttime_total + 128,
+ ttime->ttime_samples);
}
@@ -3799,6 +3811,7 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
struct request *rq)
{
sector_t sdist = 0;
+
if (bfqq->last_request_pos) {
if (bfqq->last_request_pos < blk_rq_pos(rq))
sdist = blk_rq_pos(rq) - bfqq->last_request_pos;
@@ -3906,7 +3919,7 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* timer.
*/
bfq_clear_bfqq_wait_request(bfqq);
- del_timer(&bfqd->idle_slice_timer);
+ hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
bfqg_stats_update_idle_time(bfqq_group(bfqq));
/*
@@ -3964,7 +3977,8 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
bfq_add_request(rq);
- rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
+ rq->fifo_time = ktime_get_ns() +
+ jiffies_to_nsecs(bfqd->bfq_fifo_expire[rq_is_sync(rq)]);
list_add_tail(&rq->queuelist, &bfqq->fifo);
bfq_rq_enqueued(bfqd, bfqq, rq);
@@ -4012,7 +4026,8 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
bfqq->dispatched--;
bfqg_stats_update_completion(bfqq_group(bfqq),
rq_start_time_ns(rq),
- rq_io_start_time_ns(rq), rq->cmd_flags);
+ rq_io_start_time_ns(rq), req_op(rq),
+ rq->cmd_flags);
if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) {
BUG_ON(!RB_EMPTY_ROOT(&bfqq->sort_list));
@@ -4028,7 +4043,7 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
&bfqd->queue_weights_tree);
}
- RQ_BIC(rq)->ttime.last_end_request = jiffies;
+ RQ_BIC(rq)->ttime.last_end_request = ktime_get_ns();
/*
* If we are waiting to discover whether the request pattern
@@ -4079,7 +4094,7 @@ static int __bfq_may_queue(struct bfq_queue *bfqq)
return ELV_MQUEUE_MAY;
}
-static int bfq_may_queue(struct request_queue *q, int rw)
+static int bfq_may_queue(struct request_queue *q, int op, int op_flags)
{
struct bfq_data *bfqd = q->elevator->elevator_data;
struct task_struct *tsk = current;
@@ -4096,7 +4111,7 @@ static int bfq_may_queue(struct request_queue *q, int rw)
if (!bic)
return ELV_MQUEUE_MAY;
- bfqq = bic_to_bfqq(bic, rw_is_sync(rw));
+ bfqq = bic_to_bfqq(bic, rw_is_sync(op, op_flags));
if (bfqq)
return __bfq_may_queue(bfqq);
@@ -4279,9 +4294,10 @@ static void bfq_kick_queue(struct work_struct *work)
* Handler of the expiration of the timer running if the in-service queue
* is idling inside its time slice.
*/
-static void bfq_idle_slice_timer(unsigned long data)
+static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer)
{
- struct bfq_data *bfqd = (struct bfq_data *)data;
+ struct bfq_data *bfqd = container_of(timer, struct bfq_data,
+ idle_slice_timer);
struct bfq_queue *bfqq;
unsigned long flags;
enum bfqq_expiration reason;
@@ -4299,6 +4315,8 @@ static void bfq_idle_slice_timer(unsigned long data)
*/
if (bfqq) {
bfq_log_bfqq(bfqd, bfqq, "slice_timer expired");
+ bfq_clear_bfqq_wait_request(bfqq);
+
if (bfq_bfqq_budget_timeout(bfqq))
/*
* Also here the queue can be safely expired
@@ -4324,11 +4342,12 @@ schedule_dispatch:
bfq_schedule_dispatch(bfqd);
spin_unlock_irqrestore(bfqd->queue->queue_lock, flags);
+ return HRTIMER_NORESTART;
}
static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
{
- del_timer_sync(&bfqd->idle_slice_timer);
+ hrtimer_cancel(&bfqd->idle_slice_timer);
cancel_work_sync(&bfqd->unplug_work);
}
@@ -4385,7 +4404,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
bfq_shutdown_timer_wq(bfqd);
- BUG_ON(timer_pending(&bfqd->idle_slice_timer));
+ BUG_ON(hrtimer_active(&bfqd->idle_slice_timer));
#ifdef CONFIG_BFQ_GROUP_IOSCHED
blkcg_deactivate_policy(q, &blkcg_policy_bfq);
@@ -4460,9 +4479,9 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
bfq_init_root_group(bfqd->root_group, bfqd);
bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
- init_timer(&bfqd->idle_slice_timer);
+ hrtimer_init(&bfqd->idle_slice_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
- bfqd->idle_slice_timer.data = (unsigned long)bfqd;
bfqd->queue_weights_tree = RB_ROOT;
bfqd->group_weights_tree = RB_ROOT;
@@ -4528,8 +4547,7 @@ out_free:
static void bfq_slab_kill(void)
{
- if (bfq_pool)
- kmem_cache_destroy(bfq_pool);
+ kmem_cache_destroy(bfq_pool);
}
static int __init bfq_slab_setup(void)
@@ -4542,7 +4560,7 @@ static int __init bfq_slab_setup(void)
static ssize_t bfq_var_show(unsigned int var, char *page)
{
- return sprintf(page, "%d\n", var);
+ return sprintf(page, "%u\n", var);
}
static ssize_t bfq_var_store(unsigned long *var, const char *page,
@@ -4560,6 +4578,7 @@ static ssize_t bfq_var_store(unsigned long *var, const char *page,
static ssize_t bfq_wr_max_time_show(struct elevator_queue *e, char *page)
{
struct bfq_data *bfqd = e->elevator_data;
+
return sprintf(page, "%d\n", bfqd->bfq_wr_max_time > 0 ?
jiffies_to_msecs(bfqd->bfq_wr_max_time) :
jiffies_to_msecs(bfq_wr_duration(bfqd)));
@@ -4578,25 +4597,29 @@ static ssize_t bfq_weights_show(struct elevator_queue *e, char *page)
num_char += sprintf(page + num_char, "Active:\n");
list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) {
- num_char += sprintf(page + num_char,
- "pid%d: weight %hu, nr_queued %d %d, dur %d/%u\n",
- bfqq->pid,
- bfqq->entity.weight,
- bfqq->queued[0],
- bfqq->queued[1],
- jiffies_to_msecs(jiffies - bfqq->last_wr_start_finish),
- jiffies_to_msecs(bfqq->wr_cur_max_time));
+ num_char += sprintf(page + num_char,
+ "pid%d: weight %hu, nr_queued %d %d, ",
+ bfqq->pid,
+ bfqq->entity.weight,
+ bfqq->queued[0],
+ bfqq->queued[1]);
+ num_char += sprintf(page + num_char,
+ "dur %d/%u\n",
+ jiffies_to_msecs(
+ jiffies -
+ bfqq->last_wr_start_finish),
+ jiffies_to_msecs(bfqq->wr_cur_max_time));
}
num_char += sprintf(page + num_char, "Idle:\n");
list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) {
- num_char += sprintf(page + num_char,
- "pid%d: weight %hu, dur %d/%u\n",
- bfqq->pid,
- bfqq->entity.weight,
- jiffies_to_msecs(jiffies -
- bfqq->last_wr_start_finish),
- jiffies_to_msecs(bfqq->wr_cur_max_time));
+ num_char += sprintf(page + num_char,
+ "pid%d: weight %hu, dur %d/%u\n",
+ bfqq->pid,
+ bfqq->entity.weight,
+ jiffies_to_msecs(jiffies -
+ bfqq->last_wr_start_finish),
+ jiffies_to_msecs(bfqq->wr_cur_max_time));
}
spin_unlock_irq(bfqd->queue->queue_lock);
@@ -4608,16 +4631,18 @@ static ssize_t bfq_weights_show(struct elevator_queue *e, char *page)
static ssize_t __FUNC(struct elevator_queue *e, char *page) \
{ \
struct bfq_data *bfqd = e->elevator_data; \
- unsigned int __data = __VAR; \
- if (__CONV) \
+ u64 __data = __VAR; \
+ if (__CONV == 1) \
__data = jiffies_to_msecs(__data); \
+ else if (__CONV == 2) \
+ __data = div_u64(__data, NSEC_PER_MSEC); \
return bfq_var_show(__data, (page)); \
}
-SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 1);
-SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 1);
+SHOW_FUNCTION(bfq_fifo_expire_sync_show, bfqd->bfq_fifo_expire[1], 2);
+SHOW_FUNCTION(bfq_fifo_expire_async_show, bfqd->bfq_fifo_expire[0], 2);
SHOW_FUNCTION(bfq_back_seek_max_show, bfqd->bfq_back_max, 0);
SHOW_FUNCTION(bfq_back_seek_penalty_show, bfqd->bfq_back_penalty, 0);
-SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 1);
+SHOW_FUNCTION(bfq_slice_idle_show, bfqd->bfq_slice_idle, 2);
SHOW_FUNCTION(bfq_max_budget_show, bfqd->bfq_user_max_budget, 0);
SHOW_FUNCTION(bfq_timeout_sync_show, bfqd->bfq_timeout, 1);
SHOW_FUNCTION(bfq_strict_guarantees_show, bfqd->strict_guarantees, 0);
@@ -4630,6 +4655,17 @@ SHOW_FUNCTION(bfq_wr_min_inter_arr_async_show, bfqd->bfq_wr_min_inter_arr_async,
SHOW_FUNCTION(bfq_wr_max_softrt_rate_show, bfqd->bfq_wr_max_softrt_rate, 0);
#undef SHOW_FUNCTION
+#define USEC_SHOW_FUNCTION(__FUNC, __VAR) \
+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
+{ \
+ struct bfq_data *bfqd = e->elevator_data; \
+ u64 __data = __VAR; \
+ __data = div_u64(__data, NSEC_PER_USEC); \
+ return bfq_var_show(__data, (page)); \
+}
+USEC_SHOW_FUNCTION(bfq_slice_idle_us_show, bfqd->bfq_slice_idle);
+#undef USEC_SHOW_FUNCTION
+
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
static ssize_t \
__FUNC(struct elevator_queue *e, const char *page, size_t count) \
@@ -4641,20 +4677,22 @@ __FUNC(struct elevator_queue *e, const char *page, size_t count) \
__data = (MIN); \
else if (__data > (MAX)) \
__data = (MAX); \
- if (__CONV) \
+ if (__CONV == 1) \
*(__PTR) = msecs_to_jiffies(__data); \
+ else if (__CONV == 2) \
+ *(__PTR) = (u64)__data * NSEC_PER_MSEC; \
else \
*(__PTR) = __data; \
return ret; \
}
STORE_FUNCTION(bfq_fifo_expire_sync_store, &bfqd->bfq_fifo_expire[1], 1,
- INT_MAX, 1);
+ INT_MAX, 2);
STORE_FUNCTION(bfq_fifo_expire_async_store, &bfqd->bfq_fifo_expire[0], 1,
- INT_MAX, 1);
+ INT_MAX, 2);
STORE_FUNCTION(bfq_back_seek_max_store, &bfqd->bfq_back_max, 0, INT_MAX, 0);
STORE_FUNCTION(bfq_back_seek_penalty_store, &bfqd->bfq_back_penalty, 1,
INT_MAX, 0);
-STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 1);
+STORE_FUNCTION(bfq_slice_idle_store, &bfqd->bfq_slice_idle, 0, INT_MAX, 2);
STORE_FUNCTION(bfq_wr_coeff_store, &bfqd->bfq_wr_coeff, 1, INT_MAX, 0);
STORE_FUNCTION(bfq_wr_max_time_store, &bfqd->bfq_wr_max_time, 0, INT_MAX, 1);
STORE_FUNCTION(bfq_wr_rt_max_time_store, &bfqd->bfq_wr_rt_max_time, 0, INT_MAX,
@@ -4667,6 +4705,23 @@ STORE_FUNCTION(bfq_wr_max_softrt_rate_store, &bfqd->bfq_wr_max_softrt_rate, 0,
INT_MAX, 0);
#undef STORE_FUNCTION
+#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)\
+{ \
+ struct bfq_data *bfqd = e->elevator_data; \
+ unsigned long __data; \
+ int ret = bfq_var_store(&__data, (page), count); \
+ if (__data < (MIN)) \
+ __data = (MIN); \
+ else if (__data > (MAX)) \
+ __data = (MAX); \
+ *(__PTR) = (u64)__data * NSEC_PER_USEC; \
+ return ret; \
+}
+USEC_STORE_FUNCTION(bfq_slice_idle_us_store, &bfqd->bfq_slice_idle, 0,
+ UINT_MAX);
+#undef USEC_STORE_FUNCTION
+
/* do nothing for the moment */
static ssize_t bfq_weights_store(struct elevator_queue *e,
const char *page, size_t count)
@@ -4768,6 +4823,7 @@ static struct elv_fs_entry bfq_attrs[] = {
BFQ_ATTR(back_seek_max),
BFQ_ATTR(back_seek_penalty),
BFQ_ATTR(slice_idle),
+ BFQ_ATTR(slice_idle_us),
BFQ_ATTR(max_budget),
BFQ_ATTR(timeout_sync),
BFQ_ATTR(strict_guarantees),
@@ -4790,7 +4846,8 @@ static struct elevator_type iosched_bfq = {
#ifdef CONFIG_BFQ_GROUP_IOSCHED
.elevator_bio_merged_fn = bfq_bio_merged,
#endif
- .elevator_allow_merge_fn = bfq_allow_merge,
+ .elevator_allow_bio_merge_fn = bfq_allow_bio_merge,
+ .elevator_allow_rq_merge_fn = bfq_allow_rq_merge,
.elevator_dispatch_fn = bfq_dispatch_requests,
.elevator_add_req_fn = bfq_insert_request,
.elevator_activate_req_fn = bfq_activate_request,
@@ -4836,12 +4893,6 @@ static int __init bfq_init(void)
int ret;
char msg[50] = "BFQ I/O-scheduler: v8r3";
- /*
- * Can be 0 on HZ < 1000 setups.
- */
- if (bfq_slice_idle == 0)
- bfq_slice_idle = 1;
-
#ifdef CONFIG_BFQ_GROUP_IOSCHED
ret = blkcg_policy_register(&blkcg_policy_bfq);
if (ret)
diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index 475a9a6e1..f8960a4e9 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -7,7 +7,9 @@
* Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
* Paolo Valente <paolo.valente@unimore.it>
*
- * Copyright (C) 2016 Paolo Valente <paolo.valente@unimore.it>
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2016 Paolo Valente <paolo.valente@linaro.org>
*/
static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
@@ -167,7 +169,8 @@ static u64 bfq_delta(unsigned long service, unsigned long weight)
static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service)
{
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
- unsigned long long start, finish, delta ;
+ unsigned long long start, finish, delta;
+
BUG_ON(entity->weight == 0);
entity->finish = entity->start +
@@ -428,7 +431,7 @@ static void bfq_active_insert(struct bfq_service_tree *st,
static unsigned short bfq_ioprio_to_weight(int ioprio)
{
BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR);
- return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF ;
+ return (IOPRIO_BE_NR - ioprio) * BFQ_WEIGHT_CONVERSION_COEFF;
}
/**
@@ -1243,6 +1246,12 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
BUG_ON(sd->in_service_entity);
+ /*
+ * Choose from idle class, if needed to guarantee a minimum
+ * bandwidth to this class. This should also mitigate
+ * priority-inversion problems in case a low priority task is
+ * holding file system resources.
+ */
if (bfqd &&
jiffies - bfqd->bfq_class_idle_last_service >
BFQ_CL_IDLE_TIMEOUT) {
@@ -1250,11 +1259,12 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
true);
if (entity) {
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
if (bfqq)
bfq_log_bfqq(bfqd, bfqq,
"idle chosen from st %p %d",
st + BFQ_IOPRIO_CLASSES - 1,
- BFQ_IOPRIO_CLASSES - 1) ;
+ BFQ_IOPRIO_CLASSES - 1);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
else {
struct bfq_group *bfqg =
@@ -1263,7 +1273,7 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
bfq_log_bfqg(bfqd, bfqg,
"idle chosen from st %p %d",
st + BFQ_IOPRIO_CLASSES - 1,
- BFQ_IOPRIO_CLASSES - 1) ;
+ BFQ_IOPRIO_CLASSES - 1);
}
#endif
i = BFQ_IOPRIO_CLASSES - 1;
@@ -1276,10 +1286,11 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
if (entity) {
if (bfqd != NULL) {
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+
if (bfqq)
bfq_log_bfqq(bfqd, bfqq,
"chosen from st %p %d",
- st + i, i) ;
+ st + i, i);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
else {
struct bfq_group *bfqg =
@@ -1287,7 +1298,7 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
bfq_log_bfqg(bfqd, bfqg,
"chosen from st %p %d",
- st + i, i) ;
+ st + i, i);
}
#endif
}
@@ -1375,8 +1386,9 @@ static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
bfqd->in_service_bic = NULL;
}
+ bfq_clear_bfqq_wait_request(bfqd->in_service_queue);
+ hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
bfqd->in_service_queue = NULL;
- del_timer(&bfqd->idle_slice_timer);
}
static void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
diff --git a/block/bfq.h b/block/bfq.h
index 49d28b9dc..00142fcce 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -1,5 +1,5 @@
/*
- * BFQ-v8r3 for 4.7.0: data structures and common functions prototypes.
+ * BFQ-v8r3 for 4.8.0: data structures and common functions prototypes.
*
* Based on ideas and code from CFQ:
* Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
@@ -7,7 +7,9 @@
* Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
* Paolo Valente <paolo.valente@unimore.it>
*
- * Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ *
+ * Copyright (C) 2016 Paolo Valente <paolo.valente@linaro.org>
*/
#ifndef _BFQ_H
@@ -55,8 +57,8 @@ struct bfq_service_tree {
/* tree for idle entities (i.e., not backlogged, with V <= F_i)*/
struct rb_root idle;
- struct bfq_entity *first_idle; /* idle entity with minimum F_i */
- struct bfq_entity *last_idle; /* idle entity with maximum F_i */
+ struct bfq_entity *first_idle; /* idle entity with minimum F_i */
+ struct bfq_entity *last_idle; /* idle entity with maximum F_i */
u64 vtime; /* scheduler virtual time */
/* scheduler weight sum; active and idle entities contribute to it */
@@ -158,7 +160,7 @@ struct bfq_entity {
/* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */
int budget;
- unsigned int weight; /* weight of the queue */
+ unsigned int weight; /* weight of the queue */
unsigned int new_weight; /* next weight if a change is in progress */
/* original weight, used to implement weight boosting */
@@ -304,11 +306,11 @@ struct bfq_queue {
* struct bfq_ttime - per process thinktime stats.
*/
struct bfq_ttime {
- unsigned long last_end_request; /* completion time of last request */
+ u64 last_end_request; /* completion time of last request */
- unsigned long ttime_total; /* total process thinktime */
+ u64 ttime_total; /* total process thinktime */
unsigned long ttime_samples; /* number of thinktime samples */
- unsigned long ttime_mean; /* average process thinktime */
+ u64 ttime_mean; /* average process thinktime */
};
@@ -417,7 +419,7 @@ struct bfq_data {
* Timer set when idling (waiting) for the next request from
* the queue in service.
*/
- struct timer_list idle_slice_timer;
+ struct hrtimer idle_slice_timer;
/* delayed work to restart dispatching on the request queue */
struct work_struct unplug_work;
@@ -449,13 +451,13 @@ struct bfq_data {
* Timeout for async/sync requests; when it fires, requests
* are served in fifo order.
*/
- unsigned int bfq_fifo_expire[2];
+ u64 bfq_fifo_expire[2];
/* weight of backward seeks wrt forward ones */
unsigned int bfq_back_penalty;
/* maximum allowed backward seek */
unsigned int bfq_back_max;
/* maximum idling time */
- unsigned int bfq_slice_idle;
+ u64 bfq_slice_idle;
/* last time CLASS_IDLE was served */
u64 bfq_class_idle_last_service;
@@ -788,7 +790,7 @@ bfq_entity_service_tree(struct bfq_entity *entity)
if (bfqq)
bfq_log_bfqq(bfqq->bfqd, bfqq,
"entity_service_tree %p %d",
- sched_data->service_tree + idx, idx) ;
+ sched_data->service_tree + idx, idx);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
else {
struct bfq_group *bfqg =
@@ -796,7 +798,7 @@ bfq_entity_service_tree(struct bfq_entity *entity)
bfq_log_bfqg((struct bfq_data *)bfqg->bfqd, bfqg,
"entity_service_tree %p %d",
- sched_data->service_tree + idx, idx) ;
+ sched_data->service_tree + idx, idx);
}
#endif
return sched_data->service_tree + idx;
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 711e4d8de..63f72f00c 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -26,6 +26,7 @@
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
+#include "blk.h"
#define BIP_INLINE_VECS 4
@@ -53,7 +54,6 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
{
struct bio_integrity_payload *bip;
struct bio_set *bs = bio->bi_pool;
- unsigned long idx = BIO_POOL_NONE;
unsigned inline_vecs;
if (!bs || !bs->bio_integrity_pool) {
@@ -71,20 +71,22 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
memset(bip, 0, sizeof(*bip));
if (nr_vecs > inline_vecs) {
+ unsigned long idx = 0;
+
bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx,
bs->bvec_integrity_pool);
if (!bip->bip_vec)
goto err;
bip->bip_max_vcnt = bvec_nr_vecs(idx);
+ bip->bip_slab = idx;
} else {
bip->bip_vec = bip->bip_inline_vecs;
bip->bip_max_vcnt = inline_vecs;
}
- bip->bip_slab = idx;
bip->bip_bio = bio;
bio->bi_integrity = bip;
- bio->bi_rw |= REQ_INTEGRITY;
+ bio->bi_opf |= REQ_INTEGRITY;
return bip;
err:
@@ -110,9 +112,7 @@ void bio_integrity_free(struct bio *bio)
bip->bip_vec->bv_offset);
if (bs && bs->bio_integrity_pool) {
- if (bip->bip_slab != BIO_POOL_NONE)
- bvec_free(bs->bvec_integrity_pool, bip->bip_vec,
- bip->bip_slab);
+ bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab);
mempool_free(bip, bs->bio_integrity_pool);
} else {
diff --git a/block/bio.c b/block/bio.c
index 462386908..aa7354088 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -43,7 +43,7 @@
* unsigned short
*/
#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
-static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
+static struct biovec_slab bvec_slabs[BVEC_POOL_NR] __read_mostly = {
BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
};
#undef BV
@@ -160,11 +160,15 @@ unsigned int bvec_nr_vecs(unsigned short idx)
void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx)
{
- BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
+ if (!idx)
+ return;
+ idx--;
+
+ BIO_BUG_ON(idx >= BVEC_POOL_NR);
- if (idx == BIOVEC_MAX_IDX)
+ if (idx == BVEC_POOL_MAX) {
mempool_free(bv, pool);
- else {
+ } else {
struct biovec_slab *bvs = bvec_slabs + idx;
kmem_cache_free(bvs->slab, bv);
@@ -206,7 +210,7 @@ struct bio_vec *bvec_alloc(gfp_t gfp_mask, int nr, unsigned long *idx,
* idx now points to the pool we want to allocate from. only the
* 1-vec entry pool is mempool backed.
*/
- if (*idx == BIOVEC_MAX_IDX) {
+ if (*idx == BVEC_POOL_MAX) {
fallback:
bvl = mempool_alloc(pool, gfp_mask);
} else {
@@ -226,11 +230,12 @@ fallback:
*/
bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) {
- *idx = BIOVEC_MAX_IDX;
+ *idx = BVEC_POOL_MAX;
goto fallback;
}
}
+ (*idx)++;
return bvl;
}
@@ -250,8 +255,7 @@ static void bio_free(struct bio *bio)
__bio_free(bio);
if (bs) {
- if (bio_flagged(bio, BIO_OWNS_VEC))
- bvec_free(bs->bvec_pool, bio->bi_io_vec, BIO_POOL_IDX(bio));
+ bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
/*
* If we have front padding, adjust the bio pointer before freeing
@@ -420,7 +424,6 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
gfp_t saved_gfp = gfp_mask;
unsigned front_pad;
unsigned inline_vecs;
- unsigned long idx = BIO_POOL_NONE;
struct bio_vec *bvl = NULL;
struct bio *bio;
void *p;
@@ -480,6 +483,8 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
bio_init(bio);
if (nr_iovecs > inline_vecs) {
+ unsigned long idx = 0;
+
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
if (!bvl && gfp_mask != saved_gfp) {
punt_bios_to_rescuer(bs);
@@ -490,13 +495,12 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
if (unlikely(!bvl))
goto err_free;
- bio_set_flag(bio, BIO_OWNS_VEC);
+ bio->bi_flags |= idx << BVEC_POOL_OFFSET;
} else if (nr_iovecs) {
bvl = bio->bi_inline_vecs;
}
bio->bi_pool = bs;
- bio->bi_flags |= idx << BIO_POOL_OFFSET;
bio->bi_max_vecs = nr_iovecs;
bio->bi_io_vec = bvl;
return bio;
@@ -568,7 +572,7 @@ EXPORT_SYMBOL(bio_phys_segments);
*/
void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
{
- BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE);
+ BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio));
/*
* most users will be overriding ->bi_bdev with a new target,
@@ -576,7 +580,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
*/
bio->bi_bdev = bio_src->bi_bdev;
bio_set_flag(bio, BIO_CLONED);
- bio->bi_rw = bio_src->bi_rw;
+ bio->bi_opf = bio_src->bi_opf;
bio->bi_iter = bio_src->bi_iter;
bio->bi_io_vec = bio_src->bi_io_vec;
@@ -658,24 +662,24 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
if (!bio)
return NULL;
-
bio->bi_bdev = bio_src->bi_bdev;
- bio->bi_rw = bio_src->bi_rw;
+ bio->bi_opf = bio_src->bi_opf;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
- if (bio->bi_rw & REQ_DISCARD)
- goto integrity_clone;
-
- if (bio->bi_rw & REQ_WRITE_SAME) {
+ switch (bio_op(bio)) {
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
+ break;
+ case REQ_OP_WRITE_SAME:
bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
- goto integrity_clone;
+ break;
+ default:
+ bio_for_each_segment(bv, bio_src, iter)
+ bio->bi_io_vec[bio->bi_vcnt++] = bv;
+ break;
}
- bio_for_each_segment(bv, bio_src, iter)
- bio->bi_io_vec[bio->bi_vcnt++] = bv;
-
-integrity_clone:
if (bio_integrity(bio_src)) {
int ret;
@@ -858,21 +862,20 @@ static void submit_bio_wait_endio(struct bio *bio)
/**
* submit_bio_wait - submit a bio, and wait until it completes
- * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
* @bio: The &struct bio which describes the I/O
*
* Simple wrapper around submit_bio(). Returns 0 on success, or the error from
* bio_endio() on failure.
*/
-int submit_bio_wait(int rw, struct bio *bio)
+int submit_bio_wait(struct bio *bio)
{
struct submit_bio_ret ret;
- rw |= REQ_SYNC;
init_completion(&ret.event);
bio->bi_private = &ret;
bio->bi_end_io = submit_bio_wait_endio;
- submit_bio(rw, bio);
+ bio->bi_opf |= REQ_SYNC;
+ submit_bio(bio);
wait_for_completion_io(&ret.event);
return ret.error;
@@ -1103,7 +1106,6 @@ int bio_uncopy_user(struct bio *bio)
bio_put(bio);
return ret;
}
-EXPORT_SYMBOL(bio_uncopy_user);
/**
* bio_copy_user_iov - copy user data to bio
@@ -1171,7 +1173,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
goto out_bmd;
if (iter->type & WRITE)
- bio->bi_rw |= REQ_WRITE;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
ret = 0;
@@ -1341,7 +1343,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
* set data direction, and check if mapped pages need bouncing
*/
if (iter->type & WRITE)
- bio->bi_rw |= REQ_WRITE;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
bio_set_flag(bio, BIO_USER_MAPPED);
@@ -1398,7 +1400,6 @@ void bio_unmap_user(struct bio *bio)
__bio_unmap_user(bio);
bio_put(bio);
}
-EXPORT_SYMBOL(bio_unmap_user);
static void bio_map_kern_endio(struct bio *bio)
{
@@ -1534,7 +1535,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
bio->bi_private = data;
} else {
bio->bi_end_io = bio_copy_kern_endio;
- bio->bi_rw |= REQ_WRITE;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
}
return bio;
@@ -1544,7 +1545,6 @@ cleanup:
bio_put(bio);
return ERR_PTR(-ENOMEM);
}
-EXPORT_SYMBOL(bio_copy_kern);
/*
* bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
@@ -1789,7 +1789,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
* Discards need a mutable bio_vec to accommodate the payload
* required by the DSM TRIM and UNMAP commands.
*/
- if (bio->bi_rw & REQ_DISCARD)
+ if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
split = bio_clone_bioset(bio, gfp, bs);
else
split = bio_clone_fast(bio, gfp, bs);
@@ -1838,7 +1838,7 @@ EXPORT_SYMBOL_GPL(bio_trim);
*/
mempool_t *biovec_create_pool(int pool_entries)
{
- struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
+ struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX;
return mempool_create_slab_pool(pool_entries, bp->slab);
}
@@ -2026,7 +2026,7 @@ static void __init biovec_init_slabs(void)
{
int i;
- for (i = 0; i < BIOVEC_NR_POOLS; i++) {
+ for (i = 0; i < BVEC_POOL_NR; i++) {
int size;
struct biovec_slab *bvs = bvec_slabs + i;
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 66e6f1aae..dd38e5ced 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -905,7 +905,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
return 0;
}
-struct cftype blkcg_files[] = {
+static struct cftype blkcg_files[] = {
{
.name = "stat",
.flags = CFTYPE_NOT_ON_ROOT,
@@ -914,7 +914,7 @@ struct cftype blkcg_files[] = {
{ } /* terminate */
};
-struct cftype blkcg_legacy_files[] = {
+static struct cftype blkcg_legacy_files[] = {
{
.name = "reset_stats",
.write_u64 = blkcg_reset_stats,
diff --git a/block/blk-core.c b/block/blk-core.c
index 7bcf0e30a..cdcb188e8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -49,8 +49,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
DEFINE_IDA(blk_queue_ida);
-int trap_non_toi_io;
-
/*
* For the allocated request tables
*/
@@ -966,10 +964,10 @@ static void __freed_request(struct request_list *rl, int sync)
* A request has just been released. Account for it, update the full and
* congestion status, wake up any waiters. Called under q->queue_lock.
*/
-static void freed_request(struct request_list *rl, unsigned int flags)
+static void freed_request(struct request_list *rl, int op, unsigned int flags)
{
struct request_queue *q = rl->q;
- int sync = rw_is_sync(flags);
+ int sync = rw_is_sync(op, flags);
q->nr_rqs[sync]--;
rl->count[sync]--;
@@ -1036,7 +1034,7 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
* Flush requests do not use the elevator so skip initialization.
* This allows a request to share the flush and elevator data.
*/
- if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))
+ if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA))
return false;
return true;
@@ -1061,7 +1059,8 @@ static struct io_context *rq_ioc(struct bio *bio)
/**
* __get_request - get a free request
* @rl: request list to allocate from
- * @rw_flags: RW and SYNC flags
+ * @op: REQ_OP_READ/REQ_OP_WRITE
+ * @op_flags: rq_flag_bits
* @bio: bio to allocate request for (can be %NULL)
* @gfp_mask: allocation mask
*
@@ -1072,21 +1071,22 @@ static struct io_context *rq_ioc(struct bio *bio)
* Returns ERR_PTR on failure, with @q->queue_lock held.
* Returns request pointer on success, with @q->queue_lock *not held*.
*/
-static struct request *__get_request(struct request_list *rl, int rw_flags,
- struct bio *bio, gfp_t gfp_mask)
+static struct request *__get_request(struct request_list *rl, int op,
+ int op_flags, struct bio *bio,
+ gfp_t gfp_mask)
{
struct request_queue *q = rl->q;
struct request *rq;
struct elevator_type *et = q->elevator->type;
struct io_context *ioc = rq_ioc(bio);
struct io_cq *icq = NULL;
- const bool is_sync = rw_is_sync(rw_flags) != 0;
+ const bool is_sync = rw_is_sync(op, op_flags) != 0;
int may_queue;
if (unlikely(blk_queue_dying(q)))
return ERR_PTR(-ENODEV);
- may_queue = elv_may_queue(q, rw_flags);
+ may_queue = elv_may_queue(q, op, op_flags);
if (may_queue == ELV_MQUEUE_NO)
goto rq_starved;
@@ -1130,7 +1130,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
/*
* Decide whether the new request will be managed by elevator. If
- * so, mark @rw_flags and increment elvpriv. Non-zero elvpriv will
+ * so, mark @op_flags and increment elvpriv. Non-zero elvpriv will
* prevent the current elevator from being destroyed until the new
* request is freed. This guarantees icq's won't be destroyed and
* makes creating new ones safe.
@@ -1139,14 +1139,14 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
* it will be created after releasing queue_lock.
*/
if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
- rw_flags |= REQ_ELVPRIV;
+ op_flags |= REQ_ELVPRIV;
q->nr_rqs_elvpriv++;
if (et->icq_cache && ioc)
icq = ioc_lookup_icq(ioc, q);
}
if (blk_queue_io_stat(q))
- rw_flags |= REQ_IO_STAT;
+ op_flags |= REQ_IO_STAT;
spin_unlock_irq(q->queue_lock);
/* allocate and init request */
@@ -1156,10 +1156,10 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
blk_rq_init(q, rq);
blk_rq_set_rl(rq, rl);
- rq->cmd_flags = rw_flags | REQ_ALLOCED;
+ req_set_op_attrs(rq, op, op_flags | REQ_ALLOCED);
/* init elvpriv */
- if (rw_flags & REQ_ELVPRIV) {
+ if (op_flags & REQ_ELVPRIV) {
if (unlikely(et->icq_cache && !icq)) {
if (ioc)
icq = ioc_create_icq(ioc, q, gfp_mask);
@@ -1185,7 +1185,7 @@ out:
if (ioc_batching(q, ioc))
ioc->nr_batch_requests--;
- trace_block_getrq(q, bio, rw_flags & 1);
+ trace_block_getrq(q, bio, op);
return rq;
fail_elvpriv:
@@ -1215,7 +1215,7 @@ fail_alloc:
* queue, but this is pretty rare.
*/
spin_lock_irq(q->queue_lock);
- freed_request(rl, rw_flags);
+ freed_request(rl, op, op_flags);
/*
* in the very unlikely event that allocation failed and no
@@ -1233,7 +1233,8 @@ rq_starved:
/**
* get_request - get a free request
* @q: request_queue to allocate request from
- * @rw_flags: RW and SYNC flags
+ * @op: REQ_OP_READ/REQ_OP_WRITE
+ * @op_flags: rq_flag_bits
* @bio: bio to allocate request for (can be %NULL)
* @gfp_mask: allocation mask
*
@@ -1244,17 +1245,18 @@ rq_starved:
* Returns ERR_PTR on failure, with @q->queue_lock held.
* Returns request pointer on success, with @q->queue_lock *not held*.
*/
-static struct request *get_request(struct request_queue *q, int rw_flags,
- struct bio *bio, gfp_t gfp_mask)
+static struct request *get_request(struct request_queue *q, int op,
+ int op_flags, struct bio *bio,
+ gfp_t gfp_mask)
{
- const bool is_sync = rw_is_sync(rw_flags) != 0;
+ const bool is_sync = rw_is_sync(op, op_flags) != 0;
DEFINE_WAIT(wait);
struct request_list *rl;
struct request *rq;
rl = blk_get_rl(q, bio); /* transferred to @rq on success */
retry:
- rq = __get_request(rl, rw_flags, bio, gfp_mask);
+ rq = __get_request(rl, op, op_flags, bio, gfp_mask);
if (!IS_ERR(rq))
return rq;
@@ -1267,7 +1269,7 @@ retry:
prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
TASK_UNINTERRUPTIBLE);
- trace_block_sleeprq(q, bio, rw_flags & 1);
+ trace_block_sleeprq(q, bio, op);
spin_unlock_irq(q->queue_lock);
io_schedule();
@@ -1296,11 +1298,16 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
create_io_context(gfp_mask, q->node);
spin_lock_irq(q->queue_lock);
- rq = get_request(q, rw, NULL, gfp_mask);
- if (IS_ERR(rq))
+ rq = get_request(q, rw, 0, NULL, gfp_mask);
+ if (IS_ERR(rq)) {
spin_unlock_irq(q->queue_lock);
- /* q->queue_lock is unlocked at this point */
+ return rq;
+ }
+ /* q->queue_lock is unlocked at this point */
+ rq->__data_len = 0;
+ rq->__sector = (sector_t) -1;
+ rq->bio = rq->biotail = NULL;
return rq;
}
@@ -1316,63 +1323,6 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
EXPORT_SYMBOL(blk_get_request);
/**
- * blk_make_request - given a bio, allocate a corresponding struct request.
- * @q: target request queue
- * @bio: The bio describing the memory mappings that will be submitted for IO.
- * It may be a chained-bio properly constructed by block/bio layer.
- * @gfp_mask: gfp flags to be used for memory allocation
- *
- * blk_make_request is the parallel of generic_make_request for BLOCK_PC
- * type commands. Where the struct request needs to be farther initialized by
- * the caller. It is passed a &struct bio, which describes the memory info of
- * the I/O transfer.
- *
- * The caller of blk_make_request must make sure that bi_io_vec
- * are set to describe the memory buffers. That bio_data_dir() will return
- * the needed direction of the request. (And all bio's in the passed bio-chain
- * are properly set accordingly)
- *
- * If called under none-sleepable conditions, mapped bio buffers must not
- * need bouncing, by calling the appropriate masked or flagged allocator,
- * suitable for the target device. Otherwise the call to blk_queue_bounce will
- * BUG.
- *
- * WARNING: When allocating/cloning a bio-chain, careful consideration should be
- * given to how you allocate bios. In particular, you cannot use
- * __GFP_DIRECT_RECLAIM for anything but the first bio in the chain. Otherwise
- * you risk waiting for IO completion of a bio that hasn't been submitted yet,
- * thus resulting in a deadlock. Alternatively bios should be allocated using
- * bio_kmalloc() instead of bio_alloc(), as that avoids the mempool deadlock.
- * If possible a big IO should be split into smaller parts when allocation
- * fails. Partial allocation should not be an error, or you risk a live-lock.
- */
-struct request *blk_make_request(struct request_queue *q, struct bio *bio,
- gfp_t gfp_mask)
-{
- struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
-
- if (IS_ERR(rq))
- return rq;
-
- blk_rq_set_block_pc(rq);
-
- for_each_bio(bio) {
- struct bio *bounce_bio = bio;
- int ret;
-
- blk_queue_bounce(q, &bounce_bio);
- ret = blk_rq_append_bio(q, rq, bounce_bio);
- if (unlikely(ret)) {
- blk_put_request(rq);
- return ERR_PTR(ret);
- }
- }
-
- return rq;
-}
-EXPORT_SYMBOL(blk_make_request);
-
-/**
* blk_rq_set_block_pc - initialize a request to type BLOCK_PC
* @rq: request to be initialized
*
@@ -1380,9 +1330,6 @@ EXPORT_SYMBOL(blk_make_request);
void blk_rq_set_block_pc(struct request *rq)
{
rq->cmd_type = REQ_TYPE_BLOCK_PC;
- rq->__data_len = 0;
- rq->__sector = (sector_t) -1;
- rq->bio = rq->biotail = NULL;
memset(rq->__cmd, 0, sizeof(rq->__cmd));
}
EXPORT_SYMBOL(blk_rq_set_block_pc);
@@ -1501,13 +1448,14 @@ void __blk_put_request(struct request_queue *q, struct request *req)
*/
if (req->cmd_flags & REQ_ALLOCED) {
unsigned int flags = req->cmd_flags;
+ int op = req_op(req);
struct request_list *rl = blk_rq_rl(req);
BUG_ON(!list_empty(&req->queuelist));
BUG_ON(ELV_ON_HASH(req));
blk_free_request(rl, req);
- freed_request(rl, flags);
+ freed_request(rl, op, flags);
blk_put_rl(rl);
}
}
@@ -1564,7 +1512,7 @@ EXPORT_SYMBOL_GPL(blk_add_request_payload);
bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
struct bio *bio)
{
- const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+ const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
if (!ll_back_merge_fn(q, req, bio))
return false;
@@ -1586,7 +1534,7 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
struct bio *bio)
{
- const int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+ const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
if (!ll_front_merge_fn(q, req, bio))
return false;
@@ -1708,8 +1656,8 @@ void init_request_from_bio(struct request *req, struct bio *bio)
{
req->cmd_type = REQ_TYPE_FS;
- req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
- if (bio->bi_rw & REQ_RAHEAD)
+ req->cmd_flags |= bio->bi_opf & REQ_COMMON_MASK;
+ if (bio->bi_opf & REQ_RAHEAD)
req->cmd_flags |= REQ_FAILFAST_MASK;
req->errors = 0;
@@ -1720,12 +1668,12 @@ void init_request_from_bio(struct request *req, struct bio *bio)
static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
{
- const bool sync = !!(bio->bi_rw & REQ_SYNC);
+ const bool sync = !!(bio->bi_opf & REQ_SYNC);
struct blk_plug *plug;
- int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
+ int el_ret, rw_flags = 0, where = ELEVATOR_INSERT_SORT;
struct request *req;
unsigned int request_count = 0;
- bool wb_acct;
+ unsigned int wb_acct;
/*
* low level driver can indicate that it wants pages above a
@@ -1742,7 +1690,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
return BLK_QC_T_NONE;
}
- if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
+ if (bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) {
spin_lock_irq(q->queue_lock);
where = ELEVATOR_INSERT_FLUSH;
goto get_rq;
@@ -1778,32 +1726,34 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
}
get_rq:
- wb_acct = wbt_wait(q->rq_wb, bio->bi_rw, q->queue_lock);
+ wb_acct = wbt_wait(q->rq_wb, bio->bi_opf, q->queue_lock);
/*
* This sync check and mask will be re-done in init_request_from_bio(),
* but we need to set it earlier to expose the sync flag to the
* rq allocator and io schedulers.
*/
- rw_flags = bio_data_dir(bio);
if (sync)
rw_flags |= REQ_SYNC;
/*
+ * Add in META/PRIO flags, if set, before we get to the IO scheduler
+ */
+ rw_flags |= (bio->bi_opf & (REQ_META | REQ_PRIO));
+
+ /*
* Grab a free request. This is might sleep but can not fail.
* Returns with the queue unlocked.
*/
- req = get_request(q, rw_flags, bio, GFP_NOIO);
+ req = get_request(q, bio_data_dir(bio), rw_flags, bio, GFP_NOIO);
if (IS_ERR(req)) {
- if (wb_acct)
- __wbt_done(q->rq_wb);
+ __wbt_done(q->rq_wb, wb_acct);
bio->bi_error = PTR_ERR(req);
bio_endio(bio);
goto out_unlock;
}
- if (wb_acct)
- wbt_mark_tracked(&req->wb_stat);
+ wbt_track(&req->wb_stat, wb_acct);
/*
* After dropping the lock and possibly sleeping here, our request
@@ -1867,9 +1817,9 @@ static void handle_bad_sector(struct bio *bio)
char b[BDEVNAME_SIZE];
printk(KERN_INFO "attempt to access beyond end of device\n");
- printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
+ printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
bdevname(bio->bi_bdev, b),
- bio->bi_rw,
+ bio->bi_opf,
(unsigned long long)bio_end_sector(bio),
(long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
}
@@ -1982,25 +1932,30 @@ generic_make_request_checks(struct bio *bio)
* drivers without flush support don't have to worry
* about them.
*/
- if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) &&
+ if ((bio->bi_opf & (REQ_PREFLUSH | REQ_FUA)) &&
!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
- bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
+ bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
if (!nr_sectors) {
err = 0;
goto end_io;
}
}
- if ((bio->bi_rw & REQ_DISCARD) &&
- (!blk_queue_discard(q) ||
- ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) {
- err = -EOPNOTSUPP;
- goto end_io;
- }
-
- if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
- err = -EOPNOTSUPP;
- goto end_io;
+ switch (bio_op(bio)) {
+ case REQ_OP_DISCARD:
+ if (!blk_queue_discard(q))
+ goto not_supported;
+ break;
+ case REQ_OP_SECURE_ERASE:
+ if (!blk_queue_secure_erase(q))
+ goto not_supported;
+ break;
+ case REQ_OP_WRITE_SAME:
+ if (!bdev_write_same(bio->bi_bdev))
+ goto not_supported;
+ break;
+ default:
+ break;
}
/*
@@ -2017,6 +1972,8 @@ generic_make_request_checks(struct bio *bio)
trace_block_bio_queue(q, bio);
return true;
+not_supported:
+ err = -EOPNOTSUPP;
end_io:
bio->bi_error = err;
bio_endio(bio);
@@ -2112,7 +2069,6 @@ EXPORT_SYMBOL(generic_make_request);
/**
* submit_bio - submit a bio to the block device layer for I/O
- * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
* @bio: The &struct bio which describes the I/O
*
* submit_bio() is very similar in purpose to generic_make_request(), and
@@ -2120,13 +2076,8 @@ EXPORT_SYMBOL(generic_make_request);
* interfaces; @bio must be presetup and ready for I/O.
*
*/
-blk_qc_t submit_bio(int rw, struct bio *bio)
+blk_qc_t submit_bio(struct bio *bio)
{
- bio->bi_rw |= rw;
-
- if (unlikely(trap_non_toi_io))
- BUG_ON(!bio_flagged(bio, BIO_TOI));
-
/*
* If it's a regular read/write or a barrier with data attached,
* go through the normal accounting stuff before submission.
@@ -2134,12 +2085,12 @@ blk_qc_t submit_bio(int rw, struct bio *bio)
if (bio_has_data(bio)) {
unsigned int count;
- if (unlikely(rw & REQ_WRITE_SAME))
+ if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
count = bdev_logical_block_size(bio->bi_bdev) >> 9;
else
count = bio_sectors(bio);
- if (rw & WRITE) {
+ if (op_is_write(bio_op(bio))) {
count_vm_events(PGPGOUT, count);
} else {
task_io_account_read(bio->bi_iter.bi_size);
@@ -2150,7 +2101,7 @@ blk_qc_t submit_bio(int rw, struct bio *bio)
char b[BDEVNAME_SIZE];
printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
current->comm, task_pid_nr(current),
- (rw & WRITE) ? "WRITE" : "READ",
+ op_is_write(bio_op(bio)) ? "WRITE" : "READ",
(unsigned long long)bio->bi_iter.bi_sector,
bdevname(bio->bi_bdev, b),
count);
@@ -2181,7 +2132,7 @@ EXPORT_SYMBOL(submit_bio);
static int blk_cloned_rq_check_limits(struct request_queue *q,
struct request *rq)
{
- if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
+ if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) {
printk(KERN_ERR "%s: over max size limit.\n", __func__);
return -EIO;
}
@@ -2237,7 +2188,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
*/
BUG_ON(blk_queued_rq(rq));
- if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
+ if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
where = ELEVATOR_INSERT_FLUSH;
add_acct_request(q, rq, where);
@@ -2282,7 +2233,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
* one.
*/
for (bio = rq->bio; bio; bio = bio->bi_next) {
- if ((bio->bi_rw & ff) != ff)
+ if ((bio->bi_opf & ff) != ff)
break;
bytes += bio->bi_iter.bi_size;
}
@@ -2697,7 +2648,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
/* mixed attributes always follow the first bio */
if (req->cmd_flags & REQ_MIXED_MERGE) {
req->cmd_flags &= ~REQ_FAILFAST_MASK;
- req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
+ req->cmd_flags |= req->bio->bi_opf & REQ_FAILFAST_MASK;
}
/*
@@ -3005,8 +2956,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request_err);
void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
struct bio *bio)
{
- /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
- rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
+ req_set_op(rq, bio_op(bio));
if (bio_has_data(bio))
rq->nr_phys_segments = bio_phys_segments(q, bio);
@@ -3091,7 +3041,8 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
static void __blk_rq_prep_clone(struct request *dst, struct request *src)
{
dst->cpu = src->cpu;
- dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
+ req_set_op_attrs(dst, req_op(src),
+ (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE);
dst->cmd_type = src->cmd_type;
dst->__sector = blk_rq_pos(src);
dst->__data_len = blk_rq_bytes(src);
@@ -3336,7 +3287,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
/*
* rq is already accounted, so use raw insert
*/
- if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
+ if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
else
__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
@@ -3403,6 +3354,7 @@ bool blk_poll(struct request_queue *q, blk_qc_t cookie)
return false;
}
+EXPORT_SYMBOL_GPL(blk_poll);
#ifdef CONFIG_PM
/**
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 3fec8a29d..7ea04325d 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -62,7 +62,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
/*
* don't check dying flag for MQ because the request won't
- * be resued after dying flag is set
+ * be reused after dying flag is set
*/
if (q->mq_ops) {
blk_mq_insert_request(rq, at_head, true, false);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index b1c91d229..d308def81 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -10,8 +10,8 @@
* optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request
* properties and hardware capability.
*
- * If a request doesn't have data, only REQ_FLUSH makes sense, which
- * indicates a simple flush request. If there is data, REQ_FLUSH indicates
+ * If a request doesn't have data, only REQ_PREFLUSH makes sense, which
+ * indicates a simple flush request. If there is data, REQ_PREFLUSH indicates
* that the device cache should be flushed before the data is executed, and
* REQ_FUA means that the data must be on non-volatile media on request
* completion.
@@ -20,16 +20,16 @@
* difference. The requests are either completed immediately if there's no
* data or executed as normal requests otherwise.
*
- * If the device has writeback cache and supports FUA, REQ_FLUSH is
+ * If the device has writeback cache and supports FUA, REQ_PREFLUSH is
* translated to PREFLUSH but REQ_FUA is passed down directly with DATA.
*
- * If the device has writeback cache and doesn't support FUA, REQ_FLUSH is
- * translated to PREFLUSH and REQ_FUA to POSTFLUSH.
+ * If the device has writeback cache and doesn't support FUA, REQ_PREFLUSH
+ * is translated to PREFLUSH and REQ_FUA to POSTFLUSH.
*
* The actual execution of flush is double buffered. Whenever a request
* needs to execute PRE or POSTFLUSH, it queues at
* fq->flush_queue[fq->flush_pending_idx]. Once certain criteria are met, a
- * flush is issued and the pending_idx is toggled. When the flush
+ * REQ_OP_FLUSH is issued and the pending_idx is toggled. When the flush
* completes, all the requests which were pending are proceeded to the next
* step. This allows arbitrary merging of different types of FLUSH/FUA
* requests.
@@ -103,7 +103,7 @@ static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq)
policy |= REQ_FSEQ_DATA;
if (fflags & (1UL << QUEUE_FLAG_WC)) {
- if (rq->cmd_flags & REQ_FLUSH)
+ if (rq->cmd_flags & REQ_PREFLUSH)
policy |= REQ_FSEQ_PREFLUSH;
if (!(fflags & (1UL << QUEUE_FLAG_FUA)) &&
(rq->cmd_flags & REQ_FUA))
@@ -330,7 +330,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
}
flush_rq->cmd_type = REQ_TYPE_FS;
- flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
+ req_set_op_attrs(flush_rq, REQ_OP_FLUSH, WRITE_FLUSH | REQ_FLUSH_SEQ);
flush_rq->rq_disk = first_rq->rq_disk;
flush_rq->end_io = flush_end_io;
@@ -391,9 +391,9 @@ void blk_insert_flush(struct request *rq)
/*
* @policy now records what operations need to be done. Adjust
- * REQ_FLUSH and FUA for the driver.
+ * REQ_PREFLUSH and FUA for the driver.
*/
- rq->cmd_flags &= ~REQ_FLUSH;
+ rq->cmd_flags &= ~REQ_PREFLUSH;
if (!(fflags & (1UL << QUEUE_FLAG_FUA)))
rq->cmd_flags &= ~REQ_FUA;
@@ -485,8 +485,9 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
bio = bio_alloc(gfp_mask, 0);
bio->bi_bdev = bdev;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
- ret = submit_bio_wait(WRITE_FLUSH, bio);
+ ret = submit_bio_wait(bio);
/*
* The driver must store the error location in ->bi_sector, if
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9e29dc351..083e56f72 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -9,33 +9,46 @@
#include "blk.h"
-static struct bio *next_bio(struct bio *bio, int rw, unsigned int nr_pages,
+static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
gfp_t gfp)
{
struct bio *new = bio_alloc(gfp, nr_pages);
if (bio) {
bio_chain(bio, new);
- submit_bio(rw, bio);
+ submit_bio(bio);
}
return new;
}
int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, int type, struct bio **biop)
+ sector_t nr_sects, gfp_t gfp_mask, int flags,
+ struct bio **biop)
{
struct request_queue *q = bdev_get_queue(bdev);
struct bio *bio = *biop;
unsigned int granularity;
+ enum req_op op;
int alignment;
if (!q)
return -ENXIO;
- if (!blk_queue_discard(q))
- return -EOPNOTSUPP;
- if ((type & REQ_SECURE) && !blk_queue_secdiscard(q))
- return -EOPNOTSUPP;
+
+ if (flags & BLKDEV_DISCARD_SECURE) {
+ if (flags & BLKDEV_DISCARD_ZERO)
+ return -EOPNOTSUPP;
+ if (!blk_queue_secure_erase(q))
+ return -EOPNOTSUPP;
+ op = REQ_OP_SECURE_ERASE;
+ } else {
+ if (!blk_queue_discard(q))
+ return -EOPNOTSUPP;
+ if ((flags & BLKDEV_DISCARD_ZERO) &&
+ !q->limits.discard_zeroes_data)
+ return -EOPNOTSUPP;
+ op = REQ_OP_DISCARD;
+ }
/* Zero-sector (unknown) and one-sector granularities are the same. */
granularity = max(q->limits.discard_granularity >> 9, 1U);
@@ -62,9 +75,10 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
req_sects = end_sect - sector;
}
- bio = next_bio(bio, type, 1, gfp_mask);
+ bio = next_bio(bio, 1, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
+ bio_set_op_attrs(bio, op, 0);
bio->bi_iter.bi_size = req_sects << 9;
nr_sects -= req_sects;
@@ -98,20 +112,16 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
{
- int type = REQ_WRITE | REQ_DISCARD;
struct bio *bio = NULL;
struct blk_plug plug;
int ret;
- if (flags & BLKDEV_DISCARD_SECURE)
- type |= REQ_SECURE;
-
blk_start_plug(&plug);
- ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, type,
+ ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
&bio);
if (!ret && bio) {
- ret = submit_bio_wait(type, bio);
- if (ret == -EOPNOTSUPP)
+ ret = submit_bio_wait(bio);
+ if (ret == -EOPNOTSUPP && !(flags & BLKDEV_DISCARD_ZERO))
ret = 0;
bio_put(bio);
}
@@ -148,13 +158,14 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
max_write_same_sectors = UINT_MAX >> 9;
while (nr_sects) {
- bio = next_bio(bio, REQ_WRITE | REQ_WRITE_SAME, 1, gfp_mask);
+ bio = next_bio(bio, 1, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
bio->bi_vcnt = 1;
bio->bi_io_vec->bv_page = page;
bio->bi_io_vec->bv_offset = 0;
bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
+ bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);
if (nr_sects > max_write_same_sectors) {
bio->bi_iter.bi_size = max_write_same_sectors << 9;
@@ -167,10 +178,10 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
}
if (bio) {
- ret = submit_bio_wait(REQ_WRITE | REQ_WRITE_SAME, bio);
+ ret = submit_bio_wait(bio);
bio_put(bio);
}
- return ret != -EOPNOTSUPP ? ret : 0;
+ return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);
@@ -193,11 +204,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
unsigned int sz;
while (nr_sects != 0) {
- bio = next_bio(bio, WRITE,
- min(nr_sects, (sector_t)BIO_MAX_PAGES),
+ bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES),
gfp_mask);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
while (nr_sects != 0) {
sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
@@ -210,7 +221,7 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
}
if (bio) {
- ret = submit_bio_wait(WRITE, bio);
+ ret = submit_bio_wait(bio);
bio_put(bio);
return ret;
}
@@ -241,11 +252,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, bool discard)
{
- struct request_queue *q = bdev_get_queue(bdev);
-
- if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data &&
- blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0) == 0)
- return 0;
+ if (discard) {
+ if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
+ BLKDEV_DISCARD_ZERO))
+ return 0;
+ }
if (bdev_write_same(bdev) &&
blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
diff --git a/block/blk-map.c b/block/blk-map.c
index b9f88b775..b8657fa8d 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -9,21 +9,26 @@
#include "blk.h"
-int blk_rq_append_bio(struct request_queue *q, struct request *rq,
- struct bio *bio)
+/*
+ * Append a bio to a passthrough request. Only works can be merged into
+ * the request based on the driver constraints.
+ */
+int blk_rq_append_bio(struct request *rq, struct bio *bio)
{
- if (!rq->bio)
- blk_rq_bio_prep(q, rq, bio);
- else if (!ll_back_merge_fn(q, rq, bio))
- return -EINVAL;
- else {
+ if (!rq->bio) {
+ blk_rq_bio_prep(rq->q, rq, bio);
+ } else {
+ if (!ll_back_merge_fn(rq->q, rq, bio))
+ return -EINVAL;
+
rq->biotail->bi_next = bio;
rq->biotail = bio;
-
rq->__data_len += bio->bi_iter.bi_size;
}
+
return 0;
}
+EXPORT_SYMBOL(blk_rq_append_bio);
static int __blk_rq_unmap_user(struct bio *bio)
{
@@ -71,7 +76,7 @@ static int __blk_rq_map_user_iov(struct request *rq,
*/
bio_get(bio);
- ret = blk_rq_append_bio(q, rq, bio);
+ ret = blk_rq_append_bio(rq, bio);
if (ret) {
bio_endio(bio);
__blk_rq_unmap_user(orig_bio);
@@ -224,12 +229,12 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
return PTR_ERR(bio);
if (!reading)
- bio->bi_rw |= REQ_WRITE;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
if (do_copy)
rq->cmd_flags |= REQ_COPY_USER;
- ret = blk_rq_append_bio(q, rq, bio);
+ ret = blk_rq_append_bio(rq, bio);
if (unlikely(ret)) {
/* request is too big */
bio_put(bio);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index bea93441a..2642e5fc8 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -194,12 +194,18 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
struct bio *split, *res;
unsigned nsegs;
- if ((*bio)->bi_rw & REQ_DISCARD)
+ switch (bio_op(*bio)) {
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
split = blk_bio_discard_split(q, *bio, bs, &nsegs);
- else if ((*bio)->bi_rw & REQ_WRITE_SAME)
+ break;
+ case REQ_OP_WRITE_SAME:
split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
- else
+ break;
+ default:
split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
+ break;
+ }
/* physical segments can be figured out during splitting */
res = split ? split : *bio;
@@ -208,7 +214,7 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
if (split) {
/* there isn't chance to merge the splitted bio */
- split->bi_rw |= REQ_NOMERGE;
+ split->bi_opf |= REQ_NOMERGE;
bio_chain(split, *bio);
trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
@@ -235,10 +241,10 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
* This should probably be returning 0, but blk_add_request_payload()
* (Christoph!!!!)
*/
- if (bio->bi_rw & REQ_DISCARD)
+ if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
return 1;
- if (bio->bi_rw & REQ_WRITE_SAME)
+ if (bio_op(bio) == REQ_OP_WRITE_SAME)
return 1;
fbio = bio;
@@ -407,7 +413,9 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
nsegs = 0;
cluster = blk_queue_cluster(q);
- if (bio->bi_rw & REQ_DISCARD) {
+ switch (bio_op(bio)) {
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
/*
* This is a hack - drivers should be neither modifying the
* biovec, nor relying on bi_vcnt - but because of
@@ -415,19 +423,16 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
* a payload we need to set up here (thank you Christoph) and
* bi_vcnt is really the only way of telling if we need to.
*/
-
- if (bio->bi_vcnt)
- goto single_segment;
-
- return 0;
- }
-
- if (bio->bi_rw & REQ_WRITE_SAME) {
-single_segment:
+ if (!bio->bi_vcnt)
+ return 0;
+ /* Fall through */
+ case REQ_OP_WRITE_SAME:
*sg = sglist;
bvec = bio_iovec(bio);
sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
return 1;
+ default:
+ break;
}
for_each_bio(bio)
@@ -461,7 +466,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
}
if (q->dma_drain_size && q->dma_drain_needed(rq)) {
- if (rq->cmd_flags & REQ_WRITE)
+ if (op_is_write(req_op(rq)))
memset(q->dma_drain_buffer, 0, q->dma_drain_size);
sg_unmark_end(sg);
@@ -522,7 +527,7 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
integrity_req_gap_back_merge(req, bio))
return 0;
if (blk_rq_sectors(req) + bio_sectors(bio) >
- blk_rq_get_max_sectors(req)) {
+ blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
@@ -546,7 +551,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
integrity_req_gap_front_merge(req, bio))
return 0;
if (blk_rq_sectors(req) + bio_sectors(bio) >
- blk_rq_get_max_sectors(req)) {
+ blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
req->cmd_flags |= REQ_NOMERGE;
if (req == q->last_merge)
q->last_merge = NULL;
@@ -592,7 +597,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
* Will it become too large?
*/
if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
- blk_rq_get_max_sectors(req))
+ blk_rq_get_max_sectors(req, blk_rq_pos(req)))
return 0;
total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
@@ -638,9 +643,9 @@ void blk_rq_set_mixed_merge(struct request *rq)
* Distributes the attributs to each bio.
*/
for (bio = rq->bio; bio; bio = bio->bi_next) {
- WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) &&
- (bio->bi_rw & REQ_FAILFAST_MASK) != ff);
- bio->bi_rw |= ff;
+ WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
+ (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
+ bio->bi_opf |= ff;
}
rq->cmd_flags |= REQ_MIXED_MERGE;
}
@@ -671,7 +676,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
if (!rq_mergeable(req) || !rq_mergeable(next))
return 0;
- if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags))
+ if (req_op(req) != req_op(next))
return 0;
/*
@@ -685,7 +690,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
|| req_no_special_merge(next))
return 0;
- if (req->cmd_flags & REQ_WRITE_SAME &&
+ if (req_op(req) == REQ_OP_WRITE_SAME &&
!blk_write_same_mergeable(req->bio, next->bio))
return 0;
@@ -765,6 +770,12 @@ int attempt_front_merge(struct request_queue *q, struct request *rq)
int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
struct request *next)
{
+ struct elevator_queue *e = q->elevator;
+
+ if (e->type->ops.elevator_allow_rq_merge_fn)
+ if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next))
+ return 0;
+
return attempt_merge(q, rq, next);
}
@@ -773,7 +784,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (!rq_mergeable(rq) || !bio_mergeable(bio))
return false;
- if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
+ if (req_op(rq) != bio_op(bio))
return false;
/* different data direction or already started, don't merge */
@@ -789,7 +800,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
return false;
/* must be using the same buffer */
- if (rq->cmd_flags & REQ_WRITE_SAME &&
+ if (req_op(rq) == REQ_OP_WRITE_SAME &&
!blk_write_same_mergeable(rq->bio, bio))
return false;
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 2f68015f8..b66bbf13c 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -427,15 +427,13 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
return ret;
}
-void blk_mq_unregister_disk(struct gendisk *disk)
+static void __blk_mq_unregister_disk(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
int i, j;
- blk_mq_disable_hotplug();
-
queue_for_each_hw_ctx(q, hctx, i) {
blk_mq_unregister_hctx(hctx);
@@ -452,6 +450,12 @@ void blk_mq_unregister_disk(struct gendisk *disk)
kobject_put(&disk_to_dev(disk)->kobj);
q->mq_sysfs_init_done = false;
+}
+
+void blk_mq_unregister_disk(struct gendisk *disk)
+{
+ blk_mq_disable_hotplug();
+ __blk_mq_unregister_disk(disk);
blk_mq_enable_hotplug();
}
@@ -497,7 +501,7 @@ int blk_mq_register_disk(struct gendisk *disk)
}
if (ret)
- blk_mq_unregister_disk(disk);
+ __blk_mq_unregister_disk(disk);
else
q->mq_sysfs_init_done = true;
out:
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 56a0c37a3..729bac3a6 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -485,6 +485,32 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
+int blk_mq_reinit_tagset(struct blk_mq_tag_set *set)
+{
+ int i, j, ret = 0;
+
+ if (!set->ops->reinit_request)
+ goto out;
+
+ for (i = 0; i < set->nr_hw_queues; i++) {
+ struct blk_mq_tags *tags = set->tags[i];
+
+ for (j = 0; j < tags->nr_tags; j++) {
+ if (!tags->rqs[j])
+ continue;
+
+ ret = set->ops->reinit_request(set->driver_data,
+ tags->rqs[j]);
+ if (ret)
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blk_mq_reinit_tagset);
+
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
void *priv)
{
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 23ff76a40..815e2ac02 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -161,16 +161,17 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
EXPORT_SYMBOL(blk_mq_can_queue);
static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
- struct request *rq, unsigned int rw_flags)
+ struct request *rq, int op,
+ unsigned int op_flags)
{
if (blk_queue_io_stat(q))
- rw_flags |= REQ_IO_STAT;
+ op_flags |= REQ_IO_STAT;
INIT_LIST_HEAD(&rq->queuelist);
/* csd/requeue_work/fifo_time is initialized before use */
rq->q = q;
rq->mq_ctx = ctx;
- rq->cmd_flags |= rw_flags;
+ req_set_op_attrs(rq, op, op_flags);
/* do not touch atomic flags, it needs atomic ops against the timer */
rq->cpu = -1;
INIT_HLIST_NODE(&rq->hash);
@@ -205,11 +206,11 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
rq->end_io_data = NULL;
rq->next_rq = NULL;
- ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
+ ctx->rq_dispatched[rw_is_sync(op, op_flags)]++;
}
static struct request *
-__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
+__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags)
{
struct request *rq;
unsigned int tag;
@@ -224,7 +225,7 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
}
rq->tag = tag;
- blk_mq_rq_ctx_init(data->q, data->ctx, rq, rw);
+ blk_mq_rq_ctx_init(data->q, data->ctx, rq, op, op_flags);
return rq;
}
@@ -248,7 +249,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
hctx = q->mq_ops->map_queue(q, ctx->cpu);
blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
- rq = __blk_mq_alloc_request(&alloc_data, rw);
+ rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
__blk_mq_run_hw_queue(hctx);
blk_mq_put_ctx(ctx);
@@ -256,7 +257,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
ctx = blk_mq_get_ctx(q);
hctx = q->mq_ops->map_queue(q, ctx->cpu);
blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
- rq = __blk_mq_alloc_request(&alloc_data, rw);
+ rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
ctx = alloc_data.ctx;
}
blk_mq_put_ctx(ctx);
@@ -264,10 +265,65 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
blk_queue_exit(q);
return ERR_PTR(-EWOULDBLOCK);
}
+
+ rq->__data_len = 0;
+ rq->__sector = (sector_t) -1;
+ rq->bio = rq->biotail = NULL;
return rq;
}
EXPORT_SYMBOL(blk_mq_alloc_request);
+struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
+ unsigned int flags, unsigned int hctx_idx)
+{
+ struct blk_mq_hw_ctx *hctx;
+ struct blk_mq_ctx *ctx;
+ struct request *rq;
+ struct blk_mq_alloc_data alloc_data;
+ int ret;
+
+ /*
+ * If the tag allocator sleeps we could get an allocation for a
+ * different hardware context. No need to complicate the low level
+ * allocator for this for the rare use case of a command tied to
+ * a specific queue.
+ */
+ if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT)))
+ return ERR_PTR(-EINVAL);
+
+ if (hctx_idx >= q->nr_hw_queues)
+ return ERR_PTR(-EIO);
+
+ ret = blk_queue_enter(q, true);
+ if (ret)
+ return ERR_PTR(ret);
+
+ /*
+ * Check if the hardware context is actually mapped to anything.
+ * If not tell the caller that it should skip this queue.
+ */
+ hctx = q->queue_hw_ctx[hctx_idx];
+ if (!blk_mq_hw_queue_mapped(hctx)) {
+ ret = -EXDEV;
+ goto out_queue_exit;
+ }
+ ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
+
+ blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
+ rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
+ if (!rq) {
+ ret = -EWOULDBLOCK;
+ goto out_queue_exit;
+ }
+
+ return rq;
+
+out_queue_exit:
+ blk_queue_exit(q);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
+
static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx, struct request *rq)
{
@@ -645,7 +701,20 @@ static void blk_mq_timeout_work(struct work_struct *work)
};
int i;
- if (blk_queue_enter(q, true))
+ /* A deadlock might occur if a request is stuck requiring a
+ * timeout at the same time a queue freeze is waiting
+ * completion, since the timeout code would not be able to
+ * acquire the queue reference here.
+ *
+ * That's why we don't use blk_queue_enter here; instead, we use
+ * percpu_ref_tryget directly, because we need to be able to
+ * obtain a reference even in the short window between the queue
+ * starting to freeze, by dropping the first reference in
+ * blk_mq_freeze_queue_start, and the moment the last request is
+ * consumed, marked by the instant q_usage_counter reaches
+ * zero.
+ */
+ if (!percpu_ref_tryget(&q->q_usage_counter))
return;
blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
@@ -753,11 +822,12 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
struct list_head *dptr;
int queued;
- WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
-
if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
return;
+ WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
+ cpu_online(hctx->next_cpu));
+
hctx->run++;
/*
@@ -801,7 +871,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
switch (ret) {
case BLK_MQ_RQ_QUEUE_OK:
queued++;
- continue;
+ break;
case BLK_MQ_RQ_QUEUE_BUSY:
list_add(&rq->queuelist, &rq_list);
__blk_mq_requeue_request(rq);
@@ -996,10 +1066,11 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
EXPORT_SYMBOL(blk_mq_delay_queue);
static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
- struct blk_mq_ctx *ctx,
struct request *rq,
bool at_head)
{
+ struct blk_mq_ctx *ctx = rq->mq_ctx;
+
trace_block_rq_insert(hctx->queue, rq);
if (at_head)
@@ -1013,20 +1084,16 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
- __blk_mq_insert_req_list(hctx, ctx, rq, at_head);
+ __blk_mq_insert_req_list(hctx, rq, at_head);
blk_mq_hctx_mark_pending(hctx, ctx);
}
void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
- bool async)
+ bool async)
{
+ struct blk_mq_ctx *ctx = rq->mq_ctx;
struct request_queue *q = rq->q;
struct blk_mq_hw_ctx *hctx;
- struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx;
-
- current_ctx = blk_mq_get_ctx(q);
- if (!cpu_online(ctx->cpu))
- rq->mq_ctx = ctx = current_ctx;
hctx = q->mq_ops->map_queue(q, ctx->cpu);
@@ -1036,8 +1103,6 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
if (run_queue)
blk_mq_run_hw_queue(hctx, async);
-
- blk_mq_put_ctx(current_ctx);
}
static void blk_mq_insert_requests(struct request_queue *q,
@@ -1048,14 +1113,9 @@ static void blk_mq_insert_requests(struct request_queue *q,
{
struct blk_mq_hw_ctx *hctx;
- struct blk_mq_ctx *current_ctx;
trace_block_unplug(q, depth, !from_schedule);
- current_ctx = blk_mq_get_ctx(q);
-
- if (!cpu_online(ctx->cpu))
- ctx = current_ctx;
hctx = q->mq_ops->map_queue(q, ctx->cpu);
/*
@@ -1067,15 +1127,14 @@ static void blk_mq_insert_requests(struct request_queue *q,
struct request *rq;
rq = list_first_entry(list, struct request, queuelist);
+ BUG_ON(rq->mq_ctx != ctx);
list_del_init(&rq->queuelist);
- rq->mq_ctx = ctx;
- __blk_mq_insert_req_list(hctx, ctx, rq, false);
+ __blk_mq_insert_req_list(hctx, rq, false);
}
blk_mq_hctx_mark_pending(hctx, ctx);
spin_unlock(&ctx->lock);
blk_mq_run_hw_queue(hctx, from_schedule);
- blk_mq_put_ctx(current_ctx);
}
static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
@@ -1186,28 +1245,29 @@ static struct request *blk_mq_map_request(struct request_queue *q,
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
struct request *rq;
- int rw = bio_data_dir(bio);
+ int op = bio_data_dir(bio);
+ int op_flags = 0;
struct blk_mq_alloc_data alloc_data;
blk_queue_enter_live(q);
ctx = blk_mq_get_ctx(q);
hctx = q->mq_ops->map_queue(q, ctx->cpu);
- if (rw_is_sync(bio->bi_rw))
- rw |= REQ_SYNC;
+ if (rw_is_sync(bio_op(bio), bio->bi_opf))
+ op_flags |= REQ_SYNC;
- trace_block_getrq(q, bio, rw);
+ trace_block_getrq(q, bio, op);
blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx);
- rq = __blk_mq_alloc_request(&alloc_data, rw);
+ rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
if (unlikely(!rq)) {
__blk_mq_run_hw_queue(hctx);
blk_mq_put_ctx(ctx);
- trace_block_sleeprq(q, bio, rw);
+ trace_block_sleeprq(q, bio, op);
ctx = blk_mq_get_ctx(q);
hctx = q->mq_ops->map_queue(q, ctx->cpu);
blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
- rq = __blk_mq_alloc_request(&alloc_data, rw);
+ rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
ctx = alloc_data.ctx;
hctx = alloc_data.hctx;
}
@@ -1261,15 +1321,15 @@ static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie)
*/
static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
{
- const int is_sync = rw_is_sync(bio->bi_rw);
- const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
+ const int is_sync = rw_is_sync(bio_op(bio), bio->bi_opf);
+ const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
struct blk_map_ctx data;
struct request *rq;
unsigned int request_count = 0;
struct blk_plug *plug;
struct request *same_queue_rq = NULL;
blk_qc_t cookie;
- bool wb_acct;
+ unsigned int wb_acct;
blk_queue_bounce(q, &bio);
@@ -1284,17 +1344,15 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
return BLK_QC_T_NONE;
- wb_acct = wbt_wait(q->rq_wb, bio->bi_rw, NULL);
+ wb_acct = wbt_wait(q->rq_wb, bio->bi_opf, NULL);
rq = blk_mq_map_request(q, bio, &data);
if (unlikely(!rq)) {
- if (wb_acct)
- __wbt_done(q->rq_wb);
+ __wbt_done(q->rq_wb, wb_acct);
return BLK_QC_T_NONE;
}
- if (wb_acct)
- wbt_mark_tracked(&rq->wb_stat);
+ wbt_track(&rq->wb_stat, wb_acct);
cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
@@ -1364,14 +1422,14 @@ done:
*/
static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
{
- const int is_sync = rw_is_sync(bio->bi_rw);
- const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
+ const int is_sync = rw_is_sync(bio_op(bio), bio->bi_opf);
+ const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
struct blk_plug *plug;
unsigned int request_count = 0;
struct blk_map_ctx data;
struct request *rq;
blk_qc_t cookie;
- bool wb_acct;
+ unsigned int wb_acct;
blk_queue_bounce(q, &bio);
@@ -1388,17 +1446,15 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
} else
request_count = blk_plug_queued_count(q);
- wb_acct = wbt_wait(q->rq_wb, bio->bi_rw, NULL);
+ wb_acct = wbt_wait(q->rq_wb, bio->bi_opf, NULL);
rq = blk_mq_map_request(q, bio, &data);
if (unlikely(!rq)) {
- if (wb_acct)
- __wbt_done(q->rq_wb);
+ __wbt_done(q->rq_wb, wb_acct);
return BLK_QC_T_NONE;
}
- if (wb_acct)
- wbt_mark_tracked(&rq->wb_stat);
+ wbt_track(&rq->wb_stat, wb_acct);
cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
@@ -1607,16 +1663,17 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
return 0;
}
+/*
+ * 'cpu' is going away. splice any existing rq_list entries from this
+ * software queue to the hw queue dispatch list, and ensure that it
+ * gets run.
+ */
static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
{
- struct request_queue *q = hctx->queue;
struct blk_mq_ctx *ctx;
LIST_HEAD(tmp);
- /*
- * Move ctx entries to new CPU, if this one is going away.
- */
- ctx = __blk_mq_get_ctx(q, cpu);
+ ctx = __blk_mq_get_ctx(hctx->queue, cpu);
spin_lock(&ctx->lock);
if (!list_empty(&ctx->rq_list)) {
@@ -1628,24 +1685,11 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
if (list_empty(&tmp))
return NOTIFY_OK;
- ctx = blk_mq_get_ctx(q);
- spin_lock(&ctx->lock);
-
- while (!list_empty(&tmp)) {
- struct request *rq;
-
- rq = list_first_entry(&tmp, struct request, queuelist);
- rq->mq_ctx = ctx;
- list_move_tail(&rq->queuelist, &ctx->rq_list);
- }
-
- hctx = q->mq_ops->map_queue(q, ctx->cpu);
- blk_mq_hctx_mark_pending(hctx, ctx);
-
- spin_unlock(&ctx->lock);
+ spin_lock(&hctx->lock);
+ list_splice_tail_init(&tmp, &hctx->dispatch);
+ spin_unlock(&hctx->lock);
blk_mq_run_hw_queue(hctx, true);
- blk_mq_put_ctx(ctx);
return NOTIFY_OK;
}
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 8e3974d87..bdb16d84b 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -9,11 +9,29 @@
#include "blk-stat.h"
#include "blk-mq.h"
+static void blk_stat_flush_batch(struct blk_rq_stat *stat)
+{
+ if (!stat->nr_batch)
+ return;
+ if (!stat->nr_samples)
+ stat->mean = div64_s64(stat->batch, stat->nr_batch);
+ else {
+ stat->mean = div64_s64((stat->mean * stat->nr_samples) +
+ stat->batch,
+ stat->nr_samples + stat->nr_batch);
+ }
+
+ stat->nr_samples += stat->nr_batch;
+ stat->nr_batch = stat->batch = 0;
+}
+
void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
{
if (!src->nr_samples)
return;
+ blk_stat_flush_batch(src);
+
dst->min = min(dst->min, src->min);
dst->max = max(dst->max, src->max);
@@ -31,6 +49,7 @@ static void blk_mq_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
{
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
+ uint64_t latest = 0;
int i, j, nr;
blk_stat_init(&dst[0]);
@@ -58,6 +77,9 @@ static void blk_mq_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
if (!newest)
break;
+ if (newest > latest)
+ latest = newest;
+
queue_for_each_hw_ctx(q, hctx, i) {
hctx_for_each_ctx(hctx, ctx, j) {
if (ctx->stat[0].time == newest) {
@@ -75,6 +97,8 @@ static void blk_mq_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
* Should be very rare.
*/
} while (!nr);
+
+ dst[0].time = dst[1].time = latest;
}
void blk_queue_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
@@ -132,6 +156,7 @@ static void __blk_stat_init(struct blk_rq_stat *stat, s64 time_now)
{
stat->min = -1ULL;
stat->max = stat->nr_samples = stat->mean = 0;
+ stat->batch = stat->nr_batch = 0;
stat->time = time_now & BLK_STAT_MASK;
}
@@ -140,16 +165,26 @@ void blk_stat_init(struct blk_rq_stat *stat)
__blk_stat_init(stat, ktime_to_ns(ktime_get()));
}
+static bool __blk_stat_is_current(struct blk_rq_stat *stat, s64 now)
+{
+ return (now & BLK_STAT_MASK) == (stat->time & BLK_STAT_MASK);
+}
+
+bool blk_stat_is_current(struct blk_rq_stat *stat)
+{
+ return __blk_stat_is_current(stat, ktime_to_ns(ktime_get()));
+}
+
void blk_stat_add(struct blk_rq_stat *stat, struct request *rq)
{
- s64 delta, now, value;
+ s64 now, value;
u64 rq_time = wbt_issue_stat_get_time(&rq->wb_stat);
now = ktime_to_ns(ktime_get());
if (now < rq_time)
return;
- if ((now & BLK_STAT_MASK) != (stat->time & BLK_STAT_MASK))
+ if (!__blk_stat_is_current(stat, now))
__blk_stat_init(stat, now);
value = now - rq_time;
@@ -158,11 +193,12 @@ void blk_stat_add(struct blk_rq_stat *stat, struct request *rq)
if (value < stat->min)
stat->min = value;
- delta = value - stat->mean;
- if (delta)
- stat->mean += div64_s64(delta, stat->nr_samples + 1);
+ if (stat->batch + value < stat->batch ||
+ stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)
+ blk_stat_flush_batch(stat);
- stat->nr_samples++;
+ stat->batch += value;
+ stat->nr_batch++;
}
void blk_stat_clear(struct request_queue *q)
diff --git a/block/blk-stat.h b/block/blk-stat.h
index d77548dbf..376a6ccd9 100644
--- a/block/blk-stat.h
+++ b/block/blk-stat.h
@@ -13,5 +13,6 @@ void blk_queue_stat_get(struct request_queue *, struct blk_rq_stat *);
void blk_stat_clear(struct request_queue *q);
void blk_stat_init(struct blk_rq_stat *);
void blk_stat_sum(struct blk_rq_stat *, struct blk_rq_stat *);
+bool blk_stat_is_current(struct blk_rq_stat *);
#endif
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index df194bf93..85c3dc223 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -445,6 +445,11 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page,
return count;
}
+static ssize_t queue_dax_show(struct request_queue *q, char *page)
+{
+ return queue_var_show(blk_queue_dax(q), page);
+}
+
static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
{
return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
@@ -602,6 +607,11 @@ static struct queue_sysfs_entry queue_wc_entry = {
.store = queue_wc_store,
};
+static struct queue_sysfs_entry queue_dax_entry = {
+ .attr = {.name = "dax", .mode = S_IRUGO },
+ .show = queue_dax_show,
+};
+
static struct queue_sysfs_entry queue_stats_entry = {
.attr = {.name = "stats", .mode = S_IRUGO },
.show = queue_stats_show,
@@ -645,6 +655,7 @@ static struct attribute *default_attrs[] = {
&queue_random_entry.attr,
&queue_poll_entry.attr,
&queue_wc_entry.attr,
+ &queue_dax_entry.attr,
&queue_stats_entry.attr,
&queue_wb_lat_entry.attr,
&queue_wb_win_entry.attr,
@@ -772,9 +783,15 @@ static void blk_wb_stat_clear(void *data)
blk_stat_clear(data);
}
+static bool blk_wb_stat_is_current(struct blk_rq_stat *stat)
+{
+ return blk_stat_is_current(stat);
+}
+
static struct wb_stat_ops wb_stat_ops = {
- .get = blk_wb_stat_get,
- .clear = blk_wb_stat_clear,
+ .get = blk_wb_stat_get,
+ .is_current = blk_wb_stat_is_current,
+ .clear = blk_wb_stat_clear,
};
static void blk_wb_init(struct request_queue *q)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 47a3e5406..a3ea8260c 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -145,11 +145,6 @@ struct throtl_data
/* Total Number of queued bios on READ and WRITE lists */
unsigned int nr_queued[2];
- /*
- * number of total undestroyed groups
- */
- unsigned int nr_undestroyed_grps;
-
/* Work for dispatching throttled bios */
struct work_struct dispatch_work;
};
@@ -785,9 +780,11 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
/*
* If previous slice expired, start a new one otherwise renew/extend
* existing slice to make sure it is at least throtl_slice interval
- * long since now.
+ * long since now. New slice is started only for empty throttle group.
+ * If there is queued bio, that means there should be an active
+ * slice and it should be extended instead.
*/
- if (throtl_slice_used(tg, rw))
+ if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
throtl_start_new_slice(tg, rw);
else {
if (time_before(tg->slice_end[rw], jiffies + throtl_slice))
@@ -826,8 +823,8 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
* second time when it eventually gets issued. Set it when a bio
* is being charged to a tg.
*/
- if (!(bio->bi_rw & REQ_THROTTLED))
- bio->bi_rw |= REQ_THROTTLED;
+ if (!(bio->bi_opf & REQ_THROTTLED))
+ bio->bi_opf |= REQ_THROTTLED;
}
/**
@@ -1404,7 +1401,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
WARN_ON_ONCE(!rcu_read_lock_held());
/* see throtl_charge_bio() */
- if ((bio->bi_rw & REQ_THROTTLED) || !tg->has_rules[rw])
+ if ((bio->bi_opf & REQ_THROTTLED) || !tg->has_rules[rw])
goto out;
spin_lock_irq(q->queue_lock);
@@ -1483,7 +1480,7 @@ out:
* being issued.
*/
if (!throttled)
- bio->bi_rw &= ~REQ_THROTTLED;
+ bio->bi_opf &= ~REQ_THROTTLED;
return throttled;
}
diff --git a/block/blk.h b/block/blk.h
index 70e4aee9c..c37492f5e 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -64,8 +64,6 @@ void blk_exit_rl(struct request_list *rl);
void init_request_from_bio(struct request *req, struct bio *bio);
void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
struct bio *bio);
-int blk_rq_append_bio(struct request_queue *q, struct request *rq,
- struct bio *bio);
void blk_queue_bypass_start(struct request_queue *q);
void blk_queue_bypass_end(struct request_queue *q);
void blk_dequeue_request(struct request *rq);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 49707ef43..fdcd5999d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -10,7 +10,7 @@
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
-#include <linux/jiffies.h>
+#include <linux/ktime.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>
#include <linux/blktrace_api.h>
@@ -22,28 +22,28 @@
*/
/* max queue in one round of service */
static const int cfq_quantum = 8;
-static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
+static const u64 cfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
/* maximum backwards seek, in KiB */
static const int cfq_back_max = 16 * 1024;
/* penalty of a backwards seek */
static const int cfq_back_penalty = 2;
-static const int cfq_slice_sync = HZ / 10;
-static int cfq_slice_async = HZ / 25;
+static const u64 cfq_slice_sync = NSEC_PER_SEC / 10;
+static u64 cfq_slice_async = NSEC_PER_SEC / 25;
static const int cfq_slice_async_rq = 2;
-static int cfq_slice_idle = HZ / 125;
-static int cfq_group_idle = HZ / 125;
-static const int cfq_target_latency = HZ * 3/10; /* 300 ms */
+static u64 cfq_slice_idle = NSEC_PER_SEC / 125;
+static u64 cfq_group_idle = NSEC_PER_SEC / 125;
+static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */
static const int cfq_hist_divisor = 4;
/*
* offset from end of service tree
*/
-#define CFQ_IDLE_DELAY (HZ / 5)
+#define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5)
/*
* below this threshold, we consider thinktime immediate
*/
-#define CFQ_MIN_TT (2)
+#define CFQ_MIN_TT (2 * NSEC_PER_SEC / HZ)
#define CFQ_SLICE_SCALE (5)
#define CFQ_HW_QUEUE_MIN (5)
@@ -73,11 +73,11 @@ static struct kmem_cache *cfq_pool;
#define CFQ_WEIGHT_LEGACY_MAX 1000
struct cfq_ttime {
- unsigned long last_end_request;
+ u64 last_end_request;
- unsigned long ttime_total;
+ u64 ttime_total;
+ u64 ttime_mean;
unsigned long ttime_samples;
- unsigned long ttime_mean;
};
/*
@@ -94,7 +94,7 @@ struct cfq_rb_root {
struct cfq_ttime ttime;
};
#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
- .ttime = {.last_end_request = jiffies,},}
+ .ttime = {.last_end_request = ktime_get_ns(),},}
/*
* Per process-grouping structure
@@ -109,7 +109,7 @@ struct cfq_queue {
/* service_tree member */
struct rb_node rb_node;
/* service_tree key */
- unsigned long rb_key;
+ u64 rb_key;
/* prio tree member */
struct rb_node p_node;
/* prio tree root we belong to, if any */
@@ -126,13 +126,13 @@ struct cfq_queue {
struct list_head fifo;
/* time when queue got scheduled in to dispatch first request. */
- unsigned long dispatch_start;
- unsigned int allocated_slice;
- unsigned int slice_dispatch;
+ u64 dispatch_start;
+ u64 allocated_slice;
+ u64 slice_dispatch;
/* time when first request from queue completed and slice started. */
- unsigned long slice_start;
- unsigned long slice_end;
- long slice_resid;
+ u64 slice_start;
+ u64 slice_end;
+ s64 slice_resid;
/* pending priority requests */
int prio_pending;
@@ -141,7 +141,7 @@ struct cfq_queue {
/* io prio of this group */
unsigned short ioprio, org_ioprio;
- unsigned short ioprio_class;
+ unsigned short ioprio_class, org_ioprio_class;
pid_t pid;
@@ -290,7 +290,7 @@ struct cfq_group {
struct cfq_rb_root service_trees[2][3];
struct cfq_rb_root service_tree_idle;
- unsigned long saved_wl_slice;
+ u64 saved_wl_slice;
enum wl_type_t saved_wl_type;
enum wl_class_t saved_wl_class;
@@ -329,7 +329,7 @@ struct cfq_data {
*/
enum wl_class_t serving_wl_class;
enum wl_type_t serving_wl_type;
- unsigned long workload_expires;
+ u64 workload_expires;
struct cfq_group *serving_group;
/*
@@ -362,7 +362,7 @@ struct cfq_data {
/*
* idle window management
*/
- struct timer_list idle_slice_timer;
+ struct hrtimer idle_slice_timer;
struct work_struct unplug_work;
struct cfq_queue *active_queue;
@@ -374,22 +374,22 @@ struct cfq_data {
* tunables, see top of file
*/
unsigned int cfq_quantum;
- unsigned int cfq_fifo_expire[2];
unsigned int cfq_back_penalty;
unsigned int cfq_back_max;
- unsigned int cfq_slice[2];
unsigned int cfq_slice_async_rq;
- unsigned int cfq_slice_idle;
- unsigned int cfq_group_idle;
unsigned int cfq_latency;
- unsigned int cfq_target_latency;
+ u64 cfq_fifo_expire[2];
+ u64 cfq_slice[2];
+ u64 cfq_slice_idle;
+ u64 cfq_group_idle;
+ u64 cfq_target_latency;
/*
* Fallback dummy cfqq for extreme OOM conditions
*/
struct cfq_queue oom_cfqq;
- unsigned long last_delayed_sync;
+ u64 last_delayed_sync;
};
static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -667,15 +667,16 @@ static inline void cfqg_put(struct cfq_group *cfqg)
} while (0)
static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
- struct cfq_group *curr_cfqg, int rw)
+ struct cfq_group *curr_cfqg, int op,
+ int op_flags)
{
- blkg_rwstat_add(&cfqg->stats.queued, rw, 1);
+ blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, 1);
cfqg_stats_end_empty_time(&cfqg->stats);
cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg);
}
static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
- unsigned long time, unsigned long unaccounted_time)
+ uint64_t time, unsigned long unaccounted_time)
{
blkg_stat_add(&cfqg->stats.time, time);
#ifdef CONFIG_DEBUG_BLK_CGROUP
@@ -683,26 +684,30 @@ static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
#endif
}
-static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw)
+static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op,
+ int op_flags)
{
- blkg_rwstat_add(&cfqg->stats.queued, rw, -1);
+ blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, -1);
}
-static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw)
+static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op,
+ int op_flags)
{
- blkg_rwstat_add(&cfqg->stats.merged, rw, 1);
+ blkg_rwstat_add(&cfqg->stats.merged, op, op_flags, 1);
}
static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
- uint64_t start_time, uint64_t io_start_time, int rw)
+ uint64_t start_time, uint64_t io_start_time, int op,
+ int op_flags)
{
struct cfqg_stats *stats = &cfqg->stats;
unsigned long long now = sched_clock();
if (time_after64(now, io_start_time))
- blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
+ blkg_rwstat_add(&stats->service_time, op, op_flags,
+ now - io_start_time);
if (time_after64(io_start_time, start_time))
- blkg_rwstat_add(&stats->wait_time, rw,
+ blkg_rwstat_add(&stats->wait_time, op, op_flags,
io_start_time - start_time);
}
@@ -781,13 +786,16 @@ static inline void cfqg_put(struct cfq_group *cfqg) { }
#define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0)
static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
- struct cfq_group *curr_cfqg, int rw) { }
+ struct cfq_group *curr_cfqg, int op, int op_flags) { }
static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
- unsigned long time, unsigned long unaccounted_time) { }
-static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) { }
-static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) { }
+ uint64_t time, unsigned long unaccounted_time) { }
+static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op,
+ int op_flags) { }
+static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op,
+ int op_flags) { }
static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
- uint64_t start_time, uint64_t io_start_time, int rw) { }
+ uint64_t start_time, uint64_t io_start_time, int op,
+ int op_flags) { }
#endif /* CONFIG_CFQ_GROUP_IOSCHED */
@@ -807,7 +815,7 @@ static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
struct cfq_ttime *ttime, bool group_idle)
{
- unsigned long slice;
+ u64 slice;
if (!sample_valid(ttime->ttime_samples))
return false;
if (group_idle)
@@ -910,7 +918,7 @@ static inline struct cfq_data *cic_to_cfqd(struct cfq_io_cq *cic)
*/
static inline bool cfq_bio_sync(struct bio *bio)
{
- return bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC);
+ return bio_data_dir(bio) == READ || (bio->bi_opf & REQ_SYNC);
}
/*
@@ -930,17 +938,18 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
* if a queue is marked sync and has sync io queued. A sync queue with async
* io only, should not get full sync slice length.
*/
-static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync,
+static inline u64 cfq_prio_slice(struct cfq_data *cfqd, bool sync,
unsigned short prio)
{
- const int base_slice = cfqd->cfq_slice[sync];
+ u64 base_slice = cfqd->cfq_slice[sync];
+ u64 slice = div_u64(base_slice, CFQ_SLICE_SCALE);
WARN_ON(prio >= IOPRIO_BE_NR);
- return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio));
+ return base_slice + (slice * (4 - prio));
}
-static inline int
+static inline u64
cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
@@ -958,15 +967,14 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
*
* The result is also in fixed point w/ CFQ_SERVICE_SHIFT.
*/
-static inline u64 cfqg_scale_charge(unsigned long charge,
+static inline u64 cfqg_scale_charge(u64 charge,
unsigned int vfraction)
{
u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */
/* charge / vfraction */
c <<= CFQ_SERVICE_SHIFT;
- do_div(c, vfraction);
- return c;
+ return div_u64(c, vfraction);
}
static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
@@ -1019,16 +1027,16 @@ static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
return cfqg->busy_queues_avg[rt];
}
-static inline unsigned
+static inline u64
cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
{
return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT;
}
-static inline unsigned
+static inline u64
cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
- unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
+ u64 slice = cfq_prio_to_slice(cfqd, cfqq);
if (cfqd->cfq_latency) {
/*
* interested queues (we consider only the ones with the same
@@ -1036,20 +1044,22 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
*/
unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
cfq_class_rt(cfqq));
- unsigned sync_slice = cfqd->cfq_slice[1];
- unsigned expect_latency = sync_slice * iq;
- unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
+ u64 sync_slice = cfqd->cfq_slice[1];
+ u64 expect_latency = sync_slice * iq;
+ u64 group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
if (expect_latency > group_slice) {
- unsigned base_low_slice = 2 * cfqd->cfq_slice_idle;
+ u64 base_low_slice = 2 * cfqd->cfq_slice_idle;
+ u64 low_slice;
+
/* scale low_slice according to IO priority
* and sync vs async */
- unsigned low_slice =
- min(slice, base_low_slice * slice / sync_slice);
+ low_slice = div64_u64(base_low_slice*slice, sync_slice);
+ low_slice = min(slice, low_slice);
/* the adapted slice value is scaled to fit all iqs
* into the target latency */
- slice = max(slice * group_slice / expect_latency,
- low_slice);
+ slice = div64_u64(slice*group_slice, expect_latency);
+ slice = max(slice, low_slice);
}
}
return slice;
@@ -1058,12 +1068,13 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
static inline void
cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
- unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq);
+ u64 slice = cfq_scaled_cfqq_slice(cfqd, cfqq);
+ u64 now = ktime_get_ns();
- cfqq->slice_start = jiffies;
- cfqq->slice_end = jiffies + slice;
+ cfqq->slice_start = now;
+ cfqq->slice_end = now + slice;
cfqq->allocated_slice = slice;
- cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
+ cfq_log_cfqq(cfqd, cfqq, "set_slice=%llu", cfqq->slice_end - now);
}
/*
@@ -1075,7 +1086,7 @@ static inline bool cfq_slice_used(struct cfq_queue *cfqq)
{
if (cfq_cfqq_slice_new(cfqq))
return false;
- if (time_before(jiffies, cfqq->slice_end))
+ if (ktime_get_ns() < cfqq->slice_end)
return false;
return true;
@@ -1241,8 +1252,8 @@ cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last));
}
-static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
- struct cfq_queue *cfqq)
+static u64 cfq_slice_offset(struct cfq_data *cfqd,
+ struct cfq_queue *cfqq)
{
/*
* just an approximation, should be ok.
@@ -1435,31 +1446,32 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
cfqg_stats_update_dequeue(cfqg);
}
-static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
- unsigned int *unaccounted_time)
+static inline u64 cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
+ u64 *unaccounted_time)
{
- unsigned int slice_used;
+ u64 slice_used;
+ u64 now = ktime_get_ns();
/*
* Queue got expired before even a single request completed or
* got expired immediately after first request completion.
*/
- if (!cfqq->slice_start || cfqq->slice_start == jiffies) {
+ if (!cfqq->slice_start || cfqq->slice_start == now) {
/*
* Also charge the seek time incurred to the group, otherwise
* if there are mutiple queues in the group, each can dispatch
* a single request on seeky media and cause lots of seek time
* and group will never know it.
*/
- slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start),
- 1);
+ slice_used = max_t(u64, (now - cfqq->dispatch_start),
+ jiffies_to_nsecs(1));
} else {
- slice_used = jiffies - cfqq->slice_start;
+ slice_used = now - cfqq->slice_start;
if (slice_used > cfqq->allocated_slice) {
*unaccounted_time = slice_used - cfqq->allocated_slice;
slice_used = cfqq->allocated_slice;
}
- if (time_after(cfqq->slice_start, cfqq->dispatch_start))
+ if (cfqq->slice_start > cfqq->dispatch_start)
*unaccounted_time += cfqq->slice_start -
cfqq->dispatch_start;
}
@@ -1471,10 +1483,11 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
struct cfq_queue *cfqq)
{
struct cfq_rb_root *st = &cfqd->grp_service_tree;
- unsigned int used_sl, charge, unaccounted_sl = 0;
+ u64 used_sl, charge, unaccounted_sl = 0;
int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
- cfqg->service_tree_idle.count;
unsigned int vfr;
+ u64 now = ktime_get_ns();
BUG_ON(nr_sync < 0);
used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
@@ -1496,9 +1509,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
cfq_group_service_tree_add(st, cfqg);
/* This group is being expired. Save the context */
- if (time_after(cfqd->workload_expires, jiffies)) {
- cfqg->saved_wl_slice = cfqd->workload_expires
- - jiffies;
+ if (cfqd->workload_expires > now) {
+ cfqg->saved_wl_slice = cfqd->workload_expires - now;
cfqg->saved_wl_type = cfqd->serving_wl_type;
cfqg->saved_wl_class = cfqd->serving_wl_class;
} else
@@ -1507,7 +1519,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
st->min_vdisktime);
cfq_log_cfqq(cfqq->cfqd, cfqq,
- "sl_used=%u disp=%u charge=%u iops=%u sect=%lu",
+ "sl_used=%llu disp=%llu charge=%llu iops=%u sect=%lu",
used_sl, cfqq->slice_dispatch, charge,
iops_mode(cfqd), cfqq->nr_sectors);
cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl);
@@ -1530,7 +1542,7 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg)
*st = CFQ_RB_ROOT;
RB_CLEAR_NODE(&cfqg->rb_node);
- cfqg->ttime.last_end_request = jiffies;
+ cfqg->ttime.last_end_request = ktime_get_ns();
}
#ifdef CONFIG_CFQ_GROUP_IOSCHED
@@ -2213,10 +2225,11 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
{
struct rb_node **p, *parent;
struct cfq_queue *__cfqq;
- unsigned long rb_key;
+ u64 rb_key;
struct cfq_rb_root *st;
int left;
int new_cfqq = 1;
+ u64 now = ktime_get_ns();
st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq));
if (cfq_class_idle(cfqq)) {
@@ -2226,7 +2239,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
__cfqq = rb_entry(parent, struct cfq_queue, rb_node);
rb_key += __cfqq->rb_key;
} else
- rb_key += jiffies;
+ rb_key += now;
} else if (!add_front) {
/*
* Get our rb key offset. Subtract any residual slice
@@ -2234,13 +2247,13 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
* count indicates slice overrun, and this should position
* the next service time further away in the tree.
*/
- rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
+ rb_key = cfq_slice_offset(cfqd, cfqq) + now;
rb_key -= cfqq->slice_resid;
cfqq->slice_resid = 0;
} else {
- rb_key = -HZ;
+ rb_key = -NSEC_PER_SEC;
__cfqq = cfq_rb_first(st);
- rb_key += __cfqq ? __cfqq->rb_key : jiffies;
+ rb_key += __cfqq ? __cfqq->rb_key : now;
}
if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
@@ -2266,7 +2279,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
/*
* sort by key, that represents service time.
*/
- if (time_before(rb_key, __cfqq->rb_key))
+ if (rb_key < __cfqq->rb_key)
p = &parent->rb_left;
else {
p = &parent->rb_right;
@@ -2461,10 +2474,10 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
{
elv_rb_del(&cfqq->sort_list, rq);
cfqq->queued[rq_is_sync(rq)]--;
- cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags);
+ cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags);
cfq_add_rq_rb(rq);
cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group,
- rq->cmd_flags);
+ req_op(rq), rq->cmd_flags);
}
static struct request *
@@ -2517,7 +2530,7 @@ static void cfq_remove_request(struct request *rq)
cfq_del_rq_rb(rq);
cfqq->cfqd->rq_queued--;
- cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags);
+ cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags);
if (rq->cmd_flags & REQ_PRIO) {
WARN_ON(!cfqq->prio_pending);
cfqq->prio_pending--;
@@ -2531,7 +2544,7 @@ static int cfq_merge(struct request_queue *q, struct request **req,
struct request *__rq;
__rq = cfq_find_rq_fmerge(cfqd, bio);
- if (__rq && elv_rq_merge_ok(__rq, bio)) {
+ if (__rq && elv_bio_merge_ok(__rq, bio)) {
*req = __rq;
return ELEVATOR_FRONT_MERGE;
}
@@ -2552,7 +2565,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req,
static void cfq_bio_merged(struct request_queue *q, struct request *req,
struct bio *bio)
{
- cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_rw);
+ cfqg_stats_update_io_merged(RQ_CFQG(req), bio_op(bio), bio->bi_opf);
}
static void
@@ -2566,7 +2579,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
* reposition in fifo if next is older than rq
*/
if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
- time_before(next->fifo_time, rq->fifo_time) &&
+ next->fifo_time < rq->fifo_time &&
cfqq == RQ_CFQQ(next)) {
list_move(&rq->queuelist, &next->queuelist);
rq->fifo_time = next->fifo_time;
@@ -2575,7 +2588,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
if (cfqq->next_rq == next)
cfqq->next_rq = rq;
cfq_remove_request(next);
- cfqg_stats_update_io_merged(RQ_CFQG(rq), next->cmd_flags);
+ cfqg_stats_update_io_merged(RQ_CFQG(rq), req_op(next), next->cmd_flags);
cfqq = RQ_CFQQ(next);
/*
@@ -2588,8 +2601,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
cfq_del_cfqq_rr(cfqd, cfqq);
}
-static int cfq_allow_merge(struct request_queue *q, struct request *rq,
- struct bio *bio)
+static int cfq_allow_bio_merge(struct request_queue *q, struct request *rq,
+ struct bio *bio)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_io_cq *cic;
@@ -2613,9 +2626,15 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
return cfqq == RQ_CFQQ(rq);
}
+static int cfq_allow_rq_merge(struct request_queue *q, struct request *rq,
+ struct request *next)
+{
+ return RQ_CFQQ(rq) == RQ_CFQQ(next);
+}
+
static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
- del_timer(&cfqd->idle_slice_timer);
+ hrtimer_try_to_cancel(&cfqd->idle_slice_timer);
cfqg_stats_update_idle_time(cfqq->cfqg);
}
@@ -2627,7 +2646,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
cfqd->serving_wl_class, cfqd->serving_wl_type);
cfqg_stats_update_avg_queue_size(cfqq->cfqg);
cfqq->slice_start = 0;
- cfqq->dispatch_start = jiffies;
+ cfqq->dispatch_start = ktime_get_ns();
cfqq->allocated_slice = 0;
cfqq->slice_end = 0;
cfqq->slice_dispatch = 0;
@@ -2676,8 +2695,8 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
if (cfq_cfqq_slice_new(cfqq))
cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq);
else
- cfqq->slice_resid = cfqq->slice_end - jiffies;
- cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
+ cfqq->slice_resid = cfqq->slice_end - ktime_get_ns();
+ cfq_log_cfqq(cfqd, cfqq, "resid=%lld", cfqq->slice_resid);
}
cfq_group_served(cfqd, cfqq->cfqg, cfqq);
@@ -2911,7 +2930,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
struct cfq_queue *cfqq = cfqd->active_queue;
struct cfq_rb_root *st = cfqq->service_tree;
struct cfq_io_cq *cic;
- unsigned long sl, group_idle = 0;
+ u64 sl, group_idle = 0;
+ u64 now = ktime_get_ns();
/*
* SSD device without seek penalty, disable idling. But only do so
@@ -2954,8 +2974,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
* time slice.
*/
if (sample_valid(cic->ttime.ttime_samples) &&
- (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) {
- cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
+ (cfqq->slice_end - now < cic->ttime.ttime_mean)) {
+ cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%llu",
cic->ttime.ttime_mean);
return;
}
@@ -2976,9 +2996,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
else
sl = cfqd->cfq_slice_idle;
- mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
+ hrtimer_start(&cfqd->idle_slice_timer, ns_to_ktime(sl),
+ HRTIMER_MODE_REL);
cfqg_stats_set_start_idle_time(cfqq->cfqg);
- cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl,
+ cfq_log_cfqq(cfqd, cfqq, "arm_idle: %llu group_idle: %d", sl,
group_idle ? 1 : 0);
}
@@ -3018,7 +3039,7 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
return NULL;
rq = rq_entry_fifo(cfqq->fifo.next);
- if (time_before(jiffies, rq->fifo_time))
+ if (ktime_get_ns() < rq->fifo_time)
rq = NULL;
cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
@@ -3096,14 +3117,14 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
struct cfq_queue *queue;
int i;
bool key_valid = false;
- unsigned long lowest_key = 0;
+ u64 lowest_key = 0;
enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
for (i = 0; i <= SYNC_WORKLOAD; ++i) {
/* select the one with lowest rb_key */
queue = cfq_rb_first(st_for(cfqg, wl_class, i));
if (queue &&
- (!key_valid || time_before(queue->rb_key, lowest_key))) {
+ (!key_valid || queue->rb_key < lowest_key)) {
lowest_key = queue->rb_key;
cur_best = i;
key_valid = true;
@@ -3116,11 +3137,12 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
static void
choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
{
- unsigned slice;
+ u64 slice;
unsigned count;
struct cfq_rb_root *st;
- unsigned group_slice;
+ u64 group_slice;
enum wl_class_t original_class = cfqd->serving_wl_class;
+ u64 now = ktime_get_ns();
/* Choose next priority. RT > BE > IDLE */
if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
@@ -3129,7 +3151,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
cfqd->serving_wl_class = BE_WORKLOAD;
else {
cfqd->serving_wl_class = IDLE_WORKLOAD;
- cfqd->workload_expires = jiffies + 1;
+ cfqd->workload_expires = now + jiffies_to_nsecs(1);
return;
}
@@ -3147,7 +3169,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
/*
* check workload expiration, and that we still have other queues ready
*/
- if (count && !time_after(jiffies, cfqd->workload_expires))
+ if (count && !(now > cfqd->workload_expires))
return;
new_workload:
@@ -3164,13 +3186,13 @@ new_workload:
*/
group_slice = cfq_group_slice(cfqd, cfqg);
- slice = group_slice * count /
+ slice = div_u64(group_slice * count,
max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class],
cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd,
- cfqg));
+ cfqg)));
if (cfqd->serving_wl_type == ASYNC_WORKLOAD) {
- unsigned int tmp;
+ u64 tmp;
/*
* Async queues are currently system wide. Just taking
@@ -3181,19 +3203,19 @@ new_workload:
*/
tmp = cfqd->cfq_target_latency *
cfqg_busy_async_queues(cfqd, cfqg);
- tmp = tmp/cfqd->busy_queues;
- slice = min_t(unsigned, slice, tmp);
+ tmp = div_u64(tmp, cfqd->busy_queues);
+ slice = min_t(u64, slice, tmp);
/* async workload slice is scaled down according to
* the sync/async slice ratio. */
- slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1];
+ slice = div64_u64(slice*cfqd->cfq_slice[0], cfqd->cfq_slice[1]);
} else
/* sync workload slice is at least 2 * cfq_slice_idle */
slice = max(slice, 2 * cfqd->cfq_slice_idle);
- slice = max_t(unsigned, slice, CFQ_MIN_TT);
- cfq_log(cfqd, "workload slice:%d", slice);
- cfqd->workload_expires = jiffies + slice;
+ slice = max_t(u64, slice, CFQ_MIN_TT);
+ cfq_log(cfqd, "workload slice:%llu", slice);
+ cfqd->workload_expires = now + slice;
}
static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
@@ -3211,16 +3233,17 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
static void cfq_choose_cfqg(struct cfq_data *cfqd)
{
struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
+ u64 now = ktime_get_ns();
cfqd->serving_group = cfqg;
/* Restore the workload type data */
if (cfqg->saved_wl_slice) {
- cfqd->workload_expires = jiffies + cfqg->saved_wl_slice;
+ cfqd->workload_expires = now + cfqg->saved_wl_slice;
cfqd->serving_wl_type = cfqg->saved_wl_type;
cfqd->serving_wl_class = cfqg->saved_wl_class;
} else
- cfqd->workload_expires = jiffies - 1;
+ cfqd->workload_expires = now - 1;
choose_wl_class_and_type(cfqd, cfqg);
}
@@ -3232,6 +3255,7 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd)
static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
{
struct cfq_queue *cfqq, *new_cfqq = NULL;
+ u64 now = ktime_get_ns();
cfqq = cfqd->active_queue;
if (!cfqq)
@@ -3292,7 +3316,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
* flight or is idling for a new request, allow either of these
* conditions to happen (or time out) before selecting a new queue.
*/
- if (timer_pending(&cfqd->idle_slice_timer)) {
+ if (hrtimer_active(&cfqd->idle_slice_timer)) {
cfqq = NULL;
goto keep_queue;
}
@@ -3303,7 +3327,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
**/
if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
(cfq_cfqq_slice_new(cfqq) ||
- (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
+ (cfqq->slice_end - now > now - cfqq->slice_start))) {
cfq_clear_cfqq_deep(cfqq);
cfq_clear_cfqq_idle_window(cfqq);
}
@@ -3381,11 +3405,12 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
struct cfq_queue *cfqq)
{
+ u64 now = ktime_get_ns();
+
/* the queue hasn't finished any request, can't estimate */
if (cfq_cfqq_slice_new(cfqq))
return true;
- if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
- cfqq->slice_end))
+ if (now + cfqd->cfq_slice_idle * cfqq->dispatched > cfqq->slice_end)
return true;
return false;
@@ -3460,10 +3485,10 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* based on the last sync IO we serviced
*/
if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
- unsigned long last_sync = jiffies - cfqd->last_delayed_sync;
+ u64 last_sync = ktime_get_ns() - cfqd->last_delayed_sync;
unsigned int depth;
- depth = last_sync / cfqd->cfq_slice[1];
+ depth = div64_u64(last_sync, cfqd->cfq_slice[1]);
if (!depth && !cfqq->dispatched)
depth = 1;
if (depth < max_dispatch)
@@ -3546,7 +3571,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
cfq_class_idle(cfqq))) {
- cfqq->slice_end = jiffies + 1;
+ cfqq->slice_end = ktime_get_ns() + 1;
cfq_slice_expired(cfqd, 0);
}
@@ -3624,7 +3649,7 @@ static void cfq_init_icq(struct io_cq *icq)
{
struct cfq_io_cq *cic = icq_to_cic(icq);
- cic->ttime.last_end_request = jiffies;
+ cic->ttime.last_end_request = ktime_get_ns();
}
static void cfq_exit_icq(struct io_cq *icq)
@@ -3682,6 +3707,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct cfq_io_cq *cic)
* elevate the priority of this queue
*/
cfqq->org_ioprio = cfqq->ioprio;
+ cfqq->org_ioprio_class = cfqq->ioprio_class;
cfq_clear_cfqq_prio_changed(cfqq);
}
@@ -3738,9 +3764,11 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
struct cfq_data *cfqd = cic_to_cfqd(cic);
struct cfq_queue *cfqq;
uint64_t serial_nr;
+ bool nonroot_cg;
rcu_read_lock();
serial_nr = bio_blkcg(bio)->css.serial_nr;
+ nonroot_cg = bio_blkcg(bio) != &blkcg_root;
rcu_read_unlock();
/*
@@ -3755,11 +3783,10 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
* do proper throttling of writes. Turn off wbt for that
* case.
*/
- if (bio_blkcg(bio) != &blkcg_root) {
+ if (nonroot_cg) {
struct request_queue *q = cfqd->queue;
- if (q->rq_wb)
- wbt_disable(q->rq_wb);
+ wbt_disable(q->rq_wb);
}
/*
@@ -3857,14 +3884,15 @@ out:
}
static void
-__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
+__cfq_update_io_thinktime(struct cfq_ttime *ttime, u64 slice_idle)
{
- unsigned long elapsed = jiffies - ttime->last_end_request;
+ u64 elapsed = ktime_get_ns() - ttime->last_end_request;
elapsed = min(elapsed, 2UL * slice_idle);
ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
- ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8;
- ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples;
+ ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8);
+ ttime->ttime_mean = div64_ul(ttime->ttime_total + 128,
+ ttime->ttime_samples);
}
static void
@@ -4117,10 +4145,10 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
cfq_log_cfqq(cfqd, cfqq, "insert_request");
cfq_init_prio_data(cfqq, RQ_CIC(rq));
- rq->fifo_time = jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)];
+ rq->fifo_time = ktime_get_ns() + cfqd->cfq_fifo_expire[rq_is_sync(rq)];
list_add_tail(&rq->queuelist, &cfqq->fifo);
cfq_add_rq_rb(rq);
- cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group,
+ cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, req_op(rq),
rq->cmd_flags);
cfq_rq_enqueued(cfqd, cfqq, rq);
}
@@ -4165,6 +4193,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
struct cfq_io_cq *cic = cfqd->active_cic;
+ u64 now = ktime_get_ns();
/* If the queue already has requests, don't wait */
if (!RB_EMPTY_ROOT(&cfqq->sort_list))
@@ -4183,7 +4212,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
/* if slice left is less than think time, wait busy */
if (cic && sample_valid(cic->ttime.ttime_samples)
- && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean))
+ && (cfqq->slice_end - now < cic->ttime.ttime_mean))
return true;
/*
@@ -4193,7 +4222,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* case where think time is less than a jiffy, mark the queue wait
* busy if only 1 jiffy is left in the slice.
*/
- if (cfqq->slice_end - jiffies == 1)
+ if (cfqq->slice_end - now <= jiffies_to_nsecs(1))
return true;
return false;
@@ -4204,9 +4233,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
struct cfq_queue *cfqq = RQ_CFQQ(rq);
struct cfq_data *cfqd = cfqq->cfqd;
const int sync = rq_is_sync(rq);
- unsigned long now;
+ u64 now = ktime_get_ns();
- now = jiffies;
cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d",
!!(rq->cmd_flags & REQ_NOIDLE));
@@ -4218,7 +4246,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfqq->dispatched--;
(RQ_CFQG(rq))->dispatched--;
cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq),
- rq_io_start_time_ns(rq), rq->cmd_flags);
+ rq_io_start_time_ns(rq), req_op(rq),
+ rq->cmd_flags);
cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
@@ -4234,7 +4263,16 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfqq_type(cfqq));
st->ttime.last_end_request = now;
- if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
+ /*
+ * We have to do this check in jiffies since start_time is in
+ * jiffies and it is not trivial to convert to ns. If
+ * cfq_fifo_expire[1] ever comes close to 1 jiffie, this test
+ * will become problematic but so far we are fine (the default
+ * is 128 ms).
+ */
+ if (!time_after(rq->start_time +
+ nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]),
+ jiffies))
cfqd->last_delayed_sync = now;
}
@@ -4259,10 +4297,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
* the queue.
*/
if (cfq_should_wait_busy(cfqd, cfqq)) {
- unsigned long extend_sl = cfqd->cfq_slice_idle;
+ u64 extend_sl = cfqd->cfq_slice_idle;
if (!cfqd->cfq_slice_idle)
extend_sl = cfqd->cfq_group_idle;
- cfqq->slice_end = jiffies + extend_sl;
+ cfqq->slice_end = now + extend_sl;
cfq_mark_cfqq_wait_busy(cfqq);
cfq_log_cfqq(cfqd, cfqq, "will busy wait");
}
@@ -4287,6 +4325,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfq_schedule_dispatch(cfqd);
}
+static void cfqq_boost_on_prio(struct cfq_queue *cfqq, int op_flags)
+{
+ /*
+ * If REQ_PRIO is set, boost class and prio level, if it's below
+ * BE/NORM. If prio is not set, restore the potentially boosted
+ * class/prio level.
+ */
+ if (!(op_flags & REQ_PRIO)) {
+ cfqq->ioprio_class = cfqq->org_ioprio_class;
+ cfqq->ioprio = cfqq->org_ioprio;
+ } else {
+ if (cfq_class_idle(cfqq))
+ cfqq->ioprio_class = IOPRIO_CLASS_BE;
+ if (cfqq->ioprio > IOPRIO_NORM)
+ cfqq->ioprio = IOPRIO_NORM;
+ }
+}
+
static inline int __cfq_may_queue(struct cfq_queue *cfqq)
{
if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -4297,7 +4353,7 @@ static inline int __cfq_may_queue(struct cfq_queue *cfqq)
return ELV_MQUEUE_MAY;
}
-static int cfq_may_queue(struct request_queue *q, int rw)
+static int cfq_may_queue(struct request_queue *q, int op, int op_flags)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
struct task_struct *tsk = current;
@@ -4314,9 +4370,10 @@ static int cfq_may_queue(struct request_queue *q, int rw)
if (!cic)
return ELV_MQUEUE_MAY;
- cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
+ cfqq = cic_to_cfqq(cic, rw_is_sync(op, op_flags));
if (cfqq) {
cfq_init_prio_data(cfqq, cic);
+ cfqq_boost_on_prio(cfqq, op_flags);
return __cfq_may_queue(cfqq);
}
@@ -4447,9 +4504,10 @@ static void cfq_kick_queue(struct work_struct *work)
/*
* Timer running if the active_queue is currently idling inside its time slice
*/
-static void cfq_idle_slice_timer(unsigned long data)
+static enum hrtimer_restart cfq_idle_slice_timer(struct hrtimer *timer)
{
- struct cfq_data *cfqd = (struct cfq_data *) data;
+ struct cfq_data *cfqd = container_of(timer, struct cfq_data,
+ idle_slice_timer);
struct cfq_queue *cfqq;
unsigned long flags;
int timed_out = 1;
@@ -4498,11 +4556,12 @@ out_kick:
cfq_schedule_dispatch(cfqd);
out_cont:
spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+ return HRTIMER_NORESTART;
}
static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
{
- del_timer_sync(&cfqd->idle_slice_timer);
+ hrtimer_cancel(&cfqd->idle_slice_timer);
cancel_work_sync(&cfqd->unplug_work);
}
@@ -4598,9 +4657,9 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e)
cfqg_put(cfqd->root_group);
spin_unlock_irq(q->queue_lock);
- init_timer(&cfqd->idle_slice_timer);
+ hrtimer_init(&cfqd->idle_slice_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
- cfqd->idle_slice_timer.data = (unsigned long) cfqd;
INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
@@ -4621,7 +4680,7 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e)
* we optimistically start assuming sync ops weren't delayed in last
* second, in order to have larger depth for async operations.
*/
- cfqd->last_delayed_sync = jiffies - HZ;
+ cfqd->last_delayed_sync = ktime_get_ns() - NSEC_PER_SEC;
return 0;
out_free:
@@ -4664,9 +4723,9 @@ cfq_var_store(unsigned int *var, const char *page, size_t count)
static ssize_t __FUNC(struct elevator_queue *e, char *page) \
{ \
struct cfq_data *cfqd = e->elevator_data; \
- unsigned int __data = __VAR; \
+ u64 __data = __VAR; \
if (__CONV) \
- __data = jiffies_to_msecs(__data); \
+ __data = div_u64(__data, NSEC_PER_MSEC); \
return cfq_var_show(__data, (page)); \
}
SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
@@ -4683,6 +4742,21 @@ SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1);
#undef SHOW_FUNCTION
+#define USEC_SHOW_FUNCTION(__FUNC, __VAR) \
+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
+{ \
+ struct cfq_data *cfqd = e->elevator_data; \
+ u64 __data = __VAR; \
+ __data = div_u64(__data, NSEC_PER_USEC); \
+ return cfq_var_show(__data, (page)); \
+}
+USEC_SHOW_FUNCTION(cfq_slice_idle_us_show, cfqd->cfq_slice_idle);
+USEC_SHOW_FUNCTION(cfq_group_idle_us_show, cfqd->cfq_group_idle);
+USEC_SHOW_FUNCTION(cfq_slice_sync_us_show, cfqd->cfq_slice[1]);
+USEC_SHOW_FUNCTION(cfq_slice_async_us_show, cfqd->cfq_slice[0]);
+USEC_SHOW_FUNCTION(cfq_target_latency_us_show, cfqd->cfq_target_latency);
+#undef USEC_SHOW_FUNCTION
+
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
@@ -4694,7 +4768,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
else if (__data > (MAX)) \
__data = (MAX); \
if (__CONV) \
- *(__PTR) = msecs_to_jiffies(__data); \
+ *(__PTR) = (u64)__data * NSEC_PER_MSEC; \
else \
*(__PTR) = __data; \
return ret; \
@@ -4717,6 +4791,26 @@ STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1);
#undef STORE_FUNCTION
+#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
+{ \
+ struct cfq_data *cfqd = e->elevator_data; \
+ unsigned int __data; \
+ int ret = cfq_var_store(&__data, (page), count); \
+ if (__data < (MIN)) \
+ __data = (MIN); \
+ else if (__data > (MAX)) \
+ __data = (MAX); \
+ *(__PTR) = (u64)__data * NSEC_PER_USEC; \
+ return ret; \
+}
+USEC_STORE_FUNCTION(cfq_slice_idle_us_store, &cfqd->cfq_slice_idle, 0, UINT_MAX);
+USEC_STORE_FUNCTION(cfq_group_idle_us_store, &cfqd->cfq_group_idle, 0, UINT_MAX);
+USEC_STORE_FUNCTION(cfq_slice_sync_us_store, &cfqd->cfq_slice[1], 1, UINT_MAX);
+USEC_STORE_FUNCTION(cfq_slice_async_us_store, &cfqd->cfq_slice[0], 1, UINT_MAX);
+USEC_STORE_FUNCTION(cfq_target_latency_us_store, &cfqd->cfq_target_latency, 1, UINT_MAX);
+#undef USEC_STORE_FUNCTION
+
#define CFQ_ATTR(name) \
__ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
@@ -4727,12 +4821,17 @@ static struct elv_fs_entry cfq_attrs[] = {
CFQ_ATTR(back_seek_max),
CFQ_ATTR(back_seek_penalty),
CFQ_ATTR(slice_sync),
+ CFQ_ATTR(slice_sync_us),
CFQ_ATTR(slice_async),
+ CFQ_ATTR(slice_async_us),
CFQ_ATTR(slice_async_rq),
CFQ_ATTR(slice_idle),
+ CFQ_ATTR(slice_idle_us),
CFQ_ATTR(group_idle),
+ CFQ_ATTR(group_idle_us),
CFQ_ATTR(low_latency),
CFQ_ATTR(target_latency),
+ CFQ_ATTR(target_latency_us),
__ATTR_NULL
};
@@ -4741,7 +4840,8 @@ static struct elevator_type iosched_cfq = {
.elevator_merge_fn = cfq_merge,
.elevator_merged_fn = cfq_merged_request,
.elevator_merge_req_fn = cfq_merged_requests,
- .elevator_allow_merge_fn = cfq_allow_merge,
+ .elevator_allow_bio_merge_fn = cfq_allow_bio_merge,
+ .elevator_allow_rq_merge_fn = cfq_allow_rq_merge,
.elevator_bio_merged_fn = cfq_bio_merged,
.elevator_dispatch_fn = cfq_dispatch_requests,
.elevator_add_req_fn = cfq_insert_request,
@@ -4788,18 +4888,7 @@ static int __init cfq_init(void)
{
int ret;
- /*
- * could be 0 on HZ < 1000 setups
- */
- if (!cfq_slice_async)
- cfq_slice_async = 1;
- if (!cfq_slice_idle)
- cfq_slice_idle = 1;
-
#ifdef CONFIG_CFQ_GROUP_IOSCHED
- if (!cfq_group_idle)
- cfq_group_idle = 1;
-
ret = blkcg_policy_register(&blkcg_policy_cfq);
if (ret)
return ret;
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index d0dd7882d..55e0bb6d7 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -137,7 +137,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
if (__rq) {
BUG_ON(sector != blk_rq_pos(__rq));
- if (elv_rq_merge_ok(__rq, bio)) {
+ if (elv_bio_merge_ok(__rq, bio)) {
ret = ELEVATOR_FRONT_MERGE;
goto out;
}
@@ -173,7 +173,8 @@ deadline_merged_requests(struct request_queue *q, struct request *req,
* and move into next position (next will be deleted) in fifo
*/
if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
- if (time_before(next->fifo_time, req->fifo_time)) {
+ if (time_before((unsigned long)next->fifo_time,
+ (unsigned long)req->fifo_time)) {
list_move(&req->queuelist, &next->queuelist);
req->fifo_time = next->fifo_time;
}
@@ -227,7 +228,7 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
/*
* rq is expired!
*/
- if (time_after_eq(jiffies, rq->fifo_time))
+ if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
return 1;
return 0;
diff --git a/block/elevator.c b/block/elevator.c
index c3555c9c6..f7d973a56 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -53,13 +53,13 @@ static LIST_HEAD(elv_list);
* Query io scheduler to see if the current process issuing bio may be
* merged with rq.
*/
-static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
+static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
{
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
- if (e->type->ops.elevator_allow_merge_fn)
- return e->type->ops.elevator_allow_merge_fn(q, rq, bio);
+ if (e->type->ops.elevator_allow_bio_merge_fn)
+ return e->type->ops.elevator_allow_bio_merge_fn(q, rq, bio);
return 1;
}
@@ -67,17 +67,17 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
/*
* can we safely merge with this request?
*/
-bool elv_rq_merge_ok(struct request *rq, struct bio *bio)
+bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
{
if (!blk_rq_merge_ok(rq, bio))
- return 0;
+ return false;
- if (!elv_iosched_allow_merge(rq, bio))
- return 0;
+ if (!elv_iosched_allow_bio_merge(rq, bio))
+ return false;
- return 1;
+ return true;
}
-EXPORT_SYMBOL(elv_rq_merge_ok);
+EXPORT_SYMBOL(elv_bio_merge_ok);
static struct elevator_type *elevator_find(const char *name)
{
@@ -366,8 +366,7 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
list_for_each_prev(entry, &q->queue_head) {
struct request *pos = list_entry_rq(entry);
- if ((rq->cmd_flags & REQ_DISCARD) !=
- (pos->cmd_flags & REQ_DISCARD))
+ if (req_op(rq) != req_op(pos))
break;
if (rq_data_dir(rq) != rq_data_dir(pos))
break;
@@ -426,7 +425,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
/*
* First try one-hit cache.
*/
- if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) {
+ if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
ret = blk_try_merge(q->last_merge, bio);
if (ret != ELEVATOR_NO_MERGE) {
*req = q->last_merge;
@@ -441,7 +440,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
* See if our hash lookup can find a potential backmerge.
*/
__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
- if (__rq && elv_rq_merge_ok(__rq, bio)) {
+ if (__rq && elv_bio_merge_ok(__rq, bio)) {
*req = __rq;
return ELEVATOR_BACK_MERGE;
}
@@ -717,12 +716,12 @@ void elv_put_request(struct request_queue *q, struct request *rq)
e->type->ops.elevator_put_req_fn(rq);
}
-int elv_may_queue(struct request_queue *q, int rw)
+int elv_may_queue(struct request_queue *q, int op, int op_flags)
{
struct elevator_queue *e = q->elevator;
if (e->type->ops.elevator_may_queue_fn)
- return e->type->ops.elevator_may_queue_fn(q, rw);
+ return e->type->ops.elevator_may_queue_fn(q, op, op_flags);
return ELV_MQUEUE_MAY;
}
diff --git a/block/genhd.c b/block/genhd.c
index a909bf8ba..fcd6d4fae 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -18,8 +18,6 @@
#include <linux/kobj_map.h>
#include <linux/mutex.h>
#include <linux/idr.h>
-#include <linux/ctype.h>
-#include <linux/fs_uuid.h>
#include <linux/log2.h>
#include <linux/pm_runtime.h>
#include <linux/badblocks.h>
@@ -508,7 +506,7 @@ static int exact_lock(dev_t devt, void *data)
return 0;
}
-static void register_disk(struct gendisk *disk)
+static void register_disk(struct device *parent, struct gendisk *disk)
{
struct device *ddev = disk_to_dev(disk);
struct block_device *bdev;
@@ -516,7 +514,7 @@ static void register_disk(struct gendisk *disk)
struct hd_struct *part;
int err;
- ddev->parent = disk->driverfs_dev;
+ ddev->parent = parent;
dev_set_name(ddev, "%s", disk->disk_name);
@@ -575,7 +573,8 @@ exit:
}
/**
- * add_disk - add partitioning information to kernel list
+ * device_add_disk - add partitioning information to kernel list
+ * @parent: parent device for the disk
* @disk: per-device partitioning information
*
* This function registers the partitioning information in @disk
@@ -583,7 +582,7 @@ exit:
*
* FIXME: error handling
*/
-void add_disk(struct gendisk *disk)
+void device_add_disk(struct device *parent, struct gendisk *disk)
{
struct backing_dev_info *bdi;
dev_t devt;
@@ -619,7 +618,7 @@ void add_disk(struct gendisk *disk)
blk_register_region(disk_devt(disk), disk->minors, NULL,
exact_match, exact_lock, disk);
- register_disk(disk);
+ register_disk(parent, disk);
blk_register_queue(disk);
/*
@@ -635,7 +634,7 @@ void add_disk(struct gendisk *disk)
disk_add_events(disk);
blk_integrity_add(disk);
}
-EXPORT_SYMBOL(add_disk);
+EXPORT_SYMBOL(device_add_disk);
void del_gendisk(struct gendisk *disk)
{
@@ -801,10 +800,9 @@ void __init printk_all_partitions(void)
, disk_name(disk, part->partno, name_buf),
part->info ? part->info->uuid : "");
if (is_part0) {
- if (disk->driverfs_dev != NULL &&
- disk->driverfs_dev->driver != NULL)
+ if (dev->parent && dev->parent->driver)
printk(" driver: %s\n",
- disk->driverfs_dev->driver->name);
+ dev->parent->driver->name);
else
printk(" (driver?)\n");
} else
@@ -1420,85 +1418,6 @@ int invalidate_partition(struct gendisk *disk, int partno)
EXPORT_SYMBOL(invalidate_partition);
-dev_t blk_lookup_fs_info(struct fs_info *seek)
-{
- dev_t devt = MKDEV(0, 0);
- struct class_dev_iter iter;
- struct device *dev;
- int best_score = 0;
-
- class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
- while (best_score < 3 && (dev = class_dev_iter_next(&iter))) {
- struct gendisk *disk = dev_to_disk(dev);
- struct disk_part_iter piter;
- struct hd_struct *part;
-
- disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
-
- while (best_score < 3 && (part = disk_part_iter_next(&piter))) {
- int score = part_matches_fs_info(part, seek);
- if (score > best_score) {
- devt = part_devt(part);
- best_score = score;
- }
- }
- disk_part_iter_exit(&piter);
- }
- class_dev_iter_exit(&iter);
- return devt;
-}
-
-/* Caller uses NULL, key to start. For each match found, we return a bdev on
- * which we have done blkdev_get, and we do the blkdev_put on block devices
- * that are passed to us. When no more matches are found, we return NULL.
- */
-struct block_device *next_bdev_of_type(struct block_device *last,
- const char *key)
-{
- dev_t devt = MKDEV(0, 0);
- struct class_dev_iter iter;
- struct device *dev;
- struct block_device *next = NULL, *bdev;
- int got_last = 0;
-
- if (!key)
- goto out;
-
- class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
- while (!devt && (dev = class_dev_iter_next(&iter))) {
- struct gendisk *disk = dev_to_disk(dev);
- struct disk_part_iter piter;
- struct hd_struct *part;
-
- disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
-
- while ((part = disk_part_iter_next(&piter))) {
- bdev = bdget(part_devt(part));
- if (last && !got_last) {
- if (last == bdev)
- got_last = 1;
- continue;
- }
-
- if (blkdev_get(bdev, FMODE_READ, 0))
- continue;
-
- if (bdev_matches_key(bdev, key)) {
- next = bdev;
- break;
- }
-
- blkdev_put(bdev, FMODE_READ);
- }
- disk_part_iter_exit(&piter);
- }
- class_dev_iter_exit(&iter);
-out:
- if (last)
- blkdev_put(last, FMODE_READ);
- return next;
-}
-
/*
* Disk events - monitor disk events like media change and eject request.
*/
@@ -1605,12 +1524,7 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now)
if (--ev->block)
goto out_unlock;
- /*
- * Not exactly a latency critical operation, set poll timer
- * slack to 25% and kick event check.
- */
intv = disk_events_poll_jiffies(disk);
- set_timer_slack(&ev->dwork.timer, intv / 4);
if (check_now)
queue_delayed_work(system_freezable_power_efficient_wq,
&ev->dwork, 0);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index d7eb77e1e..71d9ed9df 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -495,7 +495,6 @@ rescan:
/* add partitions */
for (p = 1; p < state->limit; p++) {
sector_t size, from;
- struct partition_meta_info *info = NULL;
size = state->parts[p].size;
if (!size)
@@ -530,8 +529,6 @@ rescan:
}
}
- if (state->parts[p].has_info)
- info = &state->parts[p].info;
part = add_partition(disk, p, from, size,
state->parts[p].flags,
&state->parts[p].info);
diff --git a/block/partitions/atari.c b/block/partitions/atari.c
index 9875b05e8..ff1fb9371 100644
--- a/block/partitions/atari.c
+++ b/block/partitions/atari.c
@@ -42,6 +42,13 @@ int atari_partition(struct parsed_partitions *state)
int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */
#endif
+ /*
+ * ATARI partition scheme supports 512 lba only. If this is not
+ * the case, bail early to avoid miscalculating hd_size.
+ */
+ if (bdev_logical_block_size(state->bdev) != 512)
+ return 0;
+
rs = read_part_sector(state, 0, &sect);
if (!rs)
return -1;