Diffstat (limited to 'block')
-rw-r--r--block/Kconfig.iosched6
-rw-r--r--block/bfq-cgroup.c1282
-rw-r--r--block/bfq-ioc.c6
-rw-r--r--block/bfq-iosched.c1083
-rw-r--r--block/bfq-sched.c209
-rw-r--r--block/bfq.h206
6 files changed, 1620 insertions, 1172 deletions
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 01da733dc..1fc1a4dc5 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -51,14 +51,12 @@ config IOSCHED_BFQ
applications. If compiled built-in (saying Y here), BFQ can
be configured to support hierarchical scheduling.
-config CGROUP_BFQIO
+config BFQ_GROUP_IOSCHED
bool "BFQ hierarchical scheduling support"
depends on CGROUPS && IOSCHED_BFQ=y
default n
---help---
- Enable hierarchical scheduling in BFQ, using the cgroups
- filesystem interface. The name of the subsystem will be
- bfqio.
+ Enable hierarchical scheduling in BFQ, using the blkio controller.
choice
prompt "Default I/O scheduler"
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 11e2f1d4e..bc34d7a2b 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -13,254 +13,480 @@
* file.
*/
-#ifdef CONFIG_CGROUP_BFQIO
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
-static DEFINE_MUTEX(bfqio_mutex);
+/* bfqg stats flags */
+enum bfqg_stats_flags {
+ BFQG_stats_waiting = 0,
+ BFQG_stats_idling,
+ BFQG_stats_empty,
+};
-static bool bfqio_is_removed(struct bfqio_cgroup *bgrp)
-{
- return bgrp ? !bgrp->online : false;
-}
+#define BFQG_FLAG_FNS(name) \
+static void bfqg_stats_mark_##name(struct bfqg_stats *stats) \
+{ \
+ stats->flags |= (1 << BFQG_stats_##name); \
+} \
+static void bfqg_stats_clear_##name(struct bfqg_stats *stats) \
+{ \
+ stats->flags &= ~(1 << BFQG_stats_##name); \
+} \
+static int bfqg_stats_##name(struct bfqg_stats *stats) \
+{ \
+ return (stats->flags & (1 << BFQG_stats_##name)) != 0; \
+} \
-static struct bfqio_cgroup bfqio_root_cgroup = {
- .weight = BFQ_DEFAULT_GRP_WEIGHT,
- .ioprio = BFQ_DEFAULT_GRP_IOPRIO,
- .ioprio_class = BFQ_DEFAULT_GRP_CLASS,
-};
+BFQG_FLAG_FNS(waiting)
+BFQG_FLAG_FNS(idling)
+BFQG_FLAG_FNS(empty)
+#undef BFQG_FLAG_FNS
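/*
 * Not part of the patch: for reference, a sketch of what the BFQG_FLAG_FNS
 * macro above expands to for the "waiting" flag (the "idling" and "empty"
 * variants are analogous).
 */
static void bfqg_stats_mark_waiting(struct bfqg_stats *stats)
{
        stats->flags |= (1 << BFQG_stats_waiting);
}
static void bfqg_stats_clear_waiting(struct bfqg_stats *stats)
{
        stats->flags &= ~(1 << BFQG_stats_waiting);
}
static int bfqg_stats_waiting(struct bfqg_stats *stats)
{
        return (stats->flags & (1 << BFQG_stats_waiting)) != 0;
}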
-static inline void bfq_init_entity(struct bfq_entity *entity,
- struct bfq_group *bfqg)
+/* This should be called with the queue_lock held. */
+static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
{
- entity->weight = entity->new_weight;
- entity->orig_weight = entity->new_weight;
- entity->ioprio = entity->new_ioprio;
- entity->ioprio_class = entity->new_ioprio_class;
- entity->parent = bfqg->my_entity;
- entity->sched_data = &bfqg->sched_data;
+ unsigned long long now;
+
+ if (!bfqg_stats_waiting(stats))
+ return;
+
+ now = sched_clock();
+ if (time_after64(now, stats->start_group_wait_time))
+ blkg_stat_add(&stats->group_wait_time,
+ now - stats->start_group_wait_time);
+ bfqg_stats_clear_waiting(stats);
}
-static struct bfqio_cgroup *css_to_bfqio(struct cgroup_subsys_state *css)
+/* This should be called with the queue_lock held. */
+static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
+ struct bfq_group *curr_bfqg)
{
- return css ? container_of(css, struct bfqio_cgroup, css) : NULL;
+ struct bfqg_stats *stats = &bfqg->stats;
+
+ if (bfqg_stats_waiting(stats))
+ return;
+ if (bfqg == curr_bfqg)
+ return;
+ stats->start_group_wait_time = sched_clock();
+ bfqg_stats_mark_waiting(stats);
}
-/*
- * Search the bfq_group for bfqd into the hash table (by now only a list)
- * of bgrp. Must be called under rcu_read_lock().
- */
-static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp,
- struct bfq_data *bfqd)
+/* This should be called with the queue_lock held. */
+static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
{
- struct bfq_group *bfqg;
- void *key;
+ unsigned long long now;
- hlist_for_each_entry_rcu(bfqg, &bgrp->group_data, group_node) {
- key = rcu_dereference(bfqg->bfqd);
- if (key == bfqd)
- return bfqg;
- }
+ if (!bfqg_stats_empty(stats))
+ return;
- return NULL;
+ now = sched_clock();
+ if (time_after64(now, stats->start_empty_time))
+ blkg_stat_add(&stats->empty_time,
+ now - stats->start_empty_time);
+ bfqg_stats_clear_empty(stats);
}
-static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp,
- struct bfq_group *bfqg)
+static void bfqg_stats_update_dequeue(struct bfq_group *bfqg)
{
- struct bfq_entity *entity = &bfqg->entity;
+ blkg_stat_add(&bfqg->stats.dequeue, 1);
+}
+
+static void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
+{
+ struct bfqg_stats *stats = &bfqg->stats;
+
+ if (blkg_rwstat_total(&stats->queued))
+ return;
/*
- * If the weight of the entity has never been set via the sysfs
- * interface, then bgrp->weight == 0. In this case we initialize
- * the weight from the current ioprio value. Otherwise, the group
- * weight, if set, has priority over the ioprio value.
+ * The group is already marked empty. This can happen if bfqq got a new
+ * request in the parent group and moved to this group while being added
+ * to the service tree. Just ignore the event and move on.
*/
- if (bgrp->weight == 0) {
- entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio);
- entity->new_ioprio = bgrp->ioprio;
- } else {
- if (bgrp->weight < BFQ_MIN_WEIGHT ||
- bgrp->weight > BFQ_MAX_WEIGHT) {
- printk(KERN_CRIT "bfq_group_init_entity: "
- "bgrp->weight %d\n", bgrp->weight);
- BUG();
- }
- entity->new_weight = bgrp->weight;
- entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight);
+ if (bfqg_stats_empty(stats))
+ return;
+
+ stats->start_empty_time = sched_clock();
+ bfqg_stats_mark_empty(stats);
+}
+
+static void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
+{
+ struct bfqg_stats *stats = &bfqg->stats;
+
+ if (bfqg_stats_idling(stats)) {
+ unsigned long long now = sched_clock();
+
+ if (time_after64(now, stats->start_idle_time))
+ blkg_stat_add(&stats->idle_time,
+ now - stats->start_idle_time);
+ bfqg_stats_clear_idling(stats);
}
- entity->orig_weight = entity->weight = entity->new_weight;
- entity->ioprio = entity->new_ioprio;
- entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class;
- entity->my_sched_data = &bfqg->sched_data;
- bfqg->active_entities = 0;
}
-static inline void bfq_group_set_parent(struct bfq_group *bfqg,
- struct bfq_group *parent)
+static void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
{
- struct bfq_entity *entity;
+ struct bfqg_stats *stats = &bfqg->stats;
- BUG_ON(parent == NULL);
- BUG_ON(bfqg == NULL);
+ stats->start_idle_time = sched_clock();
+ bfqg_stats_mark_idling(stats);
+}
- entity = &bfqg->entity;
- entity->parent = parent->my_entity;
- entity->sched_data = &parent->sched_data;
+static void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
+{
+ struct bfqg_stats *stats = &bfqg->stats;
+
+ blkg_stat_add(&stats->avg_queue_size_sum,
+ blkg_rwstat_total(&stats->queued));
+ blkg_stat_add(&stats->avg_queue_size_samples, 1);
+ bfqg_stats_update_group_wait_time(stats);
}
-/**
- * bfq_group_chain_alloc - allocate a chain of groups.
- * @bfqd: queue descriptor.
- * @css: the leaf cgroup_subsys_state this chain starts from.
- *
- * Allocate a chain of groups starting from the one belonging to
- * @cgroup up to the root cgroup. Stop if a cgroup on the chain
- * to the root has already an allocated group on @bfqd.
+static struct blkcg_policy blkcg_policy_bfq;
+
+/*
+ * blk-cgroup policy-related handlers
+ * The following functions help in converting between blk-cgroup
+ * internal structures and BFQ-specific structures.
*/
-static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd,
- struct cgroup_subsys_state *css)
+
+static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd)
{
- struct bfqio_cgroup *bgrp;
- struct bfq_group *bfqg, *prev = NULL, *leaf = NULL;
+ return pd ? container_of(pd, struct bfq_group, pd) : NULL;
+}
- for (; css != NULL; css = css->parent) {
- bgrp = css_to_bfqio(css);
+static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
+{
+ return pd_to_blkg(&bfqg->pd);
+}
- bfqg = bfqio_lookup_group(bgrp, bfqd);
- if (bfqg != NULL) {
- /*
- * All the cgroups in the path from there to the
- * root must have a bfq_group for bfqd, so we don't
- * need any more allocations.
- */
- break;
- }
+static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
+{
+ return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq));
+}
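/*
 * Not part of the patch, illustration only: the three converters above are
 * inverses of one another. A bfq_group embeds a blkg_policy_data (pd), and
 * the owning blkg keeps a pointer back to that pd for the bfq policy, so:
 *
 *      struct blkcg_gq *blkg = bfqg_to_blkg(bfqg);
 *      struct bfq_group *same = blkg_to_bfqg(blkg);    // same == bfqg
 */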
- bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC);
- if (bfqg == NULL)
- goto cleanup;
+/*
+ * bfq_group handlers
+ * The following functions help in navigating the bfq_group hierarchy
+ * by allowing to find the parent of a bfq_group or the bfq_group
+ * associated to a bfq_queue.
+ */
- bfq_group_init_entity(bgrp, bfqg);
- bfqg->my_entity = &bfqg->entity;
+static struct bfq_group *bfqg_parent(struct bfq_group *bfqg)
+{
+ struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent;
- if (leaf == NULL) {
- leaf = bfqg;
- prev = leaf;
- } else {
- bfq_group_set_parent(prev, bfqg);
- /*
- * Build a list of allocated nodes using the bfqd
- * filed, that is still unused and will be
- * initialized only after the node will be
- * connected.
- */
- prev->bfqd = bfqg;
- prev = bfqg;
- }
- }
+ return pblkg ? blkg_to_bfqg(pblkg) : NULL;
+}
- return leaf;
+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
+{
+ struct bfq_entity *group_entity = bfqq->entity.parent;
-cleanup:
- while (leaf != NULL) {
- prev = leaf;
- leaf = leaf->bfqd;
- kfree(prev);
- }
+ return group_entity ? container_of(group_entity, struct bfq_group,
+ entity) :
+ bfqq->bfqd->root_group;
+}
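/*
 * Not part of the patch: a hypothetical helper, for illustration only,
 * showing how the two functions above combine to walk from a queue up to
 * the root group of its device; bfqg_parent() returns NULL once the
 * parentless root blkg is reached.
 */
static struct bfq_group *bfq_walk_to_root_sketch(struct bfq_queue *bfqq)
{
        struct bfq_group *bfqg = bfqq_group(bfqq);
        struct bfq_group *parent;

        while ((parent = bfqg_parent(bfqg)))
                bfqg = parent;

        return bfqg;    /* the root group of bfqq's device */
}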
+
+/*
+ * The following two functions handle get and put of a bfq_group by
+ * wrapping the related blk-cgroup hooks.
+ */
- return NULL;
+static void bfqg_get(struct bfq_group *bfqg)
+{
+ return blkg_get(bfqg_to_blkg(bfqg));
}
-/**
- * bfq_group_chain_link - link an allocated group chain to a cgroup
- * hierarchy.
- * @bfqd: the queue descriptor.
- * @css: the leaf cgroup_subsys_state to start from.
- * @leaf: the leaf group (to be associated to @cgroup).
- *
- * Try to link a chain of groups to a cgroup hierarchy, connecting the
- * nodes bottom-up, so we can be sure that when we find a cgroup in the
- * hierarchy that already as a group associated to @bfqd all the nodes
- * in the path to the root cgroup have one too.
- *
- * On locking: the queue lock protects the hierarchy (there is a hierarchy
- * per device) while the bfqio_cgroup lock protects the list of groups
- * belonging to the same cgroup.
+static void bfqg_put(struct bfq_group *bfqg)
+{
+ return blkg_put(bfqg_to_blkg(bfqg));
+}
+
+static void bfqg_stats_update_io_add(struct bfq_group *bfqg,
+ struct bfq_queue *bfqq,
+ int rw)
+{
+ blkg_rwstat_add(&bfqg->stats.queued, rw, 1);
+ bfqg_stats_end_empty_time(&bfqg->stats);
+ if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
+ bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
+}
+
+static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int rw)
+{
+ blkg_rwstat_add(&bfqg->stats.queued, rw, -1);
+}
+
+static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw)
+{
+ blkg_rwstat_add(&bfqg->stats.merged, rw, 1);
+}
+
+static void bfqg_stats_update_dispatch(struct bfq_group *bfqg,
+ uint64_t bytes, int rw)
+{
+ blkg_stat_add(&bfqg->stats.sectors, bytes >> 9);
+ blkg_rwstat_add(&bfqg->stats.serviced, rw, 1);
+ blkg_rwstat_add(&bfqg->stats.service_bytes, rw, bytes);
+}
+
+static void bfqg_stats_update_completion(struct bfq_group *bfqg,
+ uint64_t start_time, uint64_t io_start_time, int rw)
+{
+ struct bfqg_stats *stats = &bfqg->stats;
+ unsigned long long now = sched_clock();
+
+ if (time_after64(now, io_start_time))
+ blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
+ if (time_after64(io_start_time, start_time))
+ blkg_rwstat_add(&stats->wait_time, rw,
+ io_start_time - start_time);
+}
+
+/* @stats = 0 */
+static void bfqg_stats_reset(struct bfqg_stats *stats)
+{
+ if (!stats)
+ return;
+
+ /* queued stats shouldn't be cleared */
+ blkg_rwstat_reset(&stats->service_bytes);
+ blkg_rwstat_reset(&stats->serviced);
+ blkg_rwstat_reset(&stats->merged);
+ blkg_rwstat_reset(&stats->service_time);
+ blkg_rwstat_reset(&stats->wait_time);
+ blkg_stat_reset(&stats->time);
+ blkg_stat_reset(&stats->unaccounted_time);
+ blkg_stat_reset(&stats->avg_queue_size_sum);
+ blkg_stat_reset(&stats->avg_queue_size_samples);
+ blkg_stat_reset(&stats->dequeue);
+ blkg_stat_reset(&stats->group_wait_time);
+ blkg_stat_reset(&stats->idle_time);
+ blkg_stat_reset(&stats->empty_time);
+}
+
+/* @to += @from */
+static void bfqg_stats_merge(struct bfqg_stats *to, struct bfqg_stats *from)
+{
+ if (!to || !from)
+ return;
+
+ /* queued stats shouldn't be cleared */
+ blkg_rwstat_merge(&to->service_bytes, &from->service_bytes);
+ blkg_rwstat_merge(&to->serviced, &from->serviced);
+ blkg_rwstat_merge(&to->merged, &from->merged);
+ blkg_rwstat_merge(&to->service_time, &from->service_time);
+ blkg_rwstat_merge(&to->wait_time, &from->wait_time);
+ blkg_stat_merge(&to->time, &from->time);
+ blkg_stat_merge(&to->unaccounted_time, &from->unaccounted_time);
+ blkg_stat_merge(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
+ blkg_stat_merge(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
+ blkg_stat_merge(&to->dequeue, &from->dequeue);
+ blkg_stat_merge(&to->group_wait_time, &from->group_wait_time);
+ blkg_stat_merge(&to->idle_time, &from->idle_time);
+ blkg_stat_merge(&to->empty_time, &from->empty_time);
+}
+
+/*
+ * Transfer @bfqg's stats to its parent's dead_stats so that the ancestors'
+ * recursive stats can still account for the amount used by this bfqg after
+ * it's gone.
*/
-static void bfq_group_chain_link(struct bfq_data *bfqd,
- struct cgroup_subsys_state *css,
- struct bfq_group *leaf)
+static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
{
- struct bfqio_cgroup *bgrp;
- struct bfq_group *bfqg, *next, *prev = NULL;
- unsigned long flags;
+ struct bfq_group *parent;
- assert_spin_locked(bfqd->queue->queue_lock);
+ if (!bfqg) /* root_group */
+ return;
- for (; css != NULL && leaf != NULL; css = css->parent) {
- bgrp = css_to_bfqio(css);
- next = leaf->bfqd;
+ parent = bfqg_parent(bfqg);
- bfqg = bfqio_lookup_group(bgrp, bfqd);
- BUG_ON(bfqg != NULL);
+ lockdep_assert_held(bfqg_to_blkg(bfqg)->q->queue_lock);
- spin_lock_irqsave(&bgrp->lock, flags);
+ if (unlikely(!parent))
+ return;
- rcu_assign_pointer(leaf->bfqd, bfqd);
- hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data);
- hlist_add_head(&leaf->bfqd_node, &bfqd->group_list);
+ bfqg_stats_merge(&parent->dead_stats, &bfqg->stats);
+ bfqg_stats_merge(&parent->dead_stats, &bfqg->dead_stats);
+ bfqg_stats_reset(&bfqg->stats);
+ bfqg_stats_reset(&bfqg->dead_stats);
+}
- spin_unlock_irqrestore(&bgrp->lock, flags);
+static void bfq_init_entity(struct bfq_entity *entity,
+ struct bfq_group *bfqg)
+{
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
- prev = leaf;
- leaf = next;
+ entity->weight = entity->new_weight;
+ entity->orig_weight = entity->new_weight;
+ if (bfqq) {
+ bfqq->ioprio = bfqq->new_ioprio;
+ bfqq->ioprio_class = bfqq->new_ioprio_class;
+ bfqg_get(bfqg);
}
+ entity->parent = bfqg->my_entity;
+ entity->sched_data = &bfqg->sched_data;
+}
- BUG_ON(css == NULL && leaf != NULL);
- if (css != NULL && prev != NULL) {
- bgrp = css_to_bfqio(css);
- bfqg = bfqio_lookup_group(bgrp, bfqd);
- bfq_group_set_parent(prev, bfqg);
- }
+static void bfqg_stats_init(struct bfqg_stats *stats)
+{
+ blkg_rwstat_init(&stats->service_bytes);
+ blkg_rwstat_init(&stats->serviced);
+ blkg_rwstat_init(&stats->merged);
+ blkg_rwstat_init(&stats->service_time);
+ blkg_rwstat_init(&stats->wait_time);
+ blkg_rwstat_init(&stats->queued);
+
+ blkg_stat_init(&stats->sectors);
+ blkg_stat_init(&stats->time);
+
+ blkg_stat_init(&stats->unaccounted_time);
+ blkg_stat_init(&stats->avg_queue_size_sum);
+ blkg_stat_init(&stats->avg_queue_size_samples);
+ blkg_stat_init(&stats->dequeue);
+ blkg_stat_init(&stats->group_wait_time);
+ blkg_stat_init(&stats->idle_time);
+ blkg_stat_init(&stats->empty_time);
+}
+
+static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
+{
+ return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
+}
+
+static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
+{
+ return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
+}
+
+static void bfq_cpd_init(const struct blkcg *blkcg)
+{
+ struct bfq_group_data *d =
+ cpd_to_bfqgd(blkcg->pd[blkcg_policy_bfq.plid]);
+
+ d->weight = BFQ_DEFAULT_GRP_WEIGHT;
+}
+
+static void bfq_pd_init(struct blkcg_gq *blkg)
+{
+ struct bfq_group *bfqg = blkg_to_bfqg(blkg);
+ struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
+ struct bfq_entity *entity = &bfqg->entity;
+ struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);
+
+ entity->orig_weight = entity->weight = entity->new_weight = d->weight;
+ entity->my_sched_data = &bfqg->sched_data;
+ bfqg->my_entity = entity; /*
+ * the root_group's will be set to NULL
+ * in bfq_init_queue()
+ */
+ bfqg->bfqd = bfqd;
+ bfqg->active_entities = 0;
+ bfqg->rq_pos_tree = RB_ROOT;
+
+ /* if the root_group does not exist, we are handling it right now */
+ if (bfqd->root_group && bfqg != bfqd->root_group)
+ hlist_add_head(&bfqg->bfqd_node, &bfqd->group_list);
+
+ bfqg_stats_init(&bfqg->stats);
+ bfqg_stats_init(&bfqg->dead_stats);
+}
+
+/* offset delta from bfqg->stats to bfqg->dead_stats */
+static const int dead_stats_off_delta = offsetof(struct bfq_group, dead_stats) -
+ offsetof(struct bfq_group, stats);
+
+/* to be used by recursive prfill, sums live and dead stats recursively */
+static u64 bfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off)
+{
+ u64 sum = 0;
+
+ sum += blkg_stat_recursive_sum(pd, off);
+ sum += blkg_stat_recursive_sum(pd, off + dead_stats_off_delta);
+ return sum;
+}
+
+/* to be used by recursive prfill, sums live and dead rwstats recursively */
+static struct blkg_rwstat bfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd,
+ int off)
+{
+ struct blkg_rwstat a, b;
+
+ a = blkg_rwstat_recursive_sum(pd, off);
+ b = blkg_rwstat_recursive_sum(pd, off + dead_stats_off_delta);
+ blkg_rwstat_merge(&a, &b);
+ return a;
+}
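/*
 * Not part of the patch: the 'off + dead_stats_off_delta' trick above works
 * because stats and dead_stats are two instances of the same struct
 * bfqg_stats, so that for any field, e.g. ->time,
 *
 *      offsetof(struct bfq_group, stats.time) + dead_stats_off_delta
 *              == offsetof(struct bfq_group, dead_stats.time)
 *
 * which lets the same 'off' argument (taken from the cftype ->private
 * value) address both the live and the dead counter.
 */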
+
+static void bfq_pd_reset_stats(struct blkcg_gq *blkg)
+{
+ struct bfq_group *bfqg = blkg_to_bfqg(blkg);
+
+ bfqg_stats_reset(&bfqg->stats);
+ bfqg_stats_reset(&bfqg->dead_stats);
+}
+
+static void bfq_group_set_parent(struct bfq_group *bfqg,
+ struct bfq_group *parent)
+{
+ struct bfq_entity *entity;
+
+ BUG_ON(!parent);
+ BUG_ON(!bfqg);
+ BUG_ON(bfqg == parent);
+
+ entity = &bfqg->entity;
+ entity->parent = parent->my_entity;
+ entity->sched_data = &parent->sched_data;
}
-/**
- * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup.
- * @bfqd: queue descriptor.
- * @cgroup: cgroup being searched for.
- *
- * Return a group associated to @bfqd in @cgroup, allocating one if
- * necessary. When a group is returned all the cgroups in the path
- * to the root have a group associated to @bfqd.
- *
- * If the allocation fails, return the root group: this breaks guarantees
- * but is a safe fallback. If this loss becomes a problem it can be
- * mitigated using the equivalent weight (given by the product of the
- * weights of the groups in the path from @group to the root) in the
- * root scheduler.
- *
- * We allocate all the missing nodes in the path from the leaf cgroup
- * to the root and we connect the nodes only after all the allocations
- * have been successful.
- */
static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
- struct cgroup_subsys_state *css)
+ struct blkcg *blkcg)
{
- struct bfqio_cgroup *bgrp = css_to_bfqio(css);
- struct bfq_group *bfqg;
+ struct request_queue *q = bfqd->queue;
+ struct bfq_group *bfqg = NULL, *parent;
+ struct bfq_entity *entity = NULL;
- bfqg = bfqio_lookup_group(bgrp, bfqd);
- if (bfqg != NULL)
- return bfqg;
+ assert_spin_locked(bfqd->queue->queue_lock);
- bfqg = bfq_group_chain_alloc(bfqd, css);
- if (bfqg != NULL)
- bfq_group_chain_link(bfqd, css, bfqg);
- else
+ /* avoid lookup for the common case where there's no blkcg */
+ if (blkcg == &blkcg_root) {
bfqg = bfqd->root_group;
+ } else {
+ struct blkcg_gq *blkg;
+
+ blkg = blkg_lookup_create(blkcg, q);
+ if (!IS_ERR(blkg))
+ bfqg = blkg_to_bfqg(blkg);
+ else /* fallback to root_group */
+ bfqg = bfqd->root_group;
+ }
+
+ BUG_ON(!bfqg);
+
+ /*
+ * Update chain of bfq_groups as we might be handling a leaf group
+ * which, along with some of its relatives, has not been hooked yet
+ * to the private hierarchy of BFQ.
+ */
+ entity = &bfqg->entity;
+ for_each_entity(entity) {
+ bfqg = container_of(entity, struct bfq_group, entity);
+ BUG_ON(!bfqg);
+ if (bfqg != bfqd->root_group) {
+ parent = bfqg_parent(bfqg);
+ if (!parent)
+ parent = bfqd->root_group;
+ BUG_ON(!parent);
+ bfq_group_set_parent(bfqg, parent);
+ }
+ }
return bfqg;
}
+static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
+
/**
* bfq_bfqq_move - migrate @bfqq to @bfqg.
* @bfqd: queue descriptor.
@@ -296,6 +522,7 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_deactivate_bfqq(bfqd, bfqq, 0);
} else if (entity->on_st)
bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
+ bfqg_put(bfqq_group(bfqq));
/*
* Here we use a reference to bfqg. We don't need a refcounter
@@ -304,11 +531,15 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
*/
entity->parent = bfqg->my_entity;
entity->sched_data = &bfqg->sched_data;
+ bfqg_get(bfqg);
- if (busy && resume)
- bfq_activate_bfqq(bfqd, bfqq);
+ if (busy) {
+ bfq_pos_tree_add_move(bfqd, bfqq);
+ if (resume)
+ bfq_activate_bfqq(bfqd, bfqq);
+ }
- if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver)
+ if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
bfq_schedule_dispatch(bfqd);
}
@@ -316,9 +547,9 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* __bfq_bic_change_cgroup - move @bic to @cgroup.
* @bfqd: the queue descriptor.
* @bic: the bic to move.
- * @cgroup: the cgroup to move to.
+ * @blkcg: the blk-cgroup to move to.
*
- * Move bic to cgroup, assuming that bfqd->queue is locked; the caller
+ * Move bic to blkcg, assuming that bfqd->queue is locked; the caller
* has to make sure that the reference to cgroup is valid across the call.
*
* NOTE: an alternative approach might have been to store the current
@@ -327,18 +558,17 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
*/
static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
struct bfq_io_cq *bic,
- struct cgroup_subsys_state *css)
+ struct blkcg *blkcg)
{
struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
- struct bfq_entity *entity;
struct bfq_group *bfqg;
- struct bfqio_cgroup *bgrp;
+ struct bfq_entity *entity;
- bgrp = css_to_bfqio(css);
+ lockdep_assert_held(bfqd->queue->queue_lock);
- bfqg = bfq_find_alloc_group(bfqd, css);
- if (async_bfqq != NULL) {
+ bfqg = bfq_find_alloc_group(bfqd, blkcg);
+ if (async_bfqq) {
entity = &async_bfqq->entity;
if (entity->sched_data != &bfqg->sched_data) {
@@ -350,7 +580,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
}
}
- if (sync_bfqq != NULL) {
+ if (sync_bfqq) {
entity = &sync_bfqq->entity;
if (entity->sched_data != &bfqg->sched_data)
bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
@@ -359,74 +589,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
return bfqg;
}
-/**
- * bfq_bic_change_cgroup - move @bic to @cgroup.
- * @bic: the bic being migrated.
- * @cgroup: the destination cgroup.
- *
- * When the task owning @bic is moved to @cgroup, @bic is immediately
- * moved into its new parent group.
- */
-static void bfq_bic_change_cgroup(struct bfq_io_cq *bic,
- struct cgroup_subsys_state *css)
-{
- struct bfq_data *bfqd;
- unsigned long uninitialized_var(flags);
-
- bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
- &flags);
- if (bfqd != NULL) {
- __bfq_bic_change_cgroup(bfqd, bic, css);
- bfq_put_bfqd_unlock(bfqd, &flags);
- }
-}
-
-/**
- * bfq_bic_update_cgroup - update the cgroup of @bic.
- * @bic: the @bic to update.
- *
- * Make sure that @bic is enqueued in the cgroup of the current task.
- * We need this in addition to moving bics during the cgroup attach
- * phase because the task owning @bic could be at its first disk
- * access or we may end up in the root cgroup as the result of a
- * memory allocation failure and here we try to move to the right
- * group.
- *
- * Must be called under the queue lock. It is safe to use the returned
- * value even after the rcu_read_unlock() as the migration/destruction
- * paths act under the queue lock too. IOW it is impossible to race with
- * group migration/destruction and end up with an invalid group as:
- * a) here cgroup has not yet been destroyed, nor its destroy callback
- * has started execution, as current holds a reference to it,
- * b) if it is destroyed after rcu_read_unlock() [after current is
- * migrated to a different cgroup] its attach() callback will have
- * taken care of remove all the references to the old cgroup data.
- */
-static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic)
+static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
{
struct bfq_data *bfqd = bic_to_bfqd(bic);
- struct bfq_group *bfqg;
- struct cgroup_subsys_state *css;
-
- BUG_ON(bfqd == NULL);
+ struct blkcg *blkcg;
+ struct bfq_group *bfqg = NULL;
+ uint64_t id;
rcu_read_lock();
- css = task_css(current, bfqio_cgrp_id);
- bfqg = __bfq_bic_change_cgroup(bfqd, bic, css);
+ blkcg = bio_blkcg(bio);
+ id = blkcg->css.serial_nr;
rcu_read_unlock();
- return bfqg;
+ /*
+ * Check whether blkcg has changed. The condition may trigger
+ * spuriously on a newly created bic, but there's no harm.
+ */
+ if (unlikely(!bfqd) || likely(bic->blkcg_id == id))
+ return;
+
+ bfqg = __bfq_bic_change_cgroup(bfqd, bic, blkcg);
+ BUG_ON(!bfqg);
+ bic->blkcg_id = id;
}
/**
* bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
* @st: the service tree being flushed.
*/
-static inline void bfq_flush_idle_tree(struct bfq_service_tree *st)
+static void bfq_flush_idle_tree(struct bfq_service_tree *st)
{
struct bfq_entity *entity = st->first_idle;
- for (; entity != NULL; entity = st->first_idle)
+ for (; entity; entity = st->first_idle)
__bfq_deactivate_entity(entity, 0);
}
@@ -435,12 +630,12 @@ static inline void bfq_flush_idle_tree(struct bfq_service_tree *st)
* @bfqd: the device data structure with the root group.
* @entity: the entity to move.
*/
-static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
- struct bfq_entity *entity)
+static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
+ struct bfq_entity *entity)
{
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
- BUG_ON(bfqq == NULL);
+ BUG_ON(!bfqq);
bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
return;
}
@@ -454,9 +649,9 @@ static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
*
* Needs queue_lock to be taken and reference to be valid over the call.
*/
-static inline void bfq_reparent_active_entities(struct bfq_data *bfqd,
- struct bfq_group *bfqg,
- struct bfq_service_tree *st)
+static void bfq_reparent_active_entities(struct bfq_data *bfqd,
+ struct bfq_group *bfqg,
+ struct bfq_service_tree *st)
{
struct rb_root *active = &st->active;
struct bfq_entity *entity = NULL;
@@ -464,10 +659,10 @@ static inline void bfq_reparent_active_entities(struct bfq_data *bfqd,
if (!RB_EMPTY_ROOT(&st->active))
entity = bfq_entity_of(rb_first(active));
- for (; entity != NULL; entity = bfq_entity_of(rb_first(active)))
+ for (; entity; entity = bfq_entity_of(rb_first(active)))
bfq_reparent_leaf_entity(bfqd, entity);
- if (bfqg->sched_data.in_service_entity != NULL)
+ if (bfqg->sched_data.in_service_entity)
bfq_reparent_leaf_entity(bfqd,
bfqg->sched_data.in_service_entity);
@@ -476,20 +671,21 @@ static inline void bfq_reparent_active_entities(struct bfq_data *bfqd,
/**
* bfq_destroy_group - destroy @bfqg.
- * @bgrp: the bfqio_cgroup containing @bfqg.
* @bfqg: the group being destroyed.
*
* Destroy @bfqg, making sure that it is not referenced from its parent.
+ * blkio already grabs the queue_lock for us, so no need to use RCU-based magic
*/
-static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg)
+static void bfq_pd_offline(struct blkcg_gq *blkg)
{
- struct bfq_data *bfqd;
struct bfq_service_tree *st;
+ struct bfq_group *bfqg = blkg_to_bfqg(blkg);
+ struct bfq_data *bfqd = bfqg->bfqd;
struct bfq_entity *entity = bfqg->my_entity;
- unsigned long uninitialized_var(flags);
int i;
- hlist_del(&bfqg->group_node);
+ if (!entity) /* root group */
+ return;
/*
* Empty all service_trees belonging to this group before
@@ -518,37 +714,19 @@ static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg)
* There is no need to put the sync queues, as the
* scheduler has taken no reference.
*/
- bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
- if (bfqd != NULL) {
- bfq_reparent_active_entities(bfqd, bfqg, st);
- bfq_put_bfqd_unlock(bfqd, &flags);
- }
+ bfq_reparent_active_entities(bfqd, bfqg, st);
BUG_ON(!RB_EMPTY_ROOT(&st->active));
BUG_ON(!RB_EMPTY_ROOT(&st->idle));
}
- BUG_ON(bfqg->sched_data.next_in_service != NULL);
- BUG_ON(bfqg->sched_data.in_service_entity != NULL);
+ BUG_ON(bfqg->sched_data.next_in_service);
+ BUG_ON(bfqg->sched_data.in_service_entity);
- /*
- * We may race with device destruction, take extra care when
- * dereferencing bfqg->bfqd.
- */
- bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
- if (bfqd != NULL) {
- hlist_del(&bfqg->bfqd_node);
- __bfq_deactivate_entity(entity, 0);
- bfq_put_async_queues(bfqd, bfqg);
- bfq_put_bfqd_unlock(bfqd, &flags);
- }
- BUG_ON(entity->tree != NULL);
+ hlist_del(&bfqg->bfqd_node);
+ __bfq_deactivate_entity(entity, 0);
+ bfq_put_async_queues(bfqd, bfqg);
+ BUG_ON(entity->tree);
- /*
- * No need to defer the kfree() to the end of the RCU grace
- * period: we are called from the destroy() callback of our
- * cgroup, so we can be sure that no one is a) still using
- * this cgroup or b) doing lookups in it.
- */
- kfree(bfqg);
+ bfqg_stats_xfer_dead(bfqg);
}
static void bfq_end_wr_async(struct bfq_data *bfqd)
@@ -595,312 +773,309 @@ static void bfq_disconnect_groups(struct bfq_data *bfqd)
}
}
-static inline void bfq_free_root_group(struct bfq_data *bfqd)
+static u64 bfqio_cgroup_weight_read(struct cgroup_subsys_state *css,
+ struct cftype *cftype)
{
- struct bfqio_cgroup *bgrp = &bfqio_root_cgroup;
- struct bfq_group *bfqg = bfqd->root_group;
-
- bfq_put_async_queues(bfqd, bfqg);
+ struct blkcg *blkcg = css_to_blkcg(css);
+ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
+ int ret = -EINVAL;
- spin_lock_irq(&bgrp->lock);
- hlist_del_rcu(&bfqg->group_node);
- spin_unlock_irq(&bgrp->lock);
+ spin_lock_irq(&blkcg->lock);
+ ret = bfqgd->weight;
+ spin_unlock_irq(&blkcg->lock);
- /*
- * No need to synchronize_rcu() here: since the device is gone
- * there cannot be any read-side access to its root_group.
- */
- kfree(bfqg);
+ return ret;
}
-static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
+static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css,
+ struct cftype *cftype,
+ u64 val)
{
- struct bfq_group *bfqg;
- struct bfqio_cgroup *bgrp;
- int i;
-
- bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node);
- if (bfqg == NULL)
- return NULL;
-
- bfqg->entity.parent = NULL;
- for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
- bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
-
- bgrp = &bfqio_root_cgroup;
- spin_lock_irq(&bgrp->lock);
- rcu_assign_pointer(bfqg->bfqd, bfqd);
- hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data);
- spin_unlock_irq(&bgrp->lock);
+ struct blkcg *blkcg = css_to_blkcg(css);
+ struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
+ struct blkcg_gq *blkg;
+ int ret = -EINVAL;
+
+ if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
+ return ret;
+
+ ret = 0;
+ spin_lock_irq(&blkcg->lock);
+ bfqgd->weight = (unsigned short)val;
+ hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
+ struct bfq_group *bfqg = blkg_to_bfqg(blkg);
+ if (!bfqg)
+ continue;
+ /*
+ * Setting the prio_changed flag of the entity
+ * to 1 with new_weight == weight would re-set
+ * the value of the weight to its ioprio mapping.
+ * Set the flag only if necessary.
+ */
+ if ((unsigned short)val != bfqg->entity.new_weight) {
+ bfqg->entity.new_weight = (unsigned short)val;
+ /*
+ * Make sure that the above new value has been
+ * stored in bfqg->entity.new_weight before
+ * setting the prio_changed flag. In fact,
+ * this flag may be read asynchronously (in
+ * critical sections protected by a different
+ * lock than that held here), and finding this
+ * flag set may cause the execution of the code
+ * for updating parameters whose value may
+ * depend also on bfqg->entity.new_weight (in
+ * __bfq_entity_update_weight_prio).
+ * This barrier makes sure that the new value
+ * of bfqg->entity.new_weight is correctly
+ * seen in that code.
+ */
+ smp_wmb();
+ bfqg->entity.prio_changed = 1;
+ }
+ }
+ spin_unlock_irq(&blkcg->lock);
- return bfqg;
+ return ret;
}
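/*
 * Not part of the patch, illustration only: the publish sequence used in
 * the loop above, shown in isolation:
 *
 *      bfqg->entity.new_weight = (unsigned short)val;
 *      smp_wmb();                      // publish new_weight first
 *      bfqg->entity.prio_changed = 1;  // then raise the flag
 *
 * A reader that later observes prio_changed == 1 and re-reads new_weight
 * (as __bfq_entity_update_weight_prio does, under a different lock) is
 * thus guaranteed, given matching ordering on its side, not to use a
 * stale weight.
 */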
-#define SHOW_FUNCTION(__VAR) \
-static u64 bfqio_cgroup_##__VAR##_read(struct cgroup_subsys_state *css, \
- struct cftype *cftype) \
-{ \
- struct bfqio_cgroup *bgrp = css_to_bfqio(css); \
- u64 ret = -ENODEV; \
- \
- mutex_lock(&bfqio_mutex); \
- if (bfqio_is_removed(bgrp)) \
- goto out_unlock; \
- \
- spin_lock_irq(&bgrp->lock); \
- ret = bgrp->__VAR; \
- spin_unlock_irq(&bgrp->lock); \
- \
-out_unlock: \
- mutex_unlock(&bfqio_mutex); \
- return ret; \
-}
-
-SHOW_FUNCTION(weight);
-SHOW_FUNCTION(ioprio);
-SHOW_FUNCTION(ioprio_class);
-#undef SHOW_FUNCTION
-
-#define STORE_FUNCTION(__VAR, __MIN, __MAX) \
-static int bfqio_cgroup_##__VAR##_write(struct cgroup_subsys_state *css,\
- struct cftype *cftype, \
- u64 val) \
-{ \
- struct bfqio_cgroup *bgrp = css_to_bfqio(css); \
- struct bfq_group *bfqg; \
- int ret = -EINVAL; \
- \
- if (val < (__MIN) || val > (__MAX)) \
- return ret; \
- \
- ret = -ENODEV; \
- mutex_lock(&bfqio_mutex); \
- if (bfqio_is_removed(bgrp)) \
- goto out_unlock; \
- ret = 0; \
- \
- spin_lock_irq(&bgrp->lock); \
- bgrp->__VAR = (unsigned short)val; \
- hlist_for_each_entry(bfqg, &bgrp->group_data, group_node) { \
- /* \
- * Setting the ioprio_changed flag of the entity \
- * to 1 with new_##__VAR == ##__VAR would re-set \
- * the value of the weight to its ioprio mapping. \
- * Set the flag only if necessary. \
- */ \
- if ((unsigned short)val != bfqg->entity.new_##__VAR) { \
- bfqg->entity.new_##__VAR = (unsigned short)val; \
- /* \
- * Make sure that the above new value has been \
- * stored in bfqg->entity.new_##__VAR before \
- * setting the ioprio_changed flag. In fact, \
- * this flag may be read asynchronously (in \
- * critical sections protected by a different \
- * lock than that held here), and finding this \
- * flag set may cause the execution of the code \
- * for updating parameters whose value may \
- * depend also on bfqg->entity.new_##__VAR (in \
- * __bfq_entity_update_weight_prio). \
- * This barrier makes sure that the new value \
- * of bfqg->entity.new_##__VAR is correctly \
- * seen in that code. \
- */ \
- smp_wmb(); \
- bfqg->entity.ioprio_changed = 1; \
- } \
- } \
- spin_unlock_irq(&bgrp->lock); \
- \
-out_unlock: \
- mutex_unlock(&bfqio_mutex); \
- return ret; \
-}
-
-STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT);
-STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1);
-STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE);
-#undef STORE_FUNCTION
+static int bfqg_print_stat(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
+ &blkcg_policy_bfq, seq_cft(sf)->private, false);
+ return 0;
+}
-static struct cftype bfqio_files[] = {
- {
- .name = "weight",
- .read_u64 = bfqio_cgroup_weight_read,
- .write_u64 = bfqio_cgroup_weight_write,
- },
- {
- .name = "ioprio",
- .read_u64 = bfqio_cgroup_ioprio_read,
- .write_u64 = bfqio_cgroup_ioprio_write,
- },
- {
- .name = "ioprio_class",
- .read_u64 = bfqio_cgroup_ioprio_class_read,
- .write_u64 = bfqio_cgroup_ioprio_class_write,
- },
- { }, /* terminate */
-};
+static int bfqg_print_rwstat(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
+ &blkcg_policy_bfq, seq_cft(sf)->private, true);
+ return 0;
+}
-static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys_state
- *parent_css)
+static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
+ struct blkg_policy_data *pd, int off)
{
- struct bfqio_cgroup *bgrp;
+ u64 sum = bfqg_stat_pd_recursive_sum(pd, off);
- if (parent_css != NULL) {
- bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL);
- if (bgrp == NULL)
- return ERR_PTR(-ENOMEM);
- } else
- bgrp = &bfqio_root_cgroup;
+ return __blkg_prfill_u64(sf, pd, sum);
+}
- spin_lock_init(&bgrp->lock);
- INIT_HLIST_HEAD(&bgrp->group_data);
- bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO;
- bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS;
+static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
+ struct blkg_policy_data *pd, int off)
+{
+ struct blkg_rwstat sum = bfqg_rwstat_pd_recursive_sum(pd, off);
- return &bgrp->css;
+ return __blkg_prfill_rwstat(sf, pd, &sum);
}
-/*
- * We cannot support shared io contexts, as we have no means to support
- * two tasks with the same ioc in two different groups without major rework
- * of the main bic/bfqq data structures. By now we allow a task to change
- * its cgroup only if it's the only owner of its ioc; the drawback of this
- * behavior is that a group containing a task that forked using CLONE_IO
- * will not be destroyed until the tasks sharing the ioc die.
- */
-static int bfqio_can_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
{
- struct task_struct *task;
- struct io_context *ioc;
- int ret = 0;
-
- cgroup_taskset_for_each(task, tset) {
- /*
- * task_lock() is needed to avoid races with
- * exit_io_context()
- */
- task_lock(task);
- ioc = task->io_context;
- if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1)
- /*
- * ioc == NULL means that the task is either too
- * young or exiting: if it has still no ioc the
- * ioc can't be shared, if the task is exiting the
- * attach will fail anyway, no matter what we
- * return here.
- */
- ret = -EINVAL;
- task_unlock(task);
- if (ret)
- break;
- }
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ bfqg_prfill_stat_recursive, &blkcg_policy_bfq,
+ seq_cft(sf)->private, false);
+ return 0;
+}
- return ret;
+static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
+ seq_cft(sf)->private, true);
+ return 0;
}
-static void bfqio_attach(struct cgroup_subsys_state *css,
- struct cgroup_taskset *tset)
+static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
+ struct blkg_policy_data *pd, int off)
{
- struct task_struct *task;
- struct io_context *ioc;
- struct io_cq *icq;
+ struct bfq_group *bfqg = pd_to_bfqg(pd);
+ u64 samples = blkg_stat_read(&bfqg->stats.avg_queue_size_samples);
+ u64 v = 0;
- /*
- * IMPORTANT NOTE: The move of more than one process at a time to a
- * new group has not yet been tested.
- */
- cgroup_taskset_for_each(task, tset) {
- ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
- if (ioc) {
- /*
- * Handle cgroup change here.
- */
- rcu_read_lock();
- hlist_for_each_entry_rcu(icq, &ioc->icq_list, ioc_node)
- if (!strncmp(
- icq->q->elevator->type->elevator_name,
- "bfq", ELV_NAME_MAX))
- bfq_bic_change_cgroup(icq_to_bic(icq),
- css);
- rcu_read_unlock();
- put_io_context(ioc);
- }
+ if (samples) {
+ v = blkg_stat_read(&bfqg->stats.avg_queue_size_sum);
+ v = div64_u64(v, samples);
}
+ __blkg_prfill_u64(sf, pd, v);
+ return 0;
}
-static void bfqio_destroy(struct cgroup_subsys_state *css)
+/* print avg_queue_size */
+static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
{
- struct bfqio_cgroup *bgrp = css_to_bfqio(css);
- struct hlist_node *tmp;
- struct bfq_group *bfqg;
-
- /*
- * Since we are destroying the cgroup, there are no more tasks
- * referencing it, and all the RCU grace periods that may have
- * referenced it are ended (as the destruction of the parent
- * cgroup is RCU-safe); bgrp->group_data will not be accessed by
- * anything else and we don't need any synchronization.
- */
- hlist_for_each_entry_safe(bfqg, tmp, &bgrp->group_data, group_node)
- bfq_destroy_group(bgrp, bfqg);
-
- BUG_ON(!hlist_empty(&bgrp->group_data));
-
- kfree(bgrp);
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ bfqg_prfill_avg_queue_size, &blkcg_policy_bfq,
+ 0, false);
+ return 0;
}
-static int bfqio_css_online(struct cgroup_subsys_state *css)
+static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
- struct bfqio_cgroup *bgrp = css_to_bfqio(css);
+ int ret;
- mutex_lock(&bfqio_mutex);
- bgrp->online = true;
- mutex_unlock(&bfqio_mutex);
+ ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
+ if (ret)
+ return NULL;
- return 0;
+ return blkg_to_bfqg(bfqd->queue->root_blkg);
}
-static void bfqio_css_offline(struct cgroup_subsys_state *css)
-{
- struct bfqio_cgroup *bgrp = css_to_bfqio(css);
+static struct cftype bfqio_files[] = {
+ {
+ .name = "bfq.weight",
+ .read_u64 = bfqio_cgroup_weight_read,
+ .write_u64 = bfqio_cgroup_weight_write,
+ },
+ /* statistics, covering only the tasks in this bfqg */
+ {
+ .name = "bfq.time",
+ .private = offsetof(struct bfq_group, stats.time),
+ .seq_show = bfqg_print_stat,
+ },
+ {
+ .name = "bfq.sectors",
+ .private = offsetof(struct bfq_group, stats.sectors),
+ .seq_show = bfqg_print_stat,
+ },
+ {
+ .name = "bfq.io_service_bytes",
+ .private = offsetof(struct bfq_group, stats.service_bytes),
+ .seq_show = bfqg_print_rwstat,
+ },
+ {
+ .name = "bfq.io_serviced",
+ .private = offsetof(struct bfq_group, stats.serviced),
+ .seq_show = bfqg_print_rwstat,
+ },
+ {
+ .name = "bfq.io_service_time",
+ .private = offsetof(struct bfq_group, stats.service_time),
+ .seq_show = bfqg_print_rwstat,
+ },
+ {
+ .name = "bfq.io_wait_time",
+ .private = offsetof(struct bfq_group, stats.wait_time),
+ .seq_show = bfqg_print_rwstat,
+ },
+ {
+ .name = "bfq.io_merged",
+ .private = offsetof(struct bfq_group, stats.merged),
+ .seq_show = bfqg_print_rwstat,
+ },
+ {
+ .name = "bfq.io_queued",
+ .private = offsetof(struct bfq_group, stats.queued),
+ .seq_show = bfqg_print_rwstat,
+ },
- mutex_lock(&bfqio_mutex);
- bgrp->online = false;
- mutex_unlock(&bfqio_mutex);
-}
+ /* the same statistics which cover the bfqg and its descendants */
+ {
+ .name = "bfq.time_recursive",
+ .private = offsetof(struct bfq_group, stats.time),
+ .seq_show = bfqg_print_stat_recursive,
+ },
+ {
+ .name = "bfq.sectors_recursive",
+ .private = offsetof(struct bfq_group, stats.sectors),
+ .seq_show = bfqg_print_stat_recursive,
+ },
+ {
+ .name = "bfq.io_service_bytes_recursive",
+ .private = offsetof(struct bfq_group, stats.service_bytes),
+ .seq_show = bfqg_print_rwstat_recursive,
+ },
+ {
+ .name = "bfq.io_serviced_recursive",
+ .private = offsetof(struct bfq_group, stats.serviced),
+ .seq_show = bfqg_print_rwstat_recursive,
+ },
+ {
+ .name = "bfq.io_service_time_recursive",
+ .private = offsetof(struct bfq_group, stats.service_time),
+ .seq_show = bfqg_print_rwstat_recursive,
+ },
+ {
+ .name = "bfq.io_wait_time_recursive",
+ .private = offsetof(struct bfq_group, stats.wait_time),
+ .seq_show = bfqg_print_rwstat_recursive,
+ },
+ {
+ .name = "bfq.io_merged_recursive",
+ .private = offsetof(struct bfq_group, stats.merged),
+ .seq_show = bfqg_print_rwstat_recursive,
+ },
+ {
+ .name = "bfq.io_queued_recursive",
+ .private = offsetof(struct bfq_group, stats.queued),
+ .seq_show = bfqg_print_rwstat_recursive,
+ },
+ {
+ .name = "bfq.avg_queue_size",
+ .seq_show = bfqg_print_avg_queue_size,
+ },
+ {
+ .name = "bfq.group_wait_time",
+ .private = offsetof(struct bfq_group, stats.group_wait_time),
+ .seq_show = bfqg_print_stat,
+ },
+ {
+ .name = "bfq.idle_time",
+ .private = offsetof(struct bfq_group, stats.idle_time),
+ .seq_show = bfqg_print_stat,
+ },
+ {
+ .name = "bfq.empty_time",
+ .private = offsetof(struct bfq_group, stats.empty_time),
+ .seq_show = bfqg_print_stat,
+ },
+ {
+ .name = "bfq.dequeue",
+ .private = offsetof(struct bfq_group, stats.dequeue),
+ .seq_show = bfqg_print_stat,
+ },
+ {
+ .name = "bfq.unaccounted_time",
+ .private = offsetof(struct bfq_group, stats.unaccounted_time),
+ .seq_show = bfqg_print_stat,
+ },
+ { } /* terminate */
+};
-struct cgroup_subsys bfqio_cgrp_subsys = {
- .css_alloc = bfqio_create,
- .css_online = bfqio_css_online,
- .css_offline = bfqio_css_offline,
- .can_attach = bfqio_can_attach,
- .attach = bfqio_attach,
- .css_free = bfqio_destroy,
- .legacy_cftypes = bfqio_files,
+static struct blkcg_policy blkcg_policy_bfq = {
+ .pd_size = sizeof(struct bfq_group),
+ .cpd_size = sizeof(struct bfq_group_data),
+ .cftypes = bfqio_files,
+ .pd_init_fn = bfq_pd_init,
+ .cpd_init_fn = bfq_cpd_init,
+ .pd_offline_fn = bfq_pd_offline,
+ .pd_reset_stats_fn = bfq_pd_reset_stats,
};
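/*
 * Not part of the patch: a note for readers. blkcg_policy_bfq presumably
 * still has to be registered with the blk-cgroup core (via
 * blkcg_policy_register()) from the scheduler's module init path; only the
 * per-queue activation through blkcg_activate_policy(), in
 * bfq_create_group_hierarchy() above, is visible in this excerpt.
 */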
+
#else
-static inline void bfq_init_entity(struct bfq_entity *entity,
- struct bfq_group *bfqg)
+
+static void bfq_init_entity(struct bfq_entity *entity,
+ struct bfq_group *bfqg)
{
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
entity->weight = entity->new_weight;
entity->orig_weight = entity->new_weight;
- entity->ioprio = entity->new_ioprio;
- entity->ioprio_class = entity->new_ioprio_class;
+ if (bfqq) {
+ bfqq->ioprio = bfqq->new_ioprio;
+ bfqq->ioprio_class = bfqq->new_ioprio_class;
+ }
entity->sched_data = &bfqg->sched_data;
}
-static inline struct bfq_group *
-bfq_bic_update_cgroup(struct bfq_io_cq *bic)
+static struct bfq_group *
+bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
{
struct bfq_data *bfqd = bic_to_bfqd(bic);
return bfqd->root_group;
}
-static inline void bfq_bfqq_move(struct bfq_data *bfqd,
- struct bfq_queue *bfqq,
- struct bfq_entity *entity,
- struct bfq_group *bfqg)
+static void bfq_bfqq_move(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq,
+ struct bfq_entity *entity,
+ struct bfq_group *bfqg)
{
}
@@ -909,23 +1084,24 @@ static void bfq_end_wr_async(struct bfq_data *bfqd)
bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}
-static inline void bfq_disconnect_groups(struct bfq_data *bfqd)
+static void bfq_disconnect_groups(struct bfq_data *bfqd)
{
bfq_put_async_queues(bfqd, bfqd->root_group);
}
-static inline void bfq_free_root_group(struct bfq_data *bfqd)
+static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
+ struct blkcg *blkcg)
{
- kfree(bfqd->root_group);
+ return bfqd->root_group;
}
-static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
+static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
{
struct bfq_group *bfqg;
int i;
bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
- if (bfqg == NULL)
+ if (!bfqg)
return NULL;
for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
diff --git a/block/bfq-ioc.c b/block/bfq-ioc.c
index 7f6b0004c..fb7bb8f08 100644
--- a/block/bfq-ioc.c
+++ b/block/bfq-ioc.c
@@ -14,7 +14,7 @@
* icq_to_bic - convert iocontext queue structure to bfq_io_cq.
* @icq: the iocontext queue.
*/
-static inline struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
+static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
{
/* bic->icq is the first member, %NULL will convert to %NULL */
return container_of(icq, struct bfq_io_cq, icq);
@@ -27,8 +27,8 @@ static inline struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
*
* Queue lock must be held.
*/
-static inline struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
- struct io_context *ioc)
+static struct bfq_io_cq *bfq_bic_lookup(struct bfq_data *bfqd,
+ struct io_context *ioc)
{
if (ioc)
return icq_to_bic(ioc_lookup_icq(ioc, bfqd->queue));
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 71b51c1b4..05944cbaa 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -82,6 +82,9 @@ static const int bfq_back_penalty = 2;
/* Idling period duration, in jiffies. */
static int bfq_slice_idle = HZ / 125;
+/* Minimum number of assigned budgets for which stats are safe to compute. */
+static const int bfq_stats_min_budgets = 194;
+
/* Default maximum budget values, in sectors and number of requests. */
static const int bfq_default_max_budget = 16 * 1024;
static const int bfq_max_budget_async_rq = 4;
@@ -163,38 +166,22 @@ static int device_speed_thresh[2];
#define RQ_BIC(rq) ((struct bfq_io_cq *) (rq)->elv.priv[0])
#define RQ_BFQQ(rq) ((rq)->elv.priv[1])
-static inline void bfq_schedule_dispatch(struct bfq_data *bfqd);
+static void bfq_schedule_dispatch(struct bfq_data *bfqd);
#include "bfq-ioc.c"
#include "bfq-sched.c"
#include "bfq-cgroup.c"
-#define bfq_class_idle(bfqq) ((bfqq)->entity.ioprio_class ==\
- IOPRIO_CLASS_IDLE)
-#define bfq_class_rt(bfqq) ((bfqq)->entity.ioprio_class ==\
- IOPRIO_CLASS_RT)
+#define bfq_class_idle(bfqq) ((bfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
+#define bfq_class_rt(bfqq) ((bfqq)->ioprio_class == IOPRIO_CLASS_RT)
#define bfq_sample_valid(samples) ((samples) > 80)
/*
- * The following macro groups conditions that need to be evaluated when
- * checking if existing queues and groups form a symmetric scenario
- * and therefore idling can be reduced or disabled for some of the
- * queues. See the comment to the function bfq_bfqq_must_not_expire()
- * for further details.
- */
-#ifdef CONFIG_CGROUP_BFQIO
-#define symmetric_scenario (!bfqd->active_numerous_groups && \
- !bfq_differentiated_weights(bfqd))
-#else
-#define symmetric_scenario (!bfq_differentiated_weights(bfqd))
-#endif
-
-/*
* We regard a request as SYNC, if either it's a read or has the SYNC bit
* set (in which case it could also be a direct WRITE).
*/
-static inline int bfq_bio_sync(struct bio *bio)
+static int bfq_bio_sync(struct bio *bio)
{
if (bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC))
return 1;
@@ -206,7 +193,7 @@ static inline int bfq_bio_sync(struct bio *bio)
* Scheduler run of queue, if there are requests pending and no one in the
* driver that will restart queueing.
*/
-static inline void bfq_schedule_dispatch(struct bfq_data *bfqd)
+static void bfq_schedule_dispatch(struct bfq_data *bfqd)
{
if (bfqd->queued != 0) {
bfq_log(bfqd, "schedule dispatch");
@@ -230,9 +217,9 @@ static struct request *bfq_choose_req(struct bfq_data *bfqd,
#define BFQ_RQ2_WRAP 0x02 /* request 2 wraps */
unsigned wrap = 0; /* bit mask: requests behind the disk head? */
- if (rq1 == NULL || rq1 == rq2)
+ if (!rq1 || rq1 == rq2)
return rq2;
- if (rq2 == NULL)
+ if (!rq2)
return rq1;
if (rq_is_sync(rq1) && !rq_is_sync(rq2))
@@ -345,17 +332,17 @@ bfq_rq_pos_tree_lookup(struct bfq_data *bfqd, struct rb_root *root,
bfq_log(bfqd, "rq_pos_tree_lookup %llu: returning %d",
(long long unsigned)sector,
- bfqq != NULL ? bfqq->pid : 0);
+ bfqq ? bfqq->pid : 0);
return bfqq;
}
-static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
struct rb_node **p, *parent;
struct bfq_queue *__bfqq;
- if (bfqq->pos_root != NULL) {
+ if (bfqq->pos_root) {
rb_erase(&bfqq->pos_node, bfqq->pos_root);
bfqq->pos_root = NULL;
}
@@ -365,10 +352,10 @@ static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq)
if (!bfqq->next_rq)
return;
- bfqq->pos_root = &bfqd->rq_pos_tree;
+ bfqq->pos_root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
__bfqq = bfq_rq_pos_tree_lookup(bfqd, bfqq->pos_root,
blk_rq_pos(bfqq->next_rq), &parent, &p);
- if (__bfqq == NULL) {
+ if (!__bfqq) {
rb_link_node(&bfqq->pos_node, parent, p);
rb_insert_color(&bfqq->pos_node, bfqq->pos_root);
} else
@@ -378,7 +365,7 @@ static void bfq_rq_pos_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq)
/*
* Tell whether there are active queues or groups with differentiated weights.
*/
-static inline bool bfq_differentiated_weights(struct bfq_data *bfqd)
+static bool bfq_differentiated_weights(struct bfq_data *bfqd)
{
/*
* For weights to differ, at least one of the trees must contain
@@ -387,7 +374,7 @@ static inline bool bfq_differentiated_weights(struct bfq_data *bfqd)
return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
(bfqd->queue_weights_tree.rb_node->rb_left ||
bfqd->queue_weights_tree.rb_node->rb_right)
-#ifdef CONFIG_CGROUP_BFQIO
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
) ||
(!RB_EMPTY_ROOT(&bfqd->group_weights_tree) &&
(bfqd->group_weights_tree.rb_node->rb_left ||
@@ -397,6 +384,40 @@ static inline bool bfq_differentiated_weights(struct bfq_data *bfqd)
}
/*
+ * The following function returns true if every queue must receive the
+ * same share of the throughput (this condition is used when deciding
+ * whether idling may be disabled, see the comments in the function
+ * bfq_bfqq_may_idle()).
+ *
+ * Such a scenario occurs when:
+ * 1) all active queues have the same weight,
+ * 2) all active groups at the same level in the groups tree have the same
+ * weight,
+ * 3) all active groups at the same level in the groups tree have the same
+ * number of children.
+ *
+ * Unfortunately, keeping the necessary state for evaluating exactly the
+ * above symmetry conditions would be quite complex and time-consuming.
+ * Therefore this function evaluates, instead, the following stronger
+ * sub-conditions, for which it is much easier to maintain the needed
+ * state:
+ * 1) all active queues have the same weight,
+ * 2) all active groups have the same weight,
+ * 3) all active groups have at most one active child each.
+ * In particular, the last two conditions are always true if hierarchical
+ * support and the cgroups interface are not enabled, thus no state needs
+ * to be maintained in this case.
+ */
+static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
+{
+ return
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ !bfqd->active_numerous_groups &&
+#endif
+ !bfq_differentiated_weights(bfqd);
+}
+
+/*
* If the weight-counter tree passed as input contains no counter for
* the weight of the input entity, then add that counter; otherwise just
* increment the existing counter.
@@ -495,10 +516,10 @@ static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
BUG_ON(RB_EMPTY_NODE(&last->rb_node));
- if (rbprev != NULL)
+ if (rbprev)
prev = rb_entry_rq(rbprev);
- if (rbnext != NULL)
+ if (rbnext)
next = rb_entry_rq(rbnext);
else {
rbnext = rb_first(&bfqq->sort_list);
@@ -510,8 +531,8 @@ static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
}
/* see the definition of bfq_async_charge_factor for details */
-static inline unsigned long bfq_serv_to_charge(struct request *rq,
- struct bfq_queue *bfqq)
+static unsigned long bfq_serv_to_charge(struct request *rq,
+ struct bfq_queue *bfqq)
{
return blk_rq_sectors(rq) *
(1 + ((!bfq_bfqq_sync(bfqq)) * (bfqq->wr_coeff == 1) *
@@ -537,7 +558,7 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
struct request *next_rq = bfqq->next_rq;
unsigned long new_budget;
- if (next_rq == NULL)
+ if (!next_rq)
return;
if (bfqq == bfqd->in_service_queue)
@@ -560,7 +581,7 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
}
}
-static inline unsigned int bfq_wr_duration(struct bfq_data *bfqd)
+static unsigned int bfq_wr_duration(struct bfq_data *bfqd)
{
u64 dur;
@@ -573,13 +594,12 @@ static inline unsigned int bfq_wr_duration(struct bfq_data *bfqd)
return dur;
}
-static inline unsigned
-bfq_bfqq_cooperations(struct bfq_queue *bfqq)
+static unsigned bfq_bfqq_cooperations(struct bfq_queue *bfqq)
{
return bfqq->bic ? bfqq->bic->cooperations : 0;
}
-static inline void
+static void
bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
{
if (bic->saved_idle_window)
@@ -603,7 +623,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
bfqq->wr_coeff = bfqq->bfqd->bfq_wr_coeff;
bfqq->wr_cur_max_time = bic->wr_time_left;
bfqq->last_wr_start_finish = jiffies;
- bfqq->entity.ioprio_changed = 1;
+ bfqq->entity.prio_changed = 1;
}
/*
* Clear wr_time_left to prevent bfq_bfqq_save_state() from
@@ -613,11 +633,12 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
bic->wr_time_left = 0;
}
-/* Must be called with the queue_lock held. */
static int bfqq_process_refs(struct bfq_queue *bfqq)
{
int process_refs, io_refs;
+ lockdep_assert_held(bfqq->bfqd->queue->queue_lock);
+
io_refs = bfqq->allocated[READ] + bfqq->allocated[WRITE];
process_refs = atomic_read(&bfqq->ref) - io_refs - bfqq->entity.on_st;
BUG_ON(process_refs < 0);
@@ -625,8 +646,7 @@ static int bfqq_process_refs(struct bfq_queue *bfqq)
}
/* Empty burst list and add just bfqq (see comments to bfq_handle_burst) */
-static inline void bfq_reset_burst_list(struct bfq_data *bfqd,
- struct bfq_queue *bfqq)
+static void bfq_reset_burst_list(struct bfq_data *bfqd, struct bfq_queue *bfqq)
{
struct bfq_queue *item;
struct hlist_node *n;
@@ -858,14 +878,14 @@ static void bfq_add_request(struct request *rq)
*/
prev = bfqq->next_rq;
next_rq = bfq_choose_req(bfqd, bfqq->next_rq, rq, bfqd->last_position);
- BUG_ON(next_rq == NULL);
+ BUG_ON(!next_rq);
bfqq->next_rq = next_rq;
/*
* Adjust priority tree position, if next_rq changes.
*/
if (prev != bfqq->next_rq)
- bfq_rq_pos_tree_add(bfqd, bfqq);
+ bfq_pos_tree_add_move(bfqd, bfqq);
if (!bfq_bfqq_busy(bfqq)) {
bool soft_rt, coop_or_in_burst,
@@ -873,6 +893,10 @@ static void bfq_add_request(struct request *rq)
bfqq->budget_timeout +
bfqd->bfq_wr_min_idle_time);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_io_add(bfqq_group(RQ_BFQQ(rq)), bfqq,
+ rq->cmd_flags);
+#endif
if (bfq_bfqq_sync(bfqq)) {
bool already_in_burst =
!hlist_unhashed(&bfqq->burst_list_node) ||
@@ -917,7 +941,7 @@ static void bfq_add_request(struct request *rq)
goto add_bfqq_busy;
if (bfq_bfqq_just_split(bfqq))
- goto set_ioprio_changed;
+ goto set_prio_changed;
/*
* If the queue:
@@ -929,7 +953,7 @@ static void bfq_add_request(struct request *rq)
* start a weight-raising period.
*/
if (old_wr_coeff == 1 && (interactive || soft_rt) &&
- (!bfq_bfqq_sync(bfqq) || bfqq->bic != NULL)) {
+ (!bfq_bfqq_sync(bfqq) || bfqq->bic)) {
bfqq->wr_coeff = bfqd->bfq_wr_coeff;
if (interactive)
bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
@@ -1008,9 +1032,9 @@ static void bfq_add_request(struct request *rq)
bfqd->bfq_wr_rt_max_time;
}
}
-set_ioprio_changed:
+set_prio_changed:
if (old_wr_coeff != bfqq->wr_coeff)
- entity->ioprio_changed = 1;
+ entity->prio_changed = 1;
add_bfqq_busy:
bfqq->last_idle_bklogged = jiffies;
bfqq->service_from_backlogged = 0;
@@ -1025,7 +1049,7 @@ add_bfqq_busy:
bfqq->wr_cur_max_time = bfq_wr_duration(bfqd);
bfqd->wr_busy_queues++;
- entity->ioprio_changed = 1;
+ entity->prio_changed = 1;
bfq_log_bfqq(bfqd, bfqq,
"non-idle wrais starting at %lu, rais_max_time %u",
jiffies,
@@ -1048,11 +1072,11 @@ static struct request *bfq_find_rq_fmerge(struct bfq_data *bfqd,
struct bfq_queue *bfqq;
bic = bfq_bic_lookup(bfqd, tsk->io_context);
- if (bic == NULL)
+ if (!bic)
return NULL;
bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
- if (bfqq != NULL)
+ if (bfqq)
return elv_rb_find(&bfqq->sort_list, bio_end_sector(bio));
return NULL;
@@ -1068,8 +1092,7 @@ static void bfq_activate_request(struct request_queue *q, struct request *rq)
(long long unsigned)bfqd->last_position);
}
-static inline void bfq_deactivate_request(struct request_queue *q,
- struct request *rq)
+static void bfq_deactivate_request(struct request_queue *q, struct request *rq)
{
struct bfq_data *bfqd = q->elevator->elevator_data;
@@ -1101,7 +1124,7 @@ static void bfq_remove_request(struct request *rq)
/*
* Remove queue from request-position tree as it is empty.
*/
- if (bfqq->pos_root != NULL) {
+ if (bfqq->pos_root) {
rb_erase(&bfqq->pos_node, bfqq->pos_root);
bfqq->pos_root = NULL;
}
@@ -1111,6 +1134,9 @@ static void bfq_remove_request(struct request *rq)
BUG_ON(bfqq->meta_pending == 0);
bfqq->meta_pending--;
}
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_io_remove(bfqq_group(bfqq), rq->cmd_flags);
+#endif
}
static int bfq_merge(struct request_queue *q, struct request **req,
@@ -1120,7 +1146,7 @@ static int bfq_merge(struct request_queue *q, struct request **req,
struct request *__rq;
__rq = bfq_find_rq_fmerge(bfqd, bio);
- if (__rq != NULL && elv_rq_merge_ok(__rq, bio)) {
+ if (__rq && elv_rq_merge_ok(__rq, bio)) {
*req = __rq;
return ELEVATOR_FRONT_MERGE;
}
@@ -1147,7 +1173,7 @@ static void bfq_merged_request(struct request_queue *q, struct request *req,
prev = bfqq->next_rq;
next_rq = bfq_choose_req(bfqd, bfqq->next_rq, req,
bfqd->last_position);
- BUG_ON(next_rq == NULL);
+ BUG_ON(!next_rq);
bfqq->next_rq = next_rq;
/*
* If next_rq changes, update both the queue's budget to
@@ -1156,11 +1182,19 @@ static void bfq_merged_request(struct request_queue *q, struct request *req,
*/
if (prev != bfqq->next_rq) {
bfq_updated_next_req(bfqd, bfqq);
- bfq_rq_pos_tree_add(bfqd, bfqq);
+ bfq_pos_tree_add_move(bfqd, bfqq);
}
}
}
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+static void bfq_bio_merged(struct request_queue *q, struct request *req,
+ struct bio *bio)
+{
+ bfqg_stats_update_io_merged(bfqq_group(RQ_BFQQ(req)), bio->bi_rw);
+}
+#endif
+
static void bfq_merged_requests(struct request_queue *q, struct request *rq,
struct request *next)
{
@@ -1187,18 +1221,21 @@ static void bfq_merged_requests(struct request_queue *q, struct request *rq,
bfqq->next_rq = rq;
bfq_remove_request(next);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
+#endif
}
/* Must be called with bfqq != NULL */
-static inline void bfq_bfqq_end_wr(struct bfq_queue *bfqq)
+static void bfq_bfqq_end_wr(struct bfq_queue *bfqq)
{
- BUG_ON(bfqq == NULL);
+ BUG_ON(!bfqq);
if (bfq_bfqq_busy(bfqq))
bfqq->bfqd->wr_busy_queues--;
bfqq->wr_coeff = 1;
bfqq->wr_cur_max_time = 0;
/* Trigger a weight change on the next activation of the queue */
- bfqq->entity.ioprio_changed = 1;
+ bfqq->entity.prio_changed = 1;
}
static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
@@ -1208,9 +1245,9 @@ static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
for (i = 0; i < 2; i++)
for (j = 0; j < IOPRIO_BE_NR; j++)
- if (bfqg->async_bfqq[i][j] != NULL)
+ if (bfqg->async_bfqq[i][j])
bfq_bfqq_end_wr(bfqg->async_bfqq[i][j]);
- if (bfqg->async_idle_bfqq != NULL)
+ if (bfqg->async_idle_bfqq)
bfq_bfqq_end_wr(bfqg->async_idle_bfqq);
}
@@ -1229,7 +1266,7 @@ static void bfq_end_wr(struct bfq_data *bfqd)
spin_unlock_irq(bfqd->queue->queue_lock);
}
-static inline sector_t bfq_io_struct_pos(void *io_struct, bool request)
+static sector_t bfq_io_struct_pos(void *io_struct, bool request)
{
if (request)
return blk_rq_pos(io_struct);
@@ -1237,25 +1274,18 @@ static inline sector_t bfq_io_struct_pos(void *io_struct, bool request)
return ((struct bio *)io_struct)->bi_iter.bi_sector;
}
-static inline sector_t bfq_dist_from(sector_t pos1,
- sector_t pos2)
-{
- if (pos1 >= pos2)
- return pos1 - pos2;
- else
- return pos2 - pos1;
-}
-
-static inline int bfq_rq_close_to_sector(void *io_struct, bool request,
- sector_t sector)
+static int bfq_rq_close_to_sector(void *io_struct, bool request,
+ sector_t sector)
{
- return bfq_dist_from(bfq_io_struct_pos(io_struct, request), sector) <=
+ return abs64(bfq_io_struct_pos(io_struct, request) - sector) <=
BFQQ_SEEK_THR;
}
-static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector)
+static struct bfq_queue *bfqq_find_close(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq,
+ sector_t sector)
{
- struct rb_root *root = &bfqd->rq_pos_tree;
+ struct rb_root *root = &bfq_bfqq_to_bfqg(bfqq)->rq_pos_tree;
struct rb_node *parent, *node;
struct bfq_queue *__bfqq;
@@ -1267,7 +1297,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector)
* request, choose it.
*/
__bfqq = bfq_rq_pos_tree_lookup(bfqd, root, sector, &parent, NULL);
- if (__bfqq != NULL)
+ if (__bfqq)
return __bfqq;
/*
@@ -1283,7 +1313,7 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector)
node = rb_next(&__bfqq->pos_node);
else
node = rb_prev(&__bfqq->pos_node);
- if (node == NULL)
+ if (!node)
return NULL;
__bfqq = rb_entry(node, struct bfq_queue, pos_node);
@@ -1293,56 +1323,19 @@ static struct bfq_queue *bfqq_close(struct bfq_data *bfqd, sector_t sector)
return NULL;
}
-/*
- * bfqd - obvious
- * cur_bfqq - passed in so that we don't decide that the current queue
- * is closely cooperating with itself
- * sector - used as a reference point to search for a close queue
- */
-static struct bfq_queue *bfq_close_cooperator(struct bfq_data *bfqd,
- struct bfq_queue *cur_bfqq,
- sector_t sector)
+static struct bfq_queue *bfq_find_close_cooperator(struct bfq_data *bfqd,
+ struct bfq_queue *cur_bfqq,
+ sector_t sector)
{
struct bfq_queue *bfqq;
- if (bfq_class_idle(cur_bfqq))
- return NULL;
- if (!bfq_bfqq_sync(cur_bfqq))
- return NULL;
- if (BFQQ_SEEKY(cur_bfqq))
- return NULL;
-
- /* If device has only one backlogged bfq_queue, don't search. */
- if (bfqd->busy_queues == 1)
- return NULL;
-
/*
* We should notice if some of the queues are cooperating, e.g.
* working closely on the same area of the disk. In that case,
* we can group them together and don't waste time idling.
*/
- bfqq = bfqq_close(bfqd, sector);
- if (bfqq == NULL || bfqq == cur_bfqq)
- return NULL;
-
- /*
- * Do not merge queues from different bfq_groups.
- */
- if (bfqq->entity.parent != cur_bfqq->entity.parent)
- return NULL;
-
- /*
- * It only makes sense to merge sync queues.
- */
- if (!bfq_bfqq_sync(bfqq))
- return NULL;
- if (BFQQ_SEEKY(bfqq))
- return NULL;
-
- /*
- * Do not merge queues of different priority classes.
- */
- if (bfq_class_rt(bfqq) != bfq_class_rt(cur_bfqq))
+ bfqq = bfqq_find_close(bfqd, cur_bfqq, sector);
+ if (!bfqq || bfqq == cur_bfqq)
return NULL;
return bfqq;
@@ -1409,6 +1402,35 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
return new_bfqq;
}
+static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
+ struct bfq_queue *new_bfqq)
+{
+ if (WARN_ON(bfqq->entity.parent != new_bfqq->entity.parent))
+ return false;
+
+ if (bfq_class_idle(bfqq) || bfq_class_idle(new_bfqq) ||
+ (bfqq->ioprio_class != new_bfqq->ioprio_class))
+ return false;
+
+ /*
+ * If either of the queues has already been detected as seeky,
+ * then merging it with the other queue is unlikely to lead to
+ * sequential I/O.
+ */
+ if (BFQQ_SEEKY(bfqq) || BFQQ_SEEKY(new_bfqq))
+ return false;
+
+ /*
+ * Interleaved I/O is known to be done by (some) applications
+ * only for reads, so it does not make sense to merge async
+ * queues.
+ */
+ if (!bfq_bfqq_sync(bfqq) || !bfq_bfqq_sync(new_bfqq))
+ return false;
+
+ return true;
+}
+
/*
* Attempt to schedule a merge of bfqq with the currently in-service queue
* or with a close queue among the scheduled queues.
@@ -1430,56 +1452,48 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (bfqq->new_bfqq)
return bfqq->new_bfqq;
-
if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq))
return NULL;
+ /* If device has only one backlogged bfq_queue, don't search. */
+ if (bfqd->busy_queues == 1)
+ return NULL;
in_service_bfqq = bfqd->in_service_queue;
- if (in_service_bfqq == NULL || in_service_bfqq == bfqq ||
+ if (!in_service_bfqq || in_service_bfqq == bfqq ||
!bfqd->in_service_bic ||
unlikely(in_service_bfqq == &bfqd->oom_bfqq))
goto check_scheduled;
- if (bfq_class_idle(in_service_bfqq) || bfq_class_idle(bfqq))
- goto check_scheduled;
-
- if (bfq_class_rt(in_service_bfqq) != bfq_class_rt(bfqq))
- goto check_scheduled;
-
- if (in_service_bfqq->entity.parent != bfqq->entity.parent)
- goto check_scheduled;
-
if (bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
- bfq_bfqq_sync(in_service_bfqq) && bfq_bfqq_sync(bfqq)) {
+ bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) {
new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
- if (new_bfqq != NULL)
- return new_bfqq; /* Merge with in-service queue */
+ if (new_bfqq)
+ return new_bfqq;
}
-
/*
* Check whether there is a cooperator among currently scheduled
* queues. The only thing we need is that the bio/request is not
* NULL, as we need it to establish whether a cooperator exists.
*/
check_scheduled:
- new_bfqq = bfq_close_cooperator(bfqd, bfqq,
- bfq_io_struct_pos(io_struct, request));
- if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq))
+ new_bfqq = bfq_find_close_cooperator(bfqd, bfqq,
+ bfq_io_struct_pos(io_struct, request));
+ if (new_bfqq && likely(new_bfqq != &bfqd->oom_bfqq) &&
+ bfq_may_be_close_cooperator(bfqq, new_bfqq))
return bfq_setup_merge(bfqq, new_bfqq);
return NULL;
}
-static inline void
-bfq_bfqq_save_state(struct bfq_queue *bfqq)
+static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
{
/*
- * If bfqq->bic == NULL, the queue is already shared or its requests
+ * If !bfqq->bic, the queue is already shared or its requests
* have already been redirected to a shared queue; both idle window
* and weight raising state have already been saved. Do nothing.
*/
- if (bfqq->bic == NULL)
+ if (!bfqq->bic)
return;
if (bfqq->bic->wr_time_left)
/*
@@ -1523,8 +1537,7 @@ bfq_bfqq_save_state(struct bfq_queue *bfqq)
bfqq->bic->failed_cooperations = 0;
}
-static inline void
-bfq_get_bic_reference(struct bfq_queue *bfqq)
+static void bfq_get_bic_reference(struct bfq_queue *bfqq)
{
/*
* If bfqq->bic has a non-NULL value, the bic to which it belongs
@@ -1572,7 +1585,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
bfq_put_queue(bfqq);
}
-static inline void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq)
+static void bfq_bfqq_increase_failed_cooperations(struct bfq_queue *bfqq)
{
struct bfq_io_cq *bic = bfqq->bic;
struct bfq_data *bfqd = bfqq->bfqd;
@@ -1603,7 +1616,7 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
* Queue lock is held here.
*/
bic = bfq_bic_lookup(bfqd, current->io_context);
- if (bic == NULL)
+ if (!bic)
return 0;
bfqq = bic_to_bfqq(bic, bfq_bio_sync(bio));
@@ -1611,9 +1624,9 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
* We take advantage of this function to perform an early merge
* of the queues of possible cooperating processes.
*/
- if (bfqq != NULL) {
+ if (bfqq) {
new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false);
- if (new_bfqq != NULL) {
+ if (new_bfqq) {
bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq);
/*
* If we get here, the bio will be queued in the
@@ -1631,7 +1644,10 @@ static int bfq_allow_merge(struct request_queue *q, struct request *rq,
static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
struct bfq_queue *bfqq)
{
- if (bfqq != NULL) {
+ if (bfqq) {
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_avg_queue_size(bfqq_group(bfqq));
+#endif
bfq_mark_bfqq_must_alloc(bfqq);
bfq_mark_bfqq_budget_new(bfqq);
bfq_clear_bfqq_fifo_expire(bfqq);
@@ -1639,7 +1655,7 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd,
bfqd->budgets_assigned = (bfqd->budgets_assigned*7 + 256) / 8;
bfq_log_bfqq(bfqd, bfqq,
- "set_in_service_queue, cur-budget = %lu",
+ "set_in_service_queue, cur-budget = %d",
bfqq->entity.budget);
}
@@ -1662,9 +1678,9 @@ static struct bfq_queue *bfq_set_in_service_queue(struct bfq_data *bfqd)
* stored in bfqd, which is dynamically updated according to the
* estimated disk peak rate; otherwise return the default max budget
*/
-static inline unsigned long bfq_max_budget(struct bfq_data *bfqd)
+static int bfq_max_budget(struct bfq_data *bfqd)
{
- if (bfqd->budgets_assigned < 194)
+ if (bfqd->budgets_assigned < bfq_stats_min_budgets)
return bfq_default_max_budget;
else
return bfqd->bfq_max_budget;
@@ -1674,9 +1690,9 @@ static inline unsigned long bfq_max_budget(struct bfq_data *bfqd)
* Return min budget, which is a fraction of the current or default
* max budget (trying with 1/32)
*/
-static inline unsigned long bfq_min_budget(struct bfq_data *bfqd)
+static int bfq_min_budget(struct bfq_data *bfqd)
{
- if (bfqd->budgets_assigned < 194)
+ if (bfqd->budgets_assigned < bfq_stats_min_budgets)
return bfq_default_max_budget / 32;
else
return bfqd->bfq_max_budget / 32;
@@ -1692,7 +1708,7 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
/* Processes have exited, don't wait. */
bic = bfqd->in_service_bic;
- if (bic == NULL || atomic_read(&bic->icq.ioc->active_ref) == 0)
+ if (!bic || atomic_read(&bic->icq.ioc->active_ref) == 0)
return;
bfq_mark_bfqq_wait_request(bfqq);
@@ -1718,12 +1734,15 @@ static void bfq_arm_slice_timer(struct bfq_data *bfqd)
((BFQQ_SEEKY(bfqq) && bfqq->entity.service >
bfq_max_budget(bfqq->bfqd) / 8) ||
bfq_bfqq_constantly_seeky(bfqq)) && bfqq->wr_coeff == 1 &&
- symmetric_scenario)
+ bfq_symmetric_scenario(bfqd))
sl = min(sl, msecs_to_jiffies(BFQ_MIN_TT));
else if (bfqq->wr_coeff > 1)
sl = sl * 3;
bfqd->last_idling_start = ktime_get();
mod_timer(&bfqd->idle_slice_timer, jiffies + sl);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_set_start_idle_time(bfqq_group(bfqq));
+#endif
bfq_log(bfqd, "arm idle: %u/%u ms",
jiffies_to_msecs(sl), jiffies_to_msecs(bfqd->bfq_slice_idle));
}
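To make the two adjustments above concrete, here is a small arithmetic sketch; the 8 ms idle slice and the 2 ms minimum are assumed typical values, not figures quoted from this patch:

    #include <stdio.h>

    int main(void)
    {
            unsigned int slice_idle_ms = 8; /* assumed bfq_slice_idle default */
            unsigned int min_tt_ms = 2;     /* assumed BFQ_MIN_TT value */

            /* seeky queue in a symmetric scenario on an NCQ drive: clamp down */
            unsigned int sl_seeky = slice_idle_ms < min_tt_ms ?
                                    slice_idle_ms : min_tt_ms;
            /* weight-raised queue: stretch the idle window instead */
            unsigned int sl_wr = slice_idle_ms * 3;

            printf("seeky: %u ms, weight-raised: %u ms\n", sl_seeky, sl_wr);
            return 0;
    }

That is, a seeky queue gets only a couple of milliseconds of idling when idling cannot help service guarantees, while a weight-raised queue is granted roughly three times the normal idle window.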
@@ -1777,6 +1796,10 @@ static void bfq_dispatch_insert(struct request_queue *q, struct request *rq)
if (bfq_bfqq_sync(bfqq))
bfqd->sync_flight++;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_dispatch(bfqq_group(bfqq), blk_rq_bytes(rq),
+ rq->cmd_flags);
+#endif
}
/*
@@ -1802,7 +1825,7 @@ static struct request *bfq_check_fifo(struct bfq_queue *bfqq)
return rq;
}
-static inline unsigned long bfq_bfqq_budget_left(struct bfq_queue *bfqq)
+static int bfq_bfqq_budget_left(struct bfq_queue *bfqq)
{
struct bfq_entity *entity = &bfqq->entity;
return entity->budget - entity->service;
@@ -1836,7 +1859,7 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
/*
* Resort priority tree of potential close cooperators.
*/
- bfq_rq_pos_tree_add(bfqd, bfqq);
+ bfq_pos_tree_add_move(bfqd, bfqq);
}
}
@@ -1846,24 +1869,24 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
* @bfqq: queue to update.
* @reason: reason for expiration.
*
- * Handle the feedback on @bfqq budget. See the body for detailed
- * comments.
+ * Handle the feedback on @bfqq budget at queue expiration.
+ * See the body for detailed comments.
*/
static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
struct bfq_queue *bfqq,
enum bfqq_expiration reason)
{
struct request *next_rq;
- unsigned long budget, min_budget;
+ int budget, min_budget;
budget = bfqq->max_budget;
min_budget = bfq_min_budget(bfqd);
BUG_ON(bfqq != bfqd->in_service_queue);
- bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %lu, budg left %lu",
+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last budg %d, budg left %d",
bfqq->entity.budget, bfq_bfqq_budget_left(bfqq));
- bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %lu, min budg %lu",
+ bfq_log_bfqq(bfqd, bfqq, "recalc_budg: last max_budg %d, min budg %d",
budget, bfq_min_budget(bfqd));
bfq_log_bfqq(bfqd, bfqq, "recalc_budg: sync %d, seeky %d",
bfq_bfqq_sync(bfqq), BFQQ_SEEKY(bfqd->in_service_queue));
@@ -1940,18 +1963,19 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
default:
return;
}
- } else /* async queue */
- /* async queues get always the maximum possible budget
- * (their ability to dispatch is limited by
- * @bfqd->bfq_max_budget_async_rq).
- */
+ } else
+ /*
+ * Async queues always get the maximum possible budget
+ * (their ability to dispatch is limited by
+ * @bfqd->bfq_max_budget_async_rq).
+ */
budget = bfqd->bfq_max_budget;
bfqq->max_budget = budget;
- if (bfqd->budgets_assigned >= 194 && bfqd->bfq_user_max_budget == 0 &&
- bfqq->max_budget > bfqd->bfq_max_budget)
- bfqq->max_budget = bfqd->bfq_max_budget;
+ if (bfqd->budgets_assigned >= bfq_stats_min_budgets &&
+ !bfqd->bfq_user_max_budget)
+ bfqq->max_budget = min(bfqq->max_budget, bfqd->bfq_max_budget);
/*
* Make sure that we have enough budget for the next request.
@@ -1960,14 +1984,14 @@ static void __bfq_bfqq_recalc_budget(struct bfq_data *bfqd,
* update.
*/
next_rq = bfqq->next_rq;
- if (next_rq != NULL)
+ if (next_rq)
bfqq->entity.budget = max_t(unsigned long, bfqq->max_budget,
bfq_serv_to_charge(next_rq, bfqq));
else
bfqq->entity.budget = bfqq->max_budget;
- bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %lu",
- next_rq != NULL ? blk_rq_sectors(next_rq) : 0,
+ bfq_log_bfqq(bfqd, bfqq, "head sect: %u, new budget %d",
+ next_rq ? blk_rq_sectors(next_rq) : 0,
bfqq->entity.budget);
}
@@ -1993,15 +2017,15 @@ static unsigned long bfq_calc_max_budget(u64 peak_rate, u64 timeout)
* seeky processes, and hence reduce their chances to lower the
* throughput. See the code for more details.
*/
-static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
- int compensate, enum bfqq_expiration reason)
+static bool bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
+ bool compensate, enum bfqq_expiration reason)
{
u64 bw, usecs, expected, timeout;
ktime_t delta;
int update = 0;
if (!bfq_bfqq_sync(bfqq) || bfq_bfqq_budget_new(bfqq))
- return 0;
+ return false;
if (compensate)
delta = bfqd->last_idling_start;
@@ -2012,7 +2036,7 @@ static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
/* Don't trust short/unrealistic values. */
if (usecs < 100 || usecs >= LONG_MAX)
- return 0;
+ return false;
/*
* Calculate the bandwidth for the last slice. We use a 64 bit
@@ -2061,7 +2085,7 @@ static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfqd->bfq_max_budget =
bfq_calc_max_budget(bfqd->peak_rate,
timeout);
- bfq_log(bfqd, "new max_budget=%lu",
+ bfq_log(bfqd, "new max_budget=%d",
bfqd->bfq_max_budget);
}
if (bfqd->device_speed == BFQ_BFQD_FAST &&
@@ -2086,7 +2110,7 @@ static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* and for the moment return false.
*/
if (bfqq->entity.budget <= bfq_max_budget(bfqd) / 8)
- return 0;
+ return false;
/*
* A process is considered ``slow'' (i.e., seeky, so that we
@@ -2161,8 +2185,8 @@ static int bfq_update_peak_rate(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* seems to be quite precise also in embedded systems and KVM/QEMU virtual
* machines.
*/
-static inline unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
- struct bfq_queue *bfqq)
+static unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq)
{
return max(bfqq->last_idle_bklogged +
HZ * bfqq->service_from_backlogged /
@@ -2175,7 +2199,7 @@ static inline unsigned long bfq_bfqq_softrt_next_start(struct bfq_data *bfqd,
* the current time will be lower than this time instant according to the macro
* time_is_before_jiffies().
*/
-static inline unsigned long bfq_infinity_from_now(unsigned long now)
+static unsigned long bfq_infinity_from_now(unsigned long now)
{
return now + ULONG_MAX / 2;
}
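The returned instant works together with the usual wrap-around-safe jiffies comparison: a timestamp ULONG_MAX/2 ticks ahead keeps comparing as "not yet reached" for as long as the signed-difference trick can represent. A standalone sketch of that arithmetic, with is_before() standing in for the idea behind time_is_before_jiffies():

    #include <limits.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* True iff timestamp a lies in the past relative to now (wrap-safe). */
    static bool is_before(unsigned long now, unsigned long a)
    {
            return (long)(a - now) < 0;
    }

    int main(void)
    {
            unsigned long now = 123456UL;
            unsigned long never = now + ULONG_MAX / 2; /* "infinity from now" */

            /* Even a million ticks later, 'never' is still not in the past. */
            printf("%d\n", is_before(now + 1000000UL, never)); /* prints 0 */
            return 0;
    }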
@@ -2212,13 +2236,14 @@ static inline unsigned long bfq_infinity_from_now(unsigned long now)
*/
static void bfq_bfqq_expire(struct bfq_data *bfqd,
struct bfq_queue *bfqq,
- int compensate,
+ bool compensate,
enum bfqq_expiration reason)
{
- int slow;
+ bool slow;
BUG_ON(bfqq != bfqd->in_service_queue);
- /* Update disk peak rate for autotuning and check whether the
+ /*
+ * Update disk peak rate for autotuning and check whether the
* process is slow (see bfq_update_peak_rate).
*/
slow = bfq_update_peak_rate(bfqd, bfqq, compensate, reason);
@@ -2312,12 +2337,12 @@ static void bfq_bfqq_expire(struct bfq_data *bfqd,
* just checked on request arrivals and completions, as well as on
* idle timer expirations.
*/
-static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
+static bool bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
{
if (bfq_bfqq_budget_new(bfqq) ||
time_before(jiffies, bfqq->budget_timeout))
- return 0;
- return 1;
+ return false;
+ return true;
}
/*
@@ -2328,7 +2353,7 @@ static int bfq_bfqq_budget_timeout(struct bfq_queue *bfqq)
* does not hold, or if the queue is slow enough to deserve only to be
* kicked off for preserving a high throughput.
*/
-static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
+static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
{
bfq_log_bfqq(bfqq->bfqd, bfqq,
"may_budget_timeout: wait_request %d left %d timeout %d",
@@ -2343,183 +2368,278 @@ static inline int bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
}
/*
- * Device idling is allowed only for the queues for which this function
- * returns true. For this reason, the return value of this function plays a
- * critical role for both throughput boosting and service guarantees. The
- * return value is computed through a logical expression. In this rather
- * long comment, we try to briefly describe all the details and motivations
- * behind the components of this logical expression.
- *
- * First, the expression is false if bfqq is not sync, or if: bfqq happened
- * to become active during a large burst of queue activations, and the
- * pattern of requests bfqq contains boosts the throughput if bfqq is
- * expired. In fact, queues that became active during a large burst benefit
- * only from throughput, as discussed in the comments to bfq_handle_burst.
- * In this respect, expiring bfqq certainly boosts the throughput on NCQ-
- * capable flash-based devices, whereas, on rotational devices, it boosts
- * the throughput only if bfqq contains random requests.
- *
- * On the opposite end, if (a) bfqq is sync, (b) the above burst-related
- * condition does not hold, and (c) bfqq is being weight-raised, then the
- * expression always evaluates to true, as device idling is instrumental
- * for preserving low-latency guarantees (see [1]). If, instead, conditions
- * (a) and (b) do hold, but (c) does not, then the expression evaluates to
- * true only if: (1) bfqq is I/O-bound and has a non-null idle window, and
- * (2) at least one of the following two conditions holds.
- * The first condition is that the device is not performing NCQ, because
- * idling the device most certainly boosts the throughput if this condition
- * holds and bfqq is I/O-bound and has been granted a non-null idle window.
- * The second compound condition is made of the logical AND of two components.
- *
- * The first component is true only if there is no weight-raised busy
- * queue. This guarantees that the device is not idled for a sync non-
- * weight-raised queue when there are busy weight-raised queues. The former
- * is then expired immediately if empty. Combined with the timestamping
- * rules of BFQ (see [1] for details), this causes sync non-weight-raised
- * queues to get a lower number of requests served, and hence to ask for a
- * lower number of requests from the request pool, before the busy weight-
- * raised queues get served again.
- *
- * This is beneficial for the processes associated with weight-raised
- * queues, when the request pool is saturated (e.g., in the presence of
- * write hogs). In fact, if the processes associated with the other queues
- * ask for requests at a lower rate, then weight-raised processes have a
- * higher probability to get a request from the pool immediately (or at
- * least soon) when they need one. Hence they have a higher probability to
- * actually get a fraction of the disk throughput proportional to their
- * high weight. This is especially true with NCQ-capable drives, which
- * enqueue several requests in advance and further reorder internally-
- * queued requests.
- *
- * In the end, mistreating non-weight-raised queues when there are busy
- * weight-raised queues seems to mitigate starvation problems in the
- * presence of heavy write workloads and NCQ, and hence to guarantee a
- * higher application and system responsiveness in these hostile scenarios.
- *
- * If the first component of the compound condition is instead true, i.e.,
- * there is no weight-raised busy queue, then the second component of the
- * compound condition takes into account service-guarantee and throughput
- * issues related to NCQ (recall that the compound condition is evaluated
- * only if the device is detected as supporting NCQ).
+ * For a queue that becomes empty, device idling is allowed only if
+ * this function returns true for that queue. As a consequence, since
+ * device idling plays a critical role for both throughput boosting
+ * and service guarantees, the return value of this function plays a
+ * critical role as well.
*
- * As for service guarantees, allowing the drive to enqueue more than one
- * request at a time, and hence delegating de facto final scheduling
- * decisions to the drive's internal scheduler, causes loss of control on
- * the actual request service order. In this respect, when the drive is
- * allowed to enqueue more than one request at a time, the service
- * distribution enforced by the drive's internal scheduler is likely to
- * coincide with the desired device-throughput distribution only in the
- * following, perfectly symmetric, scenario:
- * 1) all active queues have the same weight,
- * 2) all active groups at the same level in the groups tree have the same
- * weight,
- * 3) all active groups at the same level in the groups tree have the same
- * number of children.
- *
- * Even in such a scenario, sequential I/O may still receive a preferential
- * treatment, but this is not likely to be a big issue with flash-based
- * devices, because of their non-dramatic loss of throughput with random
- * I/O. Things do differ with HDDs, for which additional care is taken, as
- * explained after completing the discussion for flash-based devices.
+ * In a nutshell, this function returns true only if idling is
+ * beneficial for throughput or, even if detrimental for throughput,
+ * idling is however necessary to preserve service guarantees (low
+ * latency, desired throughput distribution, ...). In particular, on
+ * NCQ-capable devices, this function tries to return false, so as to
+ * help keep the drives' internal queues full, whenever this helps the
+ * device boost the throughput without causing any service-guarantee
+ * issue.
*
- * Unfortunately, keeping the necessary state for evaluating exactly the
- * above symmetry conditions would be quite complex and time-consuming.
- * Therefore BFQ evaluates instead the following stronger sub-conditions,
- * for which it is much easier to maintain the needed state:
- * 1) all active queues have the same weight,
- * 2) all active groups have the same weight,
- * 3) all active groups have at most one active child each.
- * In particular, the last two conditions are always true if hierarchical
- * support and the cgroups interface are not enabled, hence no state needs
- * to be maintained in this case.
- *
- * According to the above considerations, the second component of the
- * compound condition evaluates to true if any of the above symmetry
- * sub-condition does not hold, or the device is not flash-based. Therefore,
- * if also the first component is true, then idling is allowed for a sync
- * queue. These are the only sub-conditions considered if the device is
- * flash-based, as, for such a device, it is sensible to force idling only
- * for service-guarantee issues. In fact, as for throughput, idling
- * NCQ-capable flash-based devices would not boost the throughput even
- * with sequential I/O; rather it would lower the throughput in proportion
- * to how fast the device is. In the end, (only) if all the three
- * sub-conditions hold and the device is flash-based, the compound
- * condition evaluates to false and therefore no idling is performed.
- *
- * As already said, things change with a rotational device, where idling
- * boosts the throughput with sequential I/O (even with NCQ). Hence, for
- * such a device the second component of the compound condition evaluates
- * to true also if the following additional sub-condition does not hold:
- * the queue is constantly seeky. Unfortunately, this different behavior
- * with respect to flash-based devices causes an additional asymmetry: if
- * some sync queues enjoy idling and some other sync queues do not, then
- * the latter get a low share of the device throughput, simply because the
- * former get many requests served after being set as in service, whereas
- * the latter do not. As a consequence, to guarantee the desired throughput
- * distribution, on HDDs the compound expression evaluates to true (and
- * hence device idling is performed) also if the following last symmetry
- * condition does not hold: no other queue is benefiting from idling. Also
- * this last condition is actually replaced with a simpler-to-maintain and
- * stronger condition: there is no busy queue which is not constantly seeky
- * (and hence may also benefit from idling).
- *
- * To sum up, when all the required symmetry and throughput-boosting
- * sub-conditions hold, the second component of the compound condition
- * evaluates to false, and hence no idling is performed. This helps to
- * keep the drives' internal queues full on NCQ-capable devices, and hence
- * to boost the throughput, without causing 'almost' any loss of service
- * guarantees. The 'almost' follows from the fact that, if the internal
- * queue of one such device is filled while all the sub-conditions hold,
- * but at some point in time some sub-condition stops to hold, then it may
- * become impossible to let requests be served in the new desired order
- * until all the requests already queued in the device have been served.
+ * In more detail, the return value of this function is obtained by,
+ * first, computing a number of boolean variables that take into
+ * account throughput and service-guarantee issues, and, then,
+ * combining these variables in a logical expression. Most of the
+ * issues taken into account are not trivial. We discuss these issues
+ * while introducing the variables.
*/
-static inline bool bfq_bfqq_must_not_expire(struct bfq_queue *bfqq)
+static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
{
struct bfq_data *bfqd = bfqq->bfqd;
-#define cond_for_seeky_on_ncq_hdd (bfq_bfqq_constantly_seeky(bfqq) && \
- bfqd->busy_in_flight_queues == \
- bfqd->const_seeky_busy_in_flight_queues)
+ bool idling_boosts_thr, idling_boosts_thr_without_issues,
+ all_queues_seeky, on_hdd_and_not_all_queues_seeky,
+ idling_needed_for_service_guarantees,
+ asymmetric_scenario;
-#define cond_for_expiring_in_burst (bfq_bfqq_in_large_burst(bfqq) && \
- bfqd->hw_tag && \
- (blk_queue_nonrot(bfqd->queue) || \
- bfq_bfqq_constantly_seeky(bfqq)))
+ /*
+ * The next variable takes into account the cases where idling
+ * boosts the throughput.
+ *
+ * The value of the variable is computed considering, first, that
+ * idling is virtually always beneficial for the throughput if:
+ * (a) the device is not NCQ-capable, or
+ * (b) regardless of the presence of NCQ, the device is rotational
+ * and the request pattern for bfqq is I/O-bound and sequential.
+ *
+ * Secondly, and in contrast to the above item (b), idling an
+ * NCQ-capable flash-based device would not boost the
+ * throughput even with sequential I/O; rather it would lower
+ * the throughput in proportion to how fast the device
+ * is. Accordingly, the next variable is true if either of the
+ * above conditions (a) or (b) holds, and, in particular, happens
+ * to be false if the device is an NCQ-capable flash-based
+ * device.
+ */
+ idling_boosts_thr = !bfqd->hw_tag ||
+ (!blk_queue_nonrot(bfqd->queue) && bfq_bfqq_IO_bound(bfqq) &&
+ bfq_bfqq_idle_window(bfqq));
-/*
- * Condition for expiring a non-weight-raised queue (and hence not idling
- * the device).
- */
-#define cond_for_expiring_non_wr (bfqd->hw_tag && \
- (bfqd->wr_busy_queues > 0 || \
- (blk_queue_nonrot(bfqd->queue) || \
- cond_for_seeky_on_ncq_hdd)))
+ /*
+ * The value of the next variable,
+ * idling_boosts_thr_without_issues, is equal to that of
+ * idling_boosts_thr, unless a special case holds. In this
+ * special case, described below, idling may cause problems to
+ * weight-raised queues.
+ *
+ * When the request pool is saturated (e.g., in the presence
+ * of write hogs), if the processes associated with
+ * non-weight-raised queues ask for requests at a lower rate,
+ * then processes associated with weight-raised queues have a
+ * higher probability to get a request from the pool
+ * immediately (or at least soon) when they need one. Thus
+ * they have a higher probability to actually get a fraction
+ * of the device throughput proportional to their high
+ * weight. This is especially true with NCQ-capable drives,
+ * which enqueue several requests in advance, and further
+ * reorder internally-queued requests.
+ *
+ * For this reason, we force to false the value of
+ * idling_boosts_thr_without_issues if there are weight-raised
+ * busy queues. In this case, and if bfqq is not weight-raised,
+ * this guarantees that the device is not idled for bfqq (if,
+ * instead, bfqq is weight-raised, then idling will be
+ * guaranteed by another variable, see below). Combined with
+ * the timestamping rules of BFQ (see [1] for details), this
+ * behavior causes bfqq, and hence any sync non-weight-raised
+ * queue, to get a lower number of requests served, and thus
+ * to ask for a lower number of requests from the request
+ * pool, before the busy weight-raised queues get served
+ * again. This often mitigates starvation problems in the
+ * presence of heavy write workloads and NCQ, thereby
+ * guaranteeing a higher application and system responsiveness
+ * in these hostile scenarios.
+ */
+ idling_boosts_thr_without_issues = idling_boosts_thr &&
+ bfqd->wr_busy_queues == 0;
+
+ /*
+ * There are then two cases where idling must be performed not
+ * for throughput concerns, but to preserve service
+ * guarantees. In the description of these cases, we say, for
+ * short, that a queue is sequential/random if the process
+ * associated to the queue issues sequential/random requests
+ * (in the second case the queue may be tagged as seeky or
+ * even constantly_seeky).
+ *
+ * To introduce the first case, we note that, since
+ * bfq_bfqq_idle_window(bfqq) is false if the device is
+ * NCQ-capable and bfqq is random (see
+ * bfq_update_idle_window()), then, from the above two
+ * assignments it follows that
+ * idling_boosts_thr_without_issues is false if the device is
+ * NCQ-capable and bfqq is random. Therefore, for this case,
+ * device idling would never be allowed if we used just
+ * idling_boosts_thr_without_issues to decide whether to allow
+ * it. And, beneficially, this would imply that throughput
+ * would always be boosted also with random I/O on NCQ-capable
+ * HDDs.
+ *
+ * But we must be careful on this point, to avoid an unfair
+ * treatment for bfqq. In fact, because of the same above
+ * assignments, idling_boosts_thr_without_issues is, on the
+ * other hand, true if 1) the device is an HDD and bfqq is
+ * sequential, and 2) there are no busy weight-raised
+ * queues. As a consequence, if we used just
+ * idling_boosts_thr_without_issues to decide whether to idle
+ * the device, then with an HDD we might easily bump into a
+ * scenario where queues that are sequential and I/O-bound
+ * would enjoy idling, whereas random queues would not. The
+ * latter might then get a low share of the device throughput,
+ * simply because the former would get many requests served
+ * after being set as in service, while the latter would not.
+ *
+ * To address this issue, we start by setting to true a
+ * sentinel variable, on_hdd_and_not_all_queues_seeky, if the
+ * device is rotational and not all queues with pending or
+ * in-flight requests are constantly seeky (i.e., there are
+ * active sequential queues, and bfqq might then be mistreated
+ * if it does not enjoy idling because it is random).
+ */
+ all_queues_seeky = bfq_bfqq_constantly_seeky(bfqq) &&
+ bfqd->busy_in_flight_queues ==
+ bfqd->const_seeky_busy_in_flight_queues;
+
+ on_hdd_and_not_all_queues_seeky =
+ !blk_queue_nonrot(bfqd->queue) && !all_queues_seeky;
+
+ /*
+ * To introduce the second case where idling needs to be
+ * performed to preserve service guarantees, we can note that
+ * allowing the drive to enqueue more than one request at a
+ * time, and hence delegating de facto final scheduling
+ * decisions to the drive's internal scheduler, causes loss of
+ * control on the actual request service order. In particular,
+ * the critical situation is when requests from different
+ * processes happen to be present, at the same time, in the
+ * internal queue(s) of the drive. In such a situation, the
+ * drive, by deciding the service order of the
+ * internally-queued requests, also determines the actual
+ * throughput distribution among these processes. But the
+ * drive typically has no notion or concern about per-process
+ * throughput distribution, and makes its decisions only on a
+ * per-request basis. Therefore, the service distribution
+ * enforced by the drive's internal scheduler is likely to
+ * coincide with the desired device-throughput distribution
+ * only in a completely symmetric scenario where:
+ * (i) each of these processes must get the same throughput as
+ * the others;
+ * (ii) all these processes have the same I/O pattern
+ * (either sequential or random).
+ * In fact, in such a scenario, the drive will tend to treat
+ * the requests of each of these processes in about the same
+ * way as the requests of the others, and thus to provide
+ * each of these processes with about the same throughput
+ * (which is exactly the desired throughput distribution). In
+ * contrast, in any asymmetric scenario, device idling is
+ * certainly needed to guarantee that bfqq receives its
+ * assigned fraction of the device throughput (see [1] for
+ * details).
+ *
+ * We address this issue by controlling, actually, only the
+ * symmetry sub-condition (i), i.e., provided that
+ * sub-condition (i) holds, idling is not performed,
+ * regardless of whether sub-condition (ii) holds. In other
+ * words, only if sub-condition (i) does not hold, then idling is
+ * allowed, and the device tends to be prevented from queueing
+ * many requests, possibly of several processes. The reason
+ * for not also controlling sub-condition (ii) is that, first,
+ * in the case of an HDD, the asymmetry in terms of types of
+ * I/O patterns is already taken into account in the above
+ * sentinel variable
+ * on_hdd_and_not_all_queues_seeky. Secondly, in the case of a
+ * flash-based device, we however prefer to privilege
+ * throughput (and idling lowers throughput for this type of
+ * device), for the following reasons:
+ * 1) differently from HDDs, the service time of random
+ * requests is not orders of magnitude higher than the service
+ * time of sequential requests; thus, even if processes doing
+ * sequential I/O get preferential treatment with respect to
+ * others doing random I/O, the consequences are not as
+ * dramatic as with HDDs;
+ * 2) if a process doing random I/O does need strong
+ * throughput guarantees, it is hopefully already being
+ * weight-raised, or the user is likely to have assigned it a
+ * higher weight than the other processes (and thus
+ * sub-condition (i) is likely to be false, which triggers
+ * idling).
+ *
+ * According to the above considerations, the next variable is
+ * true (only) if sub-condition (i) does not hold. To compute the
+ * value of this variable, we not only use the return value of
+ * the function bfq_symmetric_scenario(), but also check
+ * whether bfqq is being weight-raised, because
+ * bfq_symmetric_scenario() does not take into account also
+ * weight-raised queues (see comments to
+ * bfq_weights_tree_add()).
+ *
+ * As a side note, it is worth considering that the above
+ * device-idling countermeasures may however fail in the
+ * following unlucky scenario: if idling is (correctly)
+ * disabled in a time period during which all symmetry
+ * sub-conditions hold, and hence the device is allowed to
+ * enqueue many requests, but at some later point in time some
+ * sub-condition ceases to hold, then it may become impossible
+ * to let requests be served in the desired order until all
+ * the requests already queued in the device have been served.
+ */
+ asymmetric_scenario = bfqq->wr_coeff > 1 ||
+ !bfq_symmetric_scenario(bfqd);
+
+ /*
+ * Finally, there is a case where maximizing throughput is the
+ * best choice even if it may cause unfairness toward
+ * bfqq. Such a case is when bfqq became active in a burst of
+ * queue activations. Queues that became active during a large
+ * burst benefit only from throughput, as discussed in the
+ * comments to bfq_handle_burst. Thus, if bfqq became active
+ * in a burst and not idling the device maximizes throughput,
+ * then the device must not be idled, because not idling the
+ * device provides bfqq and all other queues in the burst with
+ * maximum benefit. Combining this and the two cases above, we
+ * can now establish when idling is actually needed to
+ * preserve service guarantees.
+ */
+ idling_needed_for_service_guarantees =
+ (on_hdd_and_not_all_queues_seeky || asymmetric_scenario) &&
+ !bfq_bfqq_in_large_burst(bfqq);
+ /*
+ * We have now all the components we need to compute the return
+ * value of the function, which is true only if both the following
+ * conditions hold:
+ * 1) bfqq is sync, because idling makes sense only for sync queues;
+ * 2) idling either boosts the throughput (without issues), or
+ * is necessary to preserve service guarantees.
+ */
return bfq_bfqq_sync(bfqq) &&
- !cond_for_expiring_in_burst &&
- (bfqq->wr_coeff > 1 || !symmetric_scenario ||
- (bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_idle_window(bfqq) &&
- !cond_for_expiring_non_wr)
- );
+ (idling_boosts_thr_without_issues ||
+ idling_needed_for_service_guarantees);
}
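Pulling the pieces together, the value returned above can be restated as a single predicate. The sketch below merely mirrors the local variables of bfq_bfqq_may_idle() as plain parameters; it is an illustration, not a kernel interface:

    #include <stdbool.h>

    static bool may_idle(bool sync, bool idling_boosts_thr, bool wr_busy_queues,
                         bool on_hdd_and_not_all_queues_seeky,
                         bool asymmetric_scenario, bool in_large_burst)
    {
            bool idling_boosts_thr_without_issues =
                    idling_boosts_thr && !wr_busy_queues;
            bool idling_needed_for_service_guarantees =
                    (on_hdd_and_not_all_queues_seeky || asymmetric_scenario) &&
                    !in_large_burst;

            return sync && (idling_boosts_thr_without_issues ||
                            idling_needed_for_service_guarantees);
    }

For instance, on an NCQ-capable SSD with all weights equal, no weight-raising and no large burst, both disjuncts are false, so the queue is expired instead of idled.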
/*
- * If the in-service queue is empty but sync, and the function
- * bfq_bfqq_must_not_expire returns true, then:
+ * If the in-service queue is empty but the function bfq_bfqq_may_idle
+ * returns true, then:
* 1) the queue must remain in service and cannot be expired, and
- * 2) the disk must be idled to wait for the possible arrival of a new
+ * 2) the device must be idled to wait for the possible arrival of a new
* request for the queue.
- * See the comments to the function bfq_bfqq_must_not_expire for the reasons
+ * See the comments to the function bfq_bfqq_may_idle for the reasons
* why performing device idling is the best choice to boost the throughput
- * and preserve service guarantees when bfq_bfqq_must_not_expire itself
+ * and preserve service guarantees when bfq_bfqq_may_idle itself
* returns true.
*/
-static inline bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
+static bool bfq_bfqq_must_idle(struct bfq_queue *bfqq)
{
struct bfq_data *bfqd = bfqq->bfqd;
return RB_EMPTY_ROOT(&bfqq->sort_list) && bfqd->bfq_slice_idle != 0 &&
- bfq_bfqq_must_not_expire(bfqq);
+ bfq_bfqq_may_idle(bfqq);
}
/*
@@ -2533,7 +2653,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
enum bfqq_expiration reason = BFQ_BFQQ_BUDGET_TIMEOUT;
bfqq = bfqd->in_service_queue;
- if (bfqq == NULL)
+ if (!bfqq)
goto new_queue;
bfq_log_bfqq(bfqd, bfqq, "select_queue: already in-service queue");
@@ -2548,7 +2668,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
* If bfqq has requests queued and it has enough budget left to
* serve them, keep the queue, otherwise expire it.
*/
- if (next_rq != NULL) {
+ if (next_rq) {
if (bfq_serv_to_charge(next_rq, bfqq) >
bfq_bfqq_budget_left(bfqq)) {
reason = BFQ_BFQQ_BUDGET_EXHAUSTED;
@@ -2575,6 +2695,9 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
*/
bfq_clear_bfqq_wait_request(bfqq);
del_timer(&bfqd->idle_slice_timer);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_idle_time(bfqq_group(bfqq));
+#endif
}
goto keep_queue;
}
@@ -2586,18 +2709,18 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd)
* may idle after their completion, then keep it anyway.
*/
if (timer_pending(&bfqd->idle_slice_timer) ||
- (bfqq->dispatched != 0 && bfq_bfqq_must_not_expire(bfqq))) {
+ (bfqq->dispatched != 0 && bfq_bfqq_may_idle(bfqq))) {
bfqq = NULL;
goto keep_queue;
}
reason = BFQ_BFQQ_NO_MORE_REQUESTS;
expire:
- bfq_bfqq_expire(bfqd, bfqq, 0, reason);
+ bfq_bfqq_expire(bfqd, bfqq, false, reason);
new_queue:
bfqq = bfq_set_in_service_queue(bfqd);
bfq_log(bfqd, "select_queue: new queue %d returned",
- bfqq != NULL ? bfqq->pid : 0);
+ bfqq ? bfqq->pid : 0);
keep_queue:
return bfqq;
}
@@ -2615,7 +2738,7 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq)
BUG_ON(bfqq != bfqd->in_service_queue && entity->weight !=
entity->orig_weight * bfqq->wr_coeff);
- if (entity->ioprio_changed)
+ if (entity->prio_changed)
bfq_log_bfqq(bfqd, bfqq, "WARN: pending prio change");
/*
@@ -2659,7 +2782,7 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
/* Follow expired path, else get first next available. */
rq = bfq_check_fifo(bfqq);
- if (rq == NULL)
+ if (!rq)
rq = bfqq->next_rq;
service_to_charge = bfq_serv_to_charge(rq, bfqq);
@@ -2695,14 +2818,14 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
bfq_update_wr_data(bfqd, bfqq);
bfq_log_bfqq(bfqd, bfqq,
- "dispatched %u sec req (%llu), budg left %lu",
+ "dispatched %u sec req (%llu), budg left %d",
blk_rq_sectors(rq),
(long long unsigned)blk_rq_pos(rq),
bfq_bfqq_budget_left(bfqq));
dispatched++;
- if (bfqd->in_service_bic == NULL) {
+ if (!bfqd->in_service_bic) {
atomic_long_inc(&RQ_BIC(rq)->icq.ioc->refcount);
bfqd->in_service_bic = RQ_BIC(rq);
}
@@ -2715,7 +2838,7 @@ static int bfq_dispatch_request(struct bfq_data *bfqd,
return dispatched;
expire:
- bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_EXHAUSTED);
+ bfq_bfqq_expire(bfqd, bfqq, false, BFQ_BFQQ_BUDGET_EXHAUSTED);
return dispatched;
}
@@ -2723,7 +2846,7 @@ static int __bfq_forced_dispatch_bfqq(struct bfq_queue *bfqq)
{
int dispatched = 0;
- while (bfqq->next_rq != NULL) {
+ while (bfqq->next_rq) {
bfq_dispatch_insert(bfqq->bfqd->queue, bfqq->next_rq);
dispatched++;
}
@@ -2743,7 +2866,7 @@ static int bfq_forced_dispatch(struct bfq_data *bfqd)
int dispatched = 0;
bfqq = bfqd->in_service_queue;
- if (bfqq != NULL)
+ if (bfqq)
__bfq_bfqq_expire(bfqd, bfqq);
/*
@@ -2779,7 +2902,7 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
return bfq_forced_dispatch(bfqd);
bfqq = bfq_select_queue(bfqd);
- if (bfqq == NULL)
+ if (!bfqq)
return 0;
if (bfq_class_idle(bfqq))
@@ -2819,6 +2942,9 @@ static int bfq_dispatch_requests(struct request_queue *q, int force)
static void bfq_put_queue(struct bfq_queue *bfqq)
{
struct bfq_data *bfqd = bfqq->bfqd;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ struct bfq_group *bfqg = bfqq_group(bfqq);
+#endif
BUG_ON(atomic_read(&bfqq->ref) <= 0);
@@ -2827,9 +2953,9 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
if (!atomic_dec_and_test(&bfqq->ref))
return;
- BUG_ON(rb_first(&bfqq->sort_list) != NULL);
+ BUG_ON(rb_first(&bfqq->sort_list));
BUG_ON(bfqq->allocated[READ] + bfqq->allocated[WRITE] != 0);
- BUG_ON(bfqq->entity.tree != NULL);
+ BUG_ON(bfqq->entity.tree);
BUG_ON(bfq_bfqq_busy(bfqq));
BUG_ON(bfqd->in_service_queue == bfqq);
@@ -2847,6 +2973,9 @@ static void bfq_put_queue(struct bfq_queue *bfqq)
bfq_log_bfqq(bfqd, bfqq, "put_queue: %p freed", bfqq);
kmem_cache_free(bfq_pool, bfqq);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_put(bfqg);
+#endif
}
static void bfq_put_cooperator(struct bfq_queue *bfqq)
@@ -2883,7 +3012,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}
-static inline void bfq_init_icq(struct io_cq *icq)
+static void bfq_init_icq(struct io_cq *icq)
{
struct bfq_io_cq *bic = icq_to_bic(icq);
@@ -2950,40 +3079,38 @@ static void bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *b
/*
* No prio set, inherit CPU scheduling settings.
*/
- bfqq->entity.new_ioprio = task_nice_ioprio(tsk);
- bfqq->entity.new_ioprio_class = task_nice_ioclass(tsk);
+ bfqq->new_ioprio = task_nice_ioprio(tsk);
+ bfqq->new_ioprio_class = task_nice_ioclass(tsk);
break;
case IOPRIO_CLASS_RT:
- bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
- bfqq->entity.new_ioprio_class = IOPRIO_CLASS_RT;
+ bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+ bfqq->new_ioprio_class = IOPRIO_CLASS_RT;
break;
case IOPRIO_CLASS_BE:
- bfqq->entity.new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
- bfqq->entity.new_ioprio_class = IOPRIO_CLASS_BE;
+ bfqq->new_ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
+ bfqq->new_ioprio_class = IOPRIO_CLASS_BE;
break;
case IOPRIO_CLASS_IDLE:
- bfqq->entity.new_ioprio_class = IOPRIO_CLASS_IDLE;
- bfqq->entity.new_ioprio = 7;
+ bfqq->new_ioprio_class = IOPRIO_CLASS_IDLE;
+ bfqq->new_ioprio = 7;
bfq_clear_bfqq_idle_window(bfqq);
break;
}
- if (bfqq->entity.new_ioprio < 0 ||
- bfqq->entity.new_ioprio >= IOPRIO_BE_NR) {
+ if (bfqq->new_ioprio < 0 || bfqq->new_ioprio >= IOPRIO_BE_NR) {
printk(KERN_CRIT "bfq_set_next_ioprio_data: new_ioprio %d\n",
- bfqq->entity.new_ioprio);
+ bfqq->new_ioprio);
BUG();
}
- bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->entity.new_ioprio);
- bfqq->entity.ioprio_changed = 1;
+ bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio);
+ bfqq->entity.prio_changed = 1;
}
-static void bfq_check_ioprio_change(struct bfq_io_cq *bic)
+static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
{
struct bfq_data *bfqd;
struct bfq_queue *bfqq, *new_bfqq;
- struct bfq_group *bfqg;
unsigned long uninitialized_var(flags);
int ioprio = bic->icq.ioc->ioprio;
@@ -2993,18 +3120,16 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic)
* This condition may trigger on a newly created bic, be sure to
* drop the lock before returning.
*/
- if (unlikely(bfqd == NULL) || likely(bic->ioprio == ioprio))
+ if (unlikely(!bfqd) || likely(bic->ioprio == ioprio))
goto out;
bic->ioprio = ioprio;
bfqq = bic->bfqq[BLK_RW_ASYNC];
- if (bfqq != NULL) {
- bfqg = container_of(bfqq->entity.sched_data, struct bfq_group,
- sched_data);
- new_bfqq = bfq_get_queue(bfqd, bfqg, BLK_RW_ASYNC, bic,
+ if (bfqq) {
+ new_bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic,
GFP_ATOMIC);
- if (new_bfqq != NULL) {
+ if (new_bfqq) {
bic->bfqq[BLK_RW_ASYNC] = new_bfqq;
bfq_log_bfqq(bfqd, bfqq,
"check_ioprio_change: bfqq %p %d",
@@ -3014,7 +3139,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic)
}
bfqq = bic->bfqq[BLK_RW_SYNC];
- if (bfqq != NULL)
+ if (bfqq)
bfq_set_next_ioprio_data(bfqq, bic);
out:
@@ -3038,7 +3163,8 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (!bfq_class_idle(bfqq))
bfq_mark_bfqq_idle_window(bfqq);
bfq_mark_bfqq_sync(bfqq);
- }
+ } else
+ bfq_clear_bfqq_sync(bfqq);
bfq_mark_bfqq_IO_bound(bfqq);
/* Tentative initial value to trade off between thr and lat */
@@ -3055,14 +3181,19 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
}
static struct bfq_queue *bfq_find_alloc_queue(struct bfq_data *bfqd,
- struct bfq_group *bfqg,
- int is_sync,
+ struct bio *bio, int is_sync,
struct bfq_io_cq *bic,
gfp_t gfp_mask)
{
+ struct bfq_group *bfqg;
struct bfq_queue *bfqq, *new_bfqq = NULL;
+ struct blkcg *blkcg;
retry:
+ rcu_read_lock();
+
+ blkcg = bio_blkcg(bio);
+ bfqg = bfq_find_alloc_group(bfqd, blkcg);
/* bic always exists here */
bfqq = bic_to_bfqq(bic, is_sync);
@@ -3070,18 +3201,19 @@ retry:
* Always try a new alloc if we fall back to the OOM bfqq
* originally, since it should just be a temporary situation.
*/
- if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) {
+ if (!bfqq || bfqq == &bfqd->oom_bfqq) {
bfqq = NULL;
- if (new_bfqq != NULL) {
+ if (new_bfqq) {
bfqq = new_bfqq;
new_bfqq = NULL;
} else if (gfp_mask & __GFP_WAIT) {
+ rcu_read_unlock();
spin_unlock_irq(bfqd->queue->queue_lock);
new_bfqq = kmem_cache_alloc_node(bfq_pool,
gfp_mask | __GFP_ZERO,
bfqd->queue->node);
spin_lock_irq(bfqd->queue->queue_lock);
- if (new_bfqq != NULL)
+ if (new_bfqq)
goto retry;
} else {
bfqq = kmem_cache_alloc_node(bfq_pool,
@@ -3089,7 +3221,7 @@ retry:
bfqd->queue->node);
}
- if (bfqq != NULL) {
+ if (bfqq) {
bfq_init_bfqq(bfqd, bfqq, bic, current->pid,
is_sync);
bfq_init_entity(&bfqq->entity, bfqg);
@@ -3100,9 +3232,11 @@ retry:
}
}
- if (new_bfqq != NULL)
+ if (new_bfqq)
kmem_cache_free(bfq_pool, new_bfqq);
+ rcu_read_unlock();
+
return bfqq;
}
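The retry label above is an instance of the familiar "drop the lock around a sleeping allocation, then re-check" pattern: the queue lock (and the RCU read section protecting the blkcg lookup) is released before the __GFP_WAIT allocation, re-taken afterwards, and the lookup is repeated because the queue may have appeared in the meantime. A generic user-space sketch of the same idea, with a pthread mutex standing in for the queue lock and malloc() for the slab allocation (all names here are illustrative):

    #include <pthread.h>
    #include <stdlib.h>

    struct cache {
            pthread_mutex_t lock;
            void *obj;
    };

    /* Return the cached object, allocating and installing it if needed. */
    static void *get_or_alloc(struct cache *c, size_t size)
    {
            void *fresh = NULL;
            void *ret;

            pthread_mutex_lock(&c->lock);
            while (!c->obj) {
                    if (fresh) {
                            c->obj = fresh; /* our allocation wins the race */
                            fresh = NULL;
                            break;
                    }
                    /* Drop the lock across the potentially blocking allocation. */
                    pthread_mutex_unlock(&c->lock);
                    fresh = malloc(size);
                    pthread_mutex_lock(&c->lock);
                    if (!fresh)
                            break; /* allocation failed, return whatever is there */
                    /* Loop: c->obj may have been installed by someone else. */
            }
            ret = c->obj;
            pthread_mutex_unlock(&c->lock);
            free(fresh); /* non-NULL only if we lost the race */
            return ret;
    }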
@@ -3126,7 +3260,7 @@ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd,
}
static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
- struct bfq_group *bfqg, int is_sync,
+ struct bio *bio, int is_sync,
struct bfq_io_cq *bic, gfp_t gfp_mask)
{
const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio);
@@ -3135,19 +3269,26 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
struct bfq_queue *bfqq = NULL;
if (!is_sync) {
+ struct blkcg *blkcg;
+ struct bfq_group *bfqg;
+
+ rcu_read_lock();
+ blkcg = bio_blkcg(bio);
+ rcu_read_unlock();
+ bfqg = bfq_find_alloc_group(bfqd, blkcg);
async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class,
ioprio);
bfqq = *async_bfqq;
}
- if (bfqq == NULL)
- bfqq = bfq_find_alloc_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
+ if (!bfqq)
+ bfqq = bfq_find_alloc_queue(bfqd, bio, is_sync, bic, gfp_mask);
/*
* Pin the queue now that it's allocated, scheduler exit will
* prune it.
*/
- if (!is_sync && *async_bfqq == NULL) {
+ if (!is_sync && !(*async_bfqq)) {
atomic_inc(&bfqq->ref);
bfq_log_bfqq(bfqd, bfqq, "get_queue, bfqq not in async: %p, %d",
bfqq, atomic_read(&bfqq->ref));
@@ -3280,9 +3421,9 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfqq->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq);
if (bfqq == bfqd->in_service_queue && bfq_bfqq_wait_request(bfqq)) {
- int small_req = bfqq->queued[rq_is_sync(rq)] == 1 &&
- blk_rq_sectors(rq) < 32;
- int budget_timeout = bfq_bfqq_budget_timeout(bfqq);
+ bool small_req = bfqq->queued[rq_is_sync(rq)] == 1 &&
+ blk_rq_sectors(rq) < 32;
+ bool budget_timeout = bfq_bfqq_budget_timeout(bfqq);
/*
* There is just this request queued: if the request
@@ -3309,6 +3450,9 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
*/
bfq_clear_bfqq_wait_request(bfqq);
del_timer(&bfqd->idle_slice_timer);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_idle_time(bfqq_group(bfqq));
+#endif
/*
* The queue is not empty, because a new request just
@@ -3318,7 +3462,8 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* See [1] for more details.
*/
if (budget_timeout)
- bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT);
+ bfq_bfqq_expire(bfqd, bfqq, false,
+ BFQ_BFQQ_BUDGET_TIMEOUT);
/*
* Let the request rip immediately, or let a new queue be
@@ -3342,7 +3487,7 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
*/
if (!in_interrupt()) {
new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true);
- if (new_bfqq != NULL) {
+ if (new_bfqq) {
if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
/*
@@ -3370,7 +3515,7 @@ static void bfq_insert_request(struct request_queue *q, struct request *rq)
* from assigning it a full weight-raising period. See the detailed
* comments about this field in bfq_init_icq().
*/
- if (bfqq->bic != NULL)
+ if (bfqq->bic)
bfqq->bic->wr_time_left = 0;
rq->fifo_time = jiffies + bfqd->bfq_fifo_expire[rq_is_sync(rq)];
list_add_tail(&rq->queuelist, &bfqq->fifo);
@@ -3418,6 +3563,11 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
BUG_ON(!bfqq->dispatched);
bfqd->rq_in_driver--;
bfqq->dispatched--;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_completion(bfqq_group(bfqq),
+ rq_start_time_ns(rq),
+ rq_io_start_time_ns(rq), rq->cmd_flags);
+#endif
if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) {
bfq_weights_tree_remove(bfqd, &bfqq->entity,
@@ -3462,11 +3612,12 @@ static void bfq_completed_request(struct request_queue *q, struct request *rq)
bfq_arm_slice_timer(bfqd);
goto out;
} else if (bfq_may_expire_for_budg_timeout(bfqq))
- bfq_bfqq_expire(bfqd, bfqq, 0, BFQ_BFQQ_BUDGET_TIMEOUT);
+ bfq_bfqq_expire(bfqd, bfqq, false,
+ BFQ_BFQQ_BUDGET_TIMEOUT);
else if (RB_EMPTY_ROOT(&bfqq->sort_list) &&
(bfqq->dispatched == 0 ||
- !bfq_bfqq_must_not_expire(bfqq)))
- bfq_bfqq_expire(bfqd, bfqq, 0,
+ !bfq_bfqq_may_idle(bfqq)))
+ bfq_bfqq_expire(bfqd, bfqq, false,
BFQ_BFQQ_NO_MORE_REQUESTS);
}
@@ -3477,7 +3628,7 @@ out:
return;
}
-static inline int __bfq_may_queue(struct bfq_queue *bfqq)
+static int __bfq_may_queue(struct bfq_queue *bfqq)
{
if (bfq_bfqq_wait_request(bfqq) && bfq_bfqq_must_alloc(bfqq)) {
bfq_clear_bfqq_must_alloc(bfqq);
@@ -3501,11 +3652,11 @@ static int bfq_may_queue(struct request_queue *q, int rw)
* 'may queue' if that fails.
*/
bic = bfq_bic_lookup(bfqd, tsk->io_context);
- if (bic == NULL)
+ if (!bic)
return ELV_MQUEUE_MAY;
bfqq = bic_to_bfqq(bic, rw_is_sync(rw));
- if (bfqq != NULL)
+ if (bfqq)
return __bfq_may_queue(bfqq);
return ELV_MQUEUE_MAY;
@@ -3518,7 +3669,7 @@ static void bfq_put_request(struct request *rq)
{
struct bfq_queue *bfqq = RQ_BFQQ(rq);
- if (bfqq != NULL) {
+ if (bfqq) {
const int rw = rq_data_dir(rq);
BUG_ON(!bfqq->allocated[rw]);
@@ -3570,25 +3721,24 @@ static int bfq_set_request(struct request_queue *q, struct request *rq,
const int rw = rq_data_dir(rq);
const int is_sync = rq_is_sync(rq);
struct bfq_queue *bfqq;
- struct bfq_group *bfqg;
unsigned long flags;
bool split = false;
might_sleep_if(gfp_mask & __GFP_WAIT);
- bfq_check_ioprio_change(bic);
+ bfq_check_ioprio_change(bic, bio);
spin_lock_irqsave(q->queue_lock, flags);
- if (bic == NULL)
+ if (!bic)
goto queue_fail;
- bfqg = bfq_bic_update_cgroup(bic);
+ bfq_bic_update_cgroup(bic, bio);
new_queue:
bfqq = bic_to_bfqq(bic, is_sync);
- if (bfqq == NULL || bfqq == &bfqd->oom_bfqq) {
- bfqq = bfq_get_queue(bfqd, bfqg, is_sync, bic, gfp_mask);
+ if (!bfqq || bfqq == &bfqd->oom_bfqq) {
+ bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, gfp_mask);
bic_set_bfqq(bic, bfqq, is_sync);
if (split && is_sync) {
if ((bic->was_in_burst_list && bfqd->large_burst) ||
@@ -3684,7 +3834,7 @@ static void bfq_idle_slice_timer(unsigned long data)
* the in-service queue. This can hardly happen, but in the worst
* case we just expire a queue too early.
*/
- if (bfqq != NULL) {
+ if (bfqq) {
bfq_log_bfqq(bfqd, bfqq, "slice_timer expired");
if (bfq_bfqq_budget_timeout(bfqq))
/*
@@ -3704,7 +3854,7 @@ static void bfq_idle_slice_timer(unsigned long data)
else
goto schedule_dispatch;
- bfq_bfqq_expire(bfqd, bfqq, 1, reason);
+ bfq_bfqq_expire(bfqd, bfqq, true, reason);
}
schedule_dispatch:
@@ -3719,14 +3869,14 @@ static void bfq_shutdown_timer_wq(struct bfq_data *bfqd)
cancel_work_sync(&bfqd->unplug_work);
}
-static inline void __bfq_put_async_bfqq(struct bfq_data *bfqd,
+static void __bfq_put_async_bfqq(struct bfq_data *bfqd,
struct bfq_queue **bfqq_ptr)
{
struct bfq_group *root_group = bfqd->root_group;
struct bfq_queue *bfqq = *bfqq_ptr;
bfq_log(bfqd, "put_async_bfqq: %p", bfqq);
- if (bfqq != NULL) {
+ if (bfqq) {
bfq_bfqq_move(bfqd, bfqq, &bfqq->entity, root_group);
bfq_log_bfqq(bfqd, bfqq, "put_async_bfqq: putting %p, %d",
bfqq, atomic_read(&bfqq->ref));
@@ -3762,7 +3912,7 @@ static void bfq_exit_queue(struct elevator_queue *e)
spin_lock_irq(q->queue_lock);
- BUG_ON(bfqd->in_service_queue != NULL);
+ BUG_ON(bfqd->in_service_queue);
list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
bfq_deactivate_bfqq(bfqd, bfqq, 0);
@@ -3775,22 +3925,39 @@ static void bfq_exit_queue(struct elevator_queue *e)
BUG_ON(timer_pending(&bfqd->idle_slice_timer));
- bfq_free_root_group(bfqd);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ blkcg_deactivate_policy(q, &blkcg_policy_bfq);
+#endif
+
kfree(bfqd);
}
+static void bfq_init_root_group(struct bfq_group *root_group,
+ struct bfq_data *bfqd)
+{
+ int i;
+
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ root_group->entity.parent = NULL;
+ root_group->my_entity = NULL;
+ root_group->bfqd = bfqd;
+#endif
+ root_group->rq_pos_tree = RB_ROOT;
+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
+ root_group->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
+}
+
static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
{
- struct bfq_group *bfqg;
struct bfq_data *bfqd;
struct elevator_queue *eq;
eq = elevator_alloc(q, e);
- if (eq == NULL)
+ if (!eq)
return -ENOMEM;
bfqd = kzalloc_node(sizeof(*bfqd), GFP_KERNEL, q->node);
- if (bfqd == NULL) {
+ if (!bfqd) {
kobject_put(&eq->kobj);
return -ENOMEM;
}
@@ -3803,16 +3970,16 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
*/
bfq_init_bfqq(bfqd, &bfqd->oom_bfqq, NULL, 1, 0);
atomic_inc(&bfqd->oom_bfqq.ref);
- bfqd->oom_bfqq.entity.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
- bfqd->oom_bfqq.entity.new_ioprio_class = IOPRIO_CLASS_BE;
+ bfqd->oom_bfqq.new_ioprio = BFQ_DEFAULT_QUEUE_IOPRIO;
+ bfqd->oom_bfqq.new_ioprio_class = IOPRIO_CLASS_BE;
bfqd->oom_bfqq.entity.new_weight =
- bfq_ioprio_to_weight(bfqd->oom_bfqq.entity.new_ioprio);
+ bfq_ioprio_to_weight(bfqd->oom_bfqq.new_ioprio);
/*
* Trigger weight initialization, according to ioprio, at the
* oom_bfqq's first activation. The oom_bfqq's ioprio and ioprio
* class won't be changed any more.
*/
- bfqd->oom_bfqq.entity.ioprio_changed = 1;
+ bfqd->oom_bfqq.entity.prio_changed = 1;
bfqd->queue = q;
@@ -3820,16 +3987,12 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
q->elevator = eq;
spin_unlock_irq(q->queue_lock);
- bfqg = bfq_alloc_root_group(bfqd, q->node);
- if (bfqg == NULL) {
- kfree(bfqd);
- kobject_put(&eq->kobj);
- return -ENOMEM;
- }
-
- bfqd->root_group = bfqg;
+ bfqd->root_group = bfq_create_group_hierarchy(bfqd, q->node);
+ if (!bfqd->root_group)
+ goto out_free;
+ bfq_init_root_group(bfqd->root_group, bfqd);
bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
-#ifdef CONFIG_CGROUP_BFQIO
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
bfqd->active_numerous_groups = 0;
#endif
@@ -3837,7 +4000,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
bfqd->idle_slice_timer.data = (unsigned long)bfqd;
- bfqd->rq_pos_tree = RB_ROOT;
bfqd->queue_weights_tree = RB_ROOT;
bfqd->group_weights_tree = RB_ROOT;
@@ -3895,18 +4057,23 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
bfqd->device_speed = BFQ_BFQD_FAST;
return 0;
+
+out_free:
+ kfree(bfqd);
+ kobject_put(&eq->kobj);
+ return -ENOMEM;
}
static void bfq_slab_kill(void)
{
- if (bfq_pool != NULL)
+ if (bfq_pool)
kmem_cache_destroy(bfq_pool);
}
static int __init bfq_slab_setup(void)
{
bfq_pool = KMEM_CACHE(bfq_queue, 0);
- if (bfq_pool == NULL)
+ if (!bfq_pool)
return -ENOMEM;
return 0;
}
@@ -4051,7 +4218,7 @@ static ssize_t bfq_weights_store(struct elevator_queue *e,
return count;
}
-static inline unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd)
+static unsigned long bfq_estimated_max_budget(struct bfq_data *bfqd)
{
u64 timeout = jiffies_to_msecs(bfqd->bfq_timeout[BLK_RW_SYNC]);
@@ -4145,6 +4312,9 @@ static struct elevator_type iosched_bfq = {
.elevator_merge_fn = bfq_merge,
.elevator_merged_fn = bfq_merged_request,
.elevator_merge_req_fn = bfq_merged_requests,
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ .elevator_bio_merged_fn = bfq_bio_merged,
+#endif
.elevator_allow_merge_fn = bfq_allow_merge,
.elevator_dispatch_fn = bfq_dispatch_requests,
.elevator_add_req_fn = bfq_insert_request,
@@ -4170,6 +4340,8 @@ static struct elevator_type iosched_bfq = {
static int __init bfq_init(void)
{
+ int ret;
+
/*
* Can be 0 on HZ < 1000 setups.
*/
@@ -4179,8 +4351,15 @@ static int __init bfq_init(void)
if (bfq_timeout_async == 0)
bfq_timeout_async = 1;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ ret = blkcg_policy_register(&blkcg_policy_bfq);
+ if (ret)
+ return ret;
+#endif
+
+ ret = -ENOMEM;
if (bfq_slab_setup())
- return -ENOMEM;
+ goto err_pol_unreg;
/*
* Times to load large popular applications for the typical systems
@@ -4199,15 +4378,27 @@ static int __init bfq_init(void)
device_speed_thresh[0] = (R_fast[0] + R_slow[0]) / 2;
device_speed_thresh[1] = (R_fast[1] + R_slow[1]) / 2;
- elv_register(&iosched_bfq);
- pr_info("BFQ I/O-scheduler: v7r8");
+ ret = elv_register(&iosched_bfq);
+ if (ret)
+ goto err_pol_unreg;
+
+ pr_info("BFQ I/O-scheduler: v7r9");
return 0;
+
+err_pol_unreg:
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ blkcg_policy_unregister(&blkcg_policy_bfq);
+#endif
+ return ret;
}
static void __exit bfq_exit(void)
{
elv_unregister(&iosched_bfq);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ blkcg_policy_unregister(&blkcg_policy_bfq);
+#endif
bfq_slab_kill();
}
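
The new error path in bfq_init() above is the register-in-order, unwind-in-reverse idiom: the blkcg policy is registered first, then the slab cache and the elevator, and a failure at any later step unwinds what was already set up, with bfq_exit() tearing things down in the opposite order. A stand-alone sketch of that shape, with made-up step names:

#include <stdio.h>

static int  setup_a(void)    { puts("A up");   return 0; }
static void teardown_a(void) { puts("A down"); }
static int  setup_b(void)    { puts("B up");   return 0; }
static void teardown_b(void) { puts("B down"); }
static int  setup_c(void)    { puts("C up");   return 0; }

static int init_all(void)
{
	int ret;

	ret = setup_a();
	if (ret)
		return ret;

	ret = setup_b();
	if (ret)
		goto err_a;

	ret = setup_c();
	if (ret)
		goto err_b;

	return 0;		/* everything registered */

err_b:
	teardown_b();		/* unwind in reverse order */
err_a:
	teardown_a();
	return ret;
}

int main(void)
{
	return init_all();
}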
diff --git a/block/bfq-sched.c b/block/bfq-sched.c
index d0890c6d4..9328a1f09 100644
--- a/block/bfq-sched.c
+++ b/block/bfq-sched.c
@@ -10,24 +10,27 @@
* Copyright (C) 2010 Paolo Valente <paolo.valente@unimore.it>
*/
-#ifdef CONFIG_CGROUP_BFQIO
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
#define for_each_entity(entity) \
- for (; entity != NULL; entity = entity->parent)
+ for (; entity ; entity = entity->parent)
#define for_each_entity_safe(entity, parent) \
for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
+
static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
int extract,
struct bfq_data *bfqd);
-static inline void bfq_update_budget(struct bfq_entity *next_in_service)
+static struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
+
+static void bfq_update_budget(struct bfq_entity *next_in_service)
{
struct bfq_entity *bfqg_entity;
struct bfq_group *bfqg;
struct bfq_sched_data *group_sd;
- BUG_ON(next_in_service == NULL);
+ BUG_ON(!next_in_service);
group_sd = next_in_service->sched_data;
@@ -38,7 +41,7 @@ static inline void bfq_update_budget(struct bfq_entity *next_in_service)
* as it must never become an in-service entity.
*/
bfqg_entity = bfqg->my_entity;
- if (bfqg_entity != NULL)
+ if (bfqg_entity)
bfqg_entity->budget = next_in_service->budget;
}
@@ -46,7 +49,7 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
{
struct bfq_entity *next_in_service;
- if (sd->in_service_entity != NULL)
+ if (sd->in_service_entity)
/* will update/requeue at the end of service */
return 0;
@@ -60,35 +63,35 @@ static int bfq_update_next_in_service(struct bfq_sched_data *sd)
next_in_service = bfq_lookup_next_entity(sd, 0, NULL);
sd->next_in_service = next_in_service;
- if (next_in_service != NULL)
+ if (next_in_service)
bfq_update_budget(next_in_service);
return 1;
}
-static inline void bfq_check_next_in_service(struct bfq_sched_data *sd,
- struct bfq_entity *entity)
+static void bfq_check_next_in_service(struct bfq_sched_data *sd,
+ struct bfq_entity *entity)
{
BUG_ON(sd->next_in_service != entity);
}
#else
#define for_each_entity(entity) \
- for (; entity != NULL; entity = NULL)
+ for (; entity ; entity = NULL)
#define for_each_entity_safe(entity, parent) \
- for (parent = NULL; entity != NULL; entity = parent)
+ for (parent = NULL; entity ; entity = parent)
-static inline int bfq_update_next_in_service(struct bfq_sched_data *sd)
+static int bfq_update_next_in_service(struct bfq_sched_data *sd)
{
return 0;
}
-static inline void bfq_check_next_in_service(struct bfq_sched_data *sd,
- struct bfq_entity *entity)
+static void bfq_check_next_in_service(struct bfq_sched_data *sd,
+ struct bfq_entity *entity)
{
}
-static inline void bfq_update_budget(struct bfq_entity *next_in_service)
+static void bfq_update_budget(struct bfq_entity *next_in_service)
{
}
#endif
@@ -109,18 +112,18 @@ static inline void bfq_update_budget(struct bfq_entity *next_in_service)
*
* Return @a > @b, dealing with wrapping correctly.
*/
-static inline int bfq_gt(u64 a, u64 b)
+static int bfq_gt(u64 a, u64 b)
{
return (s64)(a - b) > 0;
}
-static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
+static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
{
struct bfq_queue *bfqq = NULL;
- BUG_ON(entity == NULL);
+ BUG_ON(!entity);
- if (entity->my_sched_data == NULL)
+ if (!entity->my_sched_data)
bfqq = container_of(entity, struct bfq_queue, entity);
return bfqq;
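
bfq_gt() above compares 64-bit virtual timestamps through a signed cast of their difference, so the ordering stays correct across wrap-around as long as the two values are less than 2^63 apart. A quick user-space check of that property (same expression, kernel types replaced with stdint ones):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same idea as bfq_gt(): a > b modulo wrap-around. */
static int wrap_gt(uint64_t a, uint64_t b)
{
	return (int64_t)(a - b) > 0;
}

int main(void)
{
	uint64_t near_max = UINT64_MAX - 5;

	assert(wrap_gt(100, 50));			/* ordinary case     */
	assert(!wrap_gt(50, 100));
	assert(wrap_gt(near_max + 10, near_max));	/* wraps past zero   */
	assert(!wrap_gt(near_max, near_max + 10));

	puts("wrap-around comparison behaves as expected");
	return 0;
}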
@@ -132,8 +135,7 @@ static inline struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity)
* @service: amount of service.
* @weight: scale factor (weight of an entity or weight sum).
*/
-static inline u64 bfq_delta(unsigned long service,
- unsigned long weight)
+static u64 bfq_delta(unsigned long service, unsigned long weight)
{
u64 d = (u64)service << WFQ_SERVICE_SHIFT;
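
bfq_delta() turns an amount of service into a virtual-time increment by scaling it with a fixed-point shift and then dividing by the weight; the hunk cuts the body off after the shift, and the division by @weight (a do_div() in the kernel) is assumed here from the comment above. A rough user-space rendering of that shape, where WFQ_SERVICE_SHIFT = 22 is an assumption meant to match bfq.h and the helper name is made up:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define WFQ_SERVICE_SHIFT	22	/* assumed value */

/* Virtual-time delta: service scaled by 2^SHIFT, divided by the weight. */
static uint64_t vtime_delta(unsigned long service, unsigned long weight)
{
	uint64_t d = (uint64_t)service << WFQ_SERVICE_SHIFT;

	return d / weight;
}

int main(void)
{
	/* The same service costs less virtual time to a heavier entity. */
	printf("weight 100: %" PRIu64 "\n", vtime_delta(4096, 100));
	printf("weight 500: %" PRIu64 "\n", vtime_delta(4096, 500));
	return 0;
}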
@@ -146,8 +148,7 @@ static inline u64 bfq_delta(unsigned long service,
* @entity: the entity to act upon.
* @service: the service to be charged to the entity.
*/
-static inline void bfq_calc_finish(struct bfq_entity *entity,
- unsigned long service)
+static void bfq_calc_finish(struct bfq_entity *entity, unsigned long service)
{
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
@@ -156,7 +157,7 @@ static inline void bfq_calc_finish(struct bfq_entity *entity,
entity->finish = entity->start +
bfq_delta(service, entity->weight);
- if (bfqq != NULL) {
+ if (bfqq) {
bfq_log_bfqq(bfqq->bfqd, bfqq,
"calc_finish: serv %lu, w %d",
service, entity->weight);
@@ -176,11 +177,11 @@ static inline void bfq_calc_finish(struct bfq_entity *entity,
* conversion mechanism because, e.g., in the tree walking functions,
* the check for a %NULL value would be redundant.
*/
-static inline struct bfq_entity *bfq_entity_of(struct rb_node *node)
+static struct bfq_entity *bfq_entity_of(struct rb_node *node)
{
struct bfq_entity *entity = NULL;
- if (node != NULL)
+ if (node)
entity = rb_entry(node, struct bfq_entity, rb_node);
return entity;
@@ -191,8 +192,7 @@ static inline struct bfq_entity *bfq_entity_of(struct rb_node *node)
* @root: the tree root.
* @entity: the entity to remove.
*/
-static inline void bfq_extract(struct rb_root *root,
- struct bfq_entity *entity)
+static void bfq_extract(struct rb_root *root, struct bfq_entity *entity)
{
BUG_ON(entity->tree != root);
@@ -225,7 +225,7 @@ static void bfq_idle_extract(struct bfq_service_tree *st,
bfq_extract(&st->idle, entity);
- if (bfqq != NULL)
+ if (bfqq)
list_del(&bfqq->bfqq_list);
}
@@ -243,9 +243,9 @@ static void bfq_insert(struct rb_root *root, struct bfq_entity *entity)
struct rb_node **node = &root->rb_node;
struct rb_node *parent = NULL;
- BUG_ON(entity->tree != NULL);
+ BUG_ON(entity->tree);
- while (*node != NULL) {
+ while (*node) {
parent = *node;
entry = rb_entry(parent, struct bfq_entity, rb_node);
@@ -271,12 +271,11 @@ static void bfq_insert(struct rb_root *root, struct bfq_entity *entity)
* that the subtree rooted at @node (which may be its left or its right
* child) has a valid min_start value.
*/
-static inline void bfq_update_min(struct bfq_entity *entity,
- struct rb_node *node)
+static void bfq_update_min(struct bfq_entity *entity, struct rb_node *node)
{
struct bfq_entity *child;
- if (node != NULL) {
+ if (node) {
child = rb_entry(node, struct bfq_entity, rb_node);
if (bfq_gt(entity->min_start, child->min_start))
entity->min_start = child->min_start;
@@ -291,7 +290,7 @@ static inline void bfq_update_min(struct bfq_entity *entity,
* this function updates its min_start value. The left and right subtrees
* are assumed to hold a correct min_start value.
*/
-static inline void bfq_update_active_node(struct rb_node *node)
+static void bfq_update_active_node(struct rb_node *node)
{
struct bfq_entity *entity = rb_entry(node, struct bfq_entity, rb_node);
@@ -318,12 +317,12 @@ up:
bfq_update_active_node(node);
parent = rb_parent(node);
- if (parent == NULL)
+ if (!parent)
return;
- if (node == parent->rb_left && parent->rb_right != NULL)
+ if (node == parent->rb_left && parent->rb_right)
bfq_update_active_node(parent->rb_right);
- else if (parent->rb_left != NULL)
+ else if (parent->rb_left)
bfq_update_active_node(parent->rb_left);
node = parent;
@@ -355,7 +354,7 @@ static void bfq_active_insert(struct bfq_service_tree *st,
{
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
struct rb_node *node = &entity->rb_node;
-#ifdef CONFIG_CGROUP_BFQIO
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
struct bfq_sched_data *sd = NULL;
struct bfq_group *bfqg = NULL;
struct bfq_data *bfqd = NULL;
@@ -363,22 +362,22 @@ static void bfq_active_insert(struct bfq_service_tree *st,
bfq_insert(&st->active, entity);
- if (node->rb_left != NULL)
+ if (node->rb_left)
node = node->rb_left;
- else if (node->rb_right != NULL)
+ else if (node->rb_right)
node = node->rb_right;
bfq_update_active_tree(node);
-#ifdef CONFIG_CGROUP_BFQIO
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
sd = entity->sched_data;
bfqg = container_of(sd, struct bfq_group, sched_data);
BUG_ON(!bfqg);
bfqd = (struct bfq_data *)bfqg->bfqd;
#endif
- if (bfqq != NULL)
+ if (bfqq)
list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list);
-#ifdef CONFIG_CGROUP_BFQIO
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
else { /* bfq_group */
BUG_ON(!bfqd);
bfq_weights_tree_add(bfqd, entity, &bfqd->group_weights_tree);
@@ -397,31 +396,32 @@ static void bfq_active_insert(struct bfq_service_tree *st,
* bfq_ioprio_to_weight - calc a weight from an ioprio.
* @ioprio: the ioprio value to convert.
*/
-static inline unsigned short bfq_ioprio_to_weight(int ioprio)
+static unsigned short bfq_ioprio_to_weight(int ioprio)
{
BUG_ON(ioprio < 0 || ioprio >= IOPRIO_BE_NR);
- return IOPRIO_BE_NR - ioprio;
+ return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - ioprio;
}
/**
* bfq_weight_to_ioprio - calc an ioprio from a weight.
* @weight: the weight value to convert.
*
- * To preserve as mush as possible the old only-ioprio user interface,
+ * To preserve as much as possible the old only-ioprio user interface,
* 0 is used as an escape ioprio value for weights (numerically) equal or
- * larger than IOPRIO_BE_NR
+ * larger than IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF.
*/
-static inline unsigned short bfq_weight_to_ioprio(int weight)
+static unsigned short bfq_weight_to_ioprio(int weight)
{
BUG_ON(weight < BFQ_MIN_WEIGHT || weight > BFQ_MAX_WEIGHT);
- return IOPRIO_BE_NR - weight < 0 ? 0 : IOPRIO_BE_NR - weight;
+ return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight < 0 ?
+ 0 : IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight;
}
-static inline void bfq_get_entity(struct bfq_entity *entity)
+static void bfq_get_entity(struct bfq_entity *entity)
{
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
- if (bfqq != NULL) {
+ if (bfqq) {
atomic_inc(&bfqq->ref);
bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
bfqq, atomic_read(&bfqq->ref));
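
With BFQ_WEIGHT_CONVERSION_COEFF in place, the two conversion helpers in the hunk above still round-trip for the eight IOPRIO_CLASS_BE levels, while any weight at or above IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF maps back to the escape ioprio 0. A small user-space rendering of the same arithmetic (IOPRIO_BE_NR is 8 in the kernel; the expressions are copied from the hunk above):

#include <stdio.h>

#define IOPRIO_BE_NR			8
#define BFQ_WEIGHT_CONVERSION_COEFF	10

static unsigned short ioprio_to_weight(int ioprio)
{
	return IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - ioprio;
}

static unsigned short weight_to_ioprio(int weight)
{
	int ioprio = IOPRIO_BE_NR * BFQ_WEIGHT_CONVERSION_COEFF - weight;

	return ioprio < 0 ? 0 : ioprio;	/* 0 is the escape value */
}

int main(void)
{
	int ioprio;

	for (ioprio = 0; ioprio < IOPRIO_BE_NR; ioprio++)
		printf("ioprio %d -> weight %u -> ioprio %u\n",
		       ioprio, ioprio_to_weight(ioprio),
		       weight_to_ioprio(ioprio_to_weight(ioprio)));
	return 0;
}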
@@ -441,15 +441,15 @@ static struct rb_node *bfq_find_deepest(struct rb_node *node)
{
struct rb_node *deepest;
- if (node->rb_right == NULL && node->rb_left == NULL)
+ if (!node->rb_right && !node->rb_left)
deepest = rb_parent(node);
- else if (node->rb_right == NULL)
+ else if (!node->rb_right)
deepest = node->rb_left;
- else if (node->rb_left == NULL)
+ else if (!node->rb_left)
deepest = node->rb_right;
else {
deepest = rb_next(node);
- if (deepest->rb_right != NULL)
+ if (deepest->rb_right)
deepest = deepest->rb_right;
else if (rb_parent(deepest) != node)
deepest = rb_parent(deepest);
@@ -468,7 +468,7 @@ static void bfq_active_extract(struct bfq_service_tree *st,
{
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
struct rb_node *node;
-#ifdef CONFIG_CGROUP_BFQIO
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
struct bfq_sched_data *sd = NULL;
struct bfq_group *bfqg = NULL;
struct bfq_data *bfqd = NULL;
@@ -477,18 +477,18 @@ static void bfq_active_extract(struct bfq_service_tree *st,
node = bfq_find_deepest(&entity->rb_node);
bfq_extract(&st->active, entity);
- if (node != NULL)
+ if (node)
bfq_update_active_tree(node);
-#ifdef CONFIG_CGROUP_BFQIO
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
sd = entity->sched_data;
bfqg = container_of(sd, struct bfq_group, sched_data);
BUG_ON(!bfqg);
bfqd = (struct bfq_data *)bfqg->bfqd;
#endif
- if (bfqq != NULL)
+ if (bfqq)
list_del(&bfqq->bfqq_list);
-#ifdef CONFIG_CGROUP_BFQIO
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
else { /* bfq_group */
BUG_ON(!bfqd);
bfq_weights_tree_remove(bfqd, entity,
@@ -519,14 +519,14 @@ static void bfq_idle_insert(struct bfq_service_tree *st,
struct bfq_entity *first_idle = st->first_idle;
struct bfq_entity *last_idle = st->last_idle;
- if (first_idle == NULL || bfq_gt(first_idle->finish, entity->finish))
+ if (!first_idle || bfq_gt(first_idle->finish, entity->finish))
st->first_idle = entity;
- if (last_idle == NULL || bfq_gt(entity->finish, last_idle->finish))
+ if (!last_idle || bfq_gt(entity->finish, last_idle->finish))
st->last_idle = entity;
bfq_insert(&st->idle, entity);
- if (bfqq != NULL)
+ if (bfqq)
list_add(&bfqq->bfqq_list, &bfqq->bfqd->idle_list);
}
@@ -549,7 +549,7 @@ static void bfq_forget_entity(struct bfq_service_tree *st,
entity->on_st = 0;
st->wsum -= entity->weight;
- if (bfqq != NULL) {
+ if (bfqq) {
sd = entity->sched_data;
bfq_log_bfqq(bfqq->bfqd, bfqq, "forget_entity: %p %d",
bfqq, atomic_read(&bfqq->ref));
@@ -581,7 +581,7 @@ static void bfq_forget_idle(struct bfq_service_tree *st)
struct bfq_entity *first_idle = st->first_idle;
struct bfq_entity *last_idle = st->last_idle;
- if (RB_EMPTY_ROOT(&st->active) && last_idle != NULL &&
+ if (RB_EMPTY_ROOT(&st->active) && last_idle &&
!bfq_gt(last_idle->finish, st->vtime)) {
/*
* Forget the whole idle tree, increasing the vtime past
@@ -590,7 +590,7 @@ static void bfq_forget_idle(struct bfq_service_tree *st)
st->vtime = last_idle->finish;
}
- if (first_idle != NULL && !bfq_gt(first_idle->finish, st->vtime))
+ if (first_idle && !bfq_gt(first_idle->finish, st->vtime))
bfq_put_idle_entity(st, first_idle);
}
@@ -600,19 +600,19 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
{
struct bfq_service_tree *new_st = old_st;
- if (entity->ioprio_changed) {
+ if (entity->prio_changed) {
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
unsigned short prev_weight, new_weight;
struct bfq_data *bfqd = NULL;
struct rb_root *root;
-#ifdef CONFIG_CGROUP_BFQIO
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
struct bfq_sched_data *sd;
struct bfq_group *bfqg;
#endif
- if (bfqq != NULL)
+ if (bfqq)
bfqd = bfqq->bfqd;
-#ifdef CONFIG_CGROUP_BFQIO
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
else {
sd = entity->my_sched_data;
bfqg = container_of(sd, struct bfq_group, sched_data);
@@ -634,12 +634,14 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
BUG();
}
entity->orig_weight = entity->new_weight;
- entity->ioprio =
- bfq_weight_to_ioprio(entity->orig_weight);
+ if (bfqq)
+ bfqq->ioprio =
+ bfq_weight_to_ioprio(entity->orig_weight);
}
- entity->ioprio_class = entity->new_ioprio_class;
- entity->ioprio_changed = 0;
+ if (bfqq)
+ bfqq->ioprio_class = bfqq->new_ioprio_class;
+ entity->prio_changed = 0;
/*
* NOTE: here we may be changing the weight too early,
@@ -652,7 +654,7 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
prev_weight = entity->weight;
new_weight = entity->orig_weight *
- (bfqq != NULL ? bfqq->wr_coeff : 1);
+ (bfqq ? bfqq->wr_coeff : 1);
/*
* If the weight of the entity changes, remove the entity
* from its old weight counter (if there is a counter
@@ -683,6 +685,10 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
return new_st;
}
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+static void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
+#endif
+
/**
* bfq_bfqq_served - update the scheduler status after selection for
* service.
@@ -693,7 +699,7 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
* are synchronized every time a new bfqq is selected for service. By now,
* we keep it to better check consistency.
*/
-static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served)
+static void bfq_bfqq_served(struct bfq_queue *bfqq, int served)
{
struct bfq_entity *entity = &bfqq->entity;
struct bfq_service_tree *st;
@@ -708,7 +714,10 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served)
st->vtime += bfq_delta(served, st->wsum);
bfq_forget_idle(st);
}
- bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %lu secs", served);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_set_start_empty_time(bfqq_group(bfqq));
+#endif
+ bfq_log_bfqq(bfqq->bfqd, bfqq, "bfqq_served %d secs", served);
}
/**
@@ -721,7 +730,7 @@ static void bfq_bfqq_served(struct bfq_queue *bfqq, unsigned long served)
* budget. In this way we should obtain a sort of time-domain
* fairness among all the seeky/slow queues.
*/
-static inline void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
+static void bfq_bfqq_charge_full_budget(struct bfq_queue *bfqq)
{
struct bfq_entity *entity = &bfqq->entity;
@@ -746,7 +755,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity)
struct bfq_service_tree *st = bfq_entity_service_tree(entity);
if (entity == sd->in_service_entity) {
- BUG_ON(entity->tree != NULL);
+ BUG_ON(entity->tree);
/*
* If we are requeueing the current entity we have
* to take care of not charging to it service it has
@@ -837,7 +846,7 @@ static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
if (!entity->on_st)
return 0;
- BUG_ON(was_in_service && entity->tree != NULL);
+ BUG_ON(was_in_service && entity->tree);
if (was_in_service) {
bfq_calc_finish(entity, entity->service);
@@ -846,7 +855,7 @@ static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
bfq_active_extract(st, entity);
else if (entity->tree == &st->idle)
bfq_idle_extract(st, entity);
- else if (entity->tree != NULL)
+ else if (entity->tree)
BUG();
if (was_in_service || sd->next_in_service == entity)
@@ -884,7 +893,7 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
*/
break;
- if (sd->next_in_service != NULL)
+ if (sd->next_in_service)
/*
* The parent entity is still backlogged and
* the budgets on the path towards the root
@@ -953,7 +962,7 @@ static struct bfq_entity *bfq_first_active_entity(struct bfq_service_tree *st)
struct bfq_entity *entry, *first = NULL;
struct rb_node *node = st->active.rb_node;
- while (node != NULL) {
+ while (node) {
entry = rb_entry(node, struct bfq_entity, rb_node);
left:
if (!bfq_gt(entry->start, st->vtime))
@@ -961,7 +970,7 @@ left:
BUG_ON(bfq_gt(entry->min_start, st->vtime));
- if (node->rb_left != NULL) {
+ if (node->rb_left) {
entry = rb_entry(node->rb_left,
struct bfq_entity, rb_node);
if (!bfq_gt(entry->min_start, st->vtime)) {
@@ -969,12 +978,12 @@ left:
goto left;
}
}
- if (first != NULL)
+ if (first)
break;
node = node->rb_right;
}
- BUG_ON(first == NULL && !RB_EMPTY_ROOT(&st->active));
+ BUG_ON(!first && !RB_EMPTY_ROOT(&st->active));
return first;
}
@@ -1030,13 +1039,13 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
struct bfq_entity *entity;
int i = 0;
- BUG_ON(sd->in_service_entity != NULL);
+ BUG_ON(sd->in_service_entity);
- if (bfqd != NULL &&
+ if (bfqd &&
jiffies - bfqd->bfq_class_idle_last_service > BFQ_CL_IDLE_TIMEOUT) {
entity = __bfq_lookup_next_entity(st + BFQ_IOPRIO_CLASSES - 1,
true);
- if (entity != NULL) {
+ if (entity) {
i = BFQ_IOPRIO_CLASSES - 1;
bfqd->bfq_class_idle_last_service = jiffies;
sd->next_in_service = entity;
@@ -1044,7 +1053,7 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
}
for (; i < BFQ_IOPRIO_CLASSES; i++) {
entity = __bfq_lookup_next_entity(st + i, false);
- if (entity != NULL) {
+ if (entity) {
if (extract) {
bfq_check_next_in_service(sd, entity);
bfq_active_extract(st + i, entity);
@@ -1067,27 +1076,27 @@ static struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
struct bfq_sched_data *sd;
struct bfq_queue *bfqq;
- BUG_ON(bfqd->in_service_queue != NULL);
+ BUG_ON(bfqd->in_service_queue);
if (bfqd->busy_queues == 0)
return NULL;
sd = &bfqd->root_group->sched_data;
- for (; sd != NULL; sd = entity->my_sched_data) {
+ for (; sd ; sd = entity->my_sched_data) {
entity = bfq_lookup_next_entity(sd, 1, bfqd);
- BUG_ON(entity == NULL);
+ BUG_ON(!entity);
entity->service = 0;
}
bfqq = bfq_entity_to_bfqq(entity);
- BUG_ON(bfqq == NULL);
+ BUG_ON(!bfqq);
return bfqq;
}
static void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
{
- if (bfqd->in_service_bic != NULL) {
+ if (bfqd->in_service_bic) {
put_io_context(bfqd->in_service_bic->icq.ioc);
bfqd->in_service_bic = NULL;
}
@@ -1114,6 +1123,10 @@ static void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_activate_entity(entity);
}
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+static void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
+#endif
+
/*
* Called when the bfqq no longer has requests pending, remove it from
* the service tree.
@@ -1147,6 +1160,10 @@ static void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (bfqq->wr_coeff > 1)
bfqd->wr_busy_queues--;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ bfqg_stats_update_dequeue(bfqq_group(bfqq));
+#endif
+
bfq_deactivate_bfqq(bfqd, bfqq, requeue);
}
diff --git a/block/bfq.h b/block/bfq.h
index 96ffbf773..320c4389b 100644
--- a/block/bfq.h
+++ b/block/bfq.h
@@ -1,5 +1,5 @@
/*
- * BFQ-v7r8 for 4.1.0: data structures and common functions prototypes.
+ * BFQ-v7r9 for 4.2.0: data structures and common functions prototypes.
*
* Based on ideas and code from CFQ:
* Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
@@ -17,12 +17,14 @@
#include <linux/hrtimer.h>
#include <linux/ioprio.h>
#include <linux/rbtree.h>
+#include <linux/blk-cgroup.h>
#define BFQ_IOPRIO_CLASSES 3
#define BFQ_CL_IDLE_TIMEOUT (HZ/5)
-#define BFQ_MIN_WEIGHT 1
-#define BFQ_MAX_WEIGHT 1000
+#define BFQ_MIN_WEIGHT 1
+#define BFQ_MAX_WEIGHT 1000
+#define BFQ_WEIGHT_CONVERSION_COEFF 10
#define BFQ_DEFAULT_QUEUE_IOPRIO 4
@@ -117,12 +119,8 @@ struct bfq_weight_counter {
* @ioprio: the ioprio in use.
* @new_weight: when a weight change is requested, the new weight value.
* @orig_weight: original weight, used to implement weight boosting
- * @new_ioprio: when an ioprio change is requested, the new ioprio value.
- * @ioprio_class: the ioprio_class in use.
- * @new_ioprio_class: when an ioprio_class change is requested, the new
- * ioprio_class value.
- * @ioprio_changed: flag, true when the user requested a weight, ioprio or
- * ioprio_class change.
+ * @prio_changed: flag, true when the user requested a weight, ioprio or
+ * ioprio_class change.
*
* A bfq_entity is used to represent either a bfq_queue (leaf node in the
* cgroup hierarchy) or a bfq_group into the upper level scheduler. Each
@@ -134,7 +132,7 @@ struct bfq_weight_counter {
* allow different weights on different devices, but this
* functionality is not exported to userspace by now. Priorities and
* weights are updated lazily, first storing the new values into the
- * new_* fields, then setting the @ioprio_changed flag. As soon as
+ * new_* fields, then setting the @prio_changed flag. As soon as
* there is a transition in the entity state that allows the priority
* update to take place the effective and the requested priority
* values are synchronized.
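
The lazy-update scheme described in the comment above (request a change by writing the new_* fields and raising @prio_changed, apply it later at a safe state transition) can be pictured with a toy struct; the field and function names below are made up for the sketch and are not BFQ code:

#include <stdio.h>

struct toy_entity {
	unsigned short weight, new_weight;	/* effective / requested */
	int prio_changed;			/* a change is pending   */
};

/* Caller path: only records the request, never touches the live weight. */
static void request_weight(struct toy_entity *e, unsigned short w)
{
	e->new_weight = w;
	e->prio_changed = 1;
}

/* Scheduler path: applied only at a transition where it is safe to do so. */
static void apply_pending(struct toy_entity *e)
{
	if (!e->prio_changed)
		return;
	e->weight = e->new_weight;
	e->prio_changed = 0;
}

int main(void)
{
	struct toy_entity e = { .weight = 100 };

	request_weight(&e, 300);
	printf("before transition: weight %u\n", e.weight);	/* 100 */
	apply_pending(&e);
	printf("after  transition: weight %u\n", e.weight);	/* 300 */
	return 0;
}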
@@ -161,7 +159,7 @@ struct bfq_entity {
u64 min_start;
- unsigned long service, budget;
+ int service, budget;
unsigned short weight, new_weight;
unsigned short orig_weight;
@@ -170,10 +168,7 @@ struct bfq_entity {
struct bfq_sched_data *my_sched_data;
struct bfq_sched_data *sched_data;
- unsigned short ioprio, new_ioprio;
- unsigned short ioprio_class, new_ioprio_class;
-
- int ioprio_changed;
+ int prio_changed;
};
struct bfq_group;
@@ -182,10 +177,14 @@ struct bfq_group;
* struct bfq_queue - leaf schedulable entity.
* @ref: reference counter.
* @bfqd: parent bfq_data.
+ * @new_ioprio: when an ioprio change is requested, the new ioprio value.
+ * @ioprio_class: the ioprio_class in use.
+ * @new_ioprio_class: when an ioprio_class change is requested, the new
+ * ioprio_class value.
* @new_bfqq: shared bfq_queue if queue is cooperating with
* one or more other queues.
- * @pos_node: request-position tree member (see bfq_data's @rq_pos_tree).
- * @pos_root: request-position tree root (see bfq_data's @rq_pos_tree).
+ * @pos_node: request-position tree member (see bfq_group's @rq_pos_tree).
+ * @pos_root: request-position tree root (see bfq_group's @rq_pos_tree).
* @sort_list: sorted list of pending requests.
* @next_rq: if fifo isn't expired, next request to serve.
* @queued: nr of requests queued in @sort_list.
@@ -239,6 +238,9 @@ struct bfq_queue {
atomic_t ref;
struct bfq_data *bfqd;
+ unsigned short ioprio, new_ioprio;
+ unsigned short ioprio_class, new_ioprio_class;
+
/* fields for cooperating queues handling */
struct bfq_queue *new_bfqq;
struct rb_node pos_node;
@@ -253,7 +255,7 @@ struct bfq_queue {
struct bfq_entity entity;
- unsigned long max_budget;
+ int max_budget;
unsigned long budget_timeout;
int dispatched;
@@ -302,6 +304,8 @@ struct bfq_ttime {
* @icq: associated io_cq structure
* @bfqq: array of two process queues, the sync and the async
* @ttime: associated @bfq_ttime struct
+ * @ioprio: per (request_queue, blkcg) ioprio.
+ * @blkcg_id: id of the blkcg the related io_cq belongs to.
* @wr_time_left: snapshot of the time left before weight raising ends
* for the sync queue associated to this process; this
* snapshot is taken to remember this value while the weight
@@ -329,6 +333,10 @@ struct bfq_io_cq {
struct bfq_ttime ttime;
int ioprio;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ uint64_t blkcg_id; /* the current blkcg ID */
+#endif
+
unsigned int wr_time_left;
bool saved_idle_window;
bool saved_IO_bound;
@@ -349,9 +357,6 @@ enum bfq_device_speed {
* struct bfq_data - per device data structure.
* @queue: request queue for the managed device.
* @root_group: root bfq_group for the device.
- * @rq_pos_tree: rbtree sorted by next_request position, used when
- * determining if two or more queues have interleaving
- * requests (see bfq_close_cooperator()).
* @active_numerous_groups: number of bfq_groups containing more than one
* active @bfq_entity.
* @queue_weights_tree: rbtree of weight counters of @bfq_queues, sorted by
@@ -485,9 +490,8 @@ struct bfq_data {
struct request_queue *queue;
struct bfq_group *root_group;
- struct rb_root rq_pos_tree;
-#ifdef CONFIG_CGROUP_BFQIO
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
int active_numerous_groups;
#endif
@@ -520,7 +524,7 @@ struct bfq_data {
ktime_t last_idling_start;
int peak_rate_samples;
u64 peak_rate;
- unsigned long bfq_max_budget;
+ int bfq_max_budget;
struct hlist_head group_list;
struct list_head active_list;
@@ -532,8 +536,8 @@ struct bfq_data {
unsigned int bfq_slice_idle;
u64 bfq_class_idle_last_service;
- unsigned int bfq_user_max_budget;
- unsigned int bfq_max_budget_async_rq;
+ int bfq_user_max_budget;
+ int bfq_max_budget_async_rq;
unsigned int bfq_timeout[2];
unsigned int bfq_coop_thresh;
@@ -593,15 +597,15 @@ enum bfqq_state_flags {
};
#define BFQ_BFQQ_FNS(name) \
-static inline void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \
+static void bfq_mark_bfqq_##name(struct bfq_queue *bfqq) \
{ \
(bfqq)->flags |= (1 << BFQ_BFQQ_FLAG_##name); \
} \
-static inline void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \
+static void bfq_clear_bfqq_##name(struct bfq_queue *bfqq) \
{ \
(bfqq)->flags &= ~(1 << BFQ_BFQQ_FLAG_##name); \
} \
-static inline int bfq_bfqq_##name(const struct bfq_queue *bfqq) \
+static int bfq_bfqq_##name(const struct bfq_queue *bfqq) \
{ \
return ((bfqq)->flags & (1 << BFQ_BFQQ_FLAG_##name)) != 0; \
}
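
For reference, instantiating BFQ_BFQQ_FNS() with a single flag name, say busy (one of the flags used elsewhere in this patch), expands to the three helpers below; the expansion is done by hand and wrapped in minimal stand-in types so it compiles on its own:

#include <assert.h>
#include <stdio.h>

/* Stand-ins, just enough for the expansion to build. */
enum { BFQ_BFQQ_FLAG_busy = 0 };
struct bfq_queue { unsigned int flags; };

/* Hand expansion of BFQ_BFQQ_FNS(busy): */
static void bfq_mark_bfqq_busy(struct bfq_queue *bfqq)
{
	bfqq->flags |= (1 << BFQ_BFQQ_FLAG_busy);
}
static void bfq_clear_bfqq_busy(struct bfq_queue *bfqq)
{
	bfqq->flags &= ~(1 << BFQ_BFQQ_FLAG_busy);
}
static int bfq_bfqq_busy(const struct bfq_queue *bfqq)
{
	return (bfqq->flags & (1 << BFQ_BFQQ_FLAG_busy)) != 0;
}

int main(void)
{
	struct bfq_queue q = { 0 };

	bfq_mark_bfqq_busy(&q);
	assert(bfq_bfqq_busy(&q));
	bfq_clear_bfqq_busy(&q);
	assert(!bfq_bfqq_busy(&q));
	puts("flag helpers behave as expected");
	return 0;
}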
@@ -640,14 +644,64 @@ enum bfqq_expiration {
BFQ_BFQQ_NO_MORE_REQUESTS, /* the queue has no more requests */
};
-#ifdef CONFIG_CGROUP_BFQIO
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+
+struct bfqg_stats {
+ /* total bytes transferred */
+ struct blkg_rwstat service_bytes;
+ /* total IOs serviced, post merge */
+ struct blkg_rwstat serviced;
+ /* number of ios merged */
+ struct blkg_rwstat merged;
+ /* total time spent on device in ns, may not be accurate w/ queueing */
+ struct blkg_rwstat service_time;
+ /* total time spent waiting in scheduler queue in ns */
+ struct blkg_rwstat wait_time;
+ /* number of IOs queued up */
+ struct blkg_rwstat queued;
+ /* total sectors transferred */
+ struct blkg_stat sectors;
+ /* total disk time and nr sectors dispatched by this group */
+ struct blkg_stat time;
+ /* time not charged to this cgroup */
+ struct blkg_stat unaccounted_time;
+ /* sum of number of ios queued across all samples */
+ struct blkg_stat avg_queue_size_sum;
+ /* count of samples taken for average */
+ struct blkg_stat avg_queue_size_samples;
+ /* how many times this group has been removed from service tree */
+ struct blkg_stat dequeue;
+ /* total time spent waiting for it to be assigned a timeslice. */
+ struct blkg_stat group_wait_time;
+ /* time spent idling for this blkcg_gq */
+ struct blkg_stat idle_time;
+ /* total time with empty current active q with other requests queued */
+ struct blkg_stat empty_time;
+ /* fields after this shouldn't be cleared on stat reset */
+ uint64_t start_group_wait_time;
+ uint64_t start_idle_time;
+ uint64_t start_empty_time;
+ uint16_t flags;
+};
+
+/*
+ * struct bfq_group_data - per-blkcg storage for the blkio subsystem.
+ *
+ * @pd: the embedded blkcg_policy_data (must be the first member)
+ * @weight: weight of the bfq_group
+ */
+struct bfq_group_data {
+ /* must be the first member */
+ struct blkcg_policy_data pd;
+
+ unsigned short weight;
+};
+
/**
* struct bfq_group - per (device, cgroup) data structure.
* @entity: schedulable entity to insert into the parent group sched_data.
* @sched_data: own sched_data, to contain child entities (they may be
* both bfq_queues and bfq_groups).
- * @group_node: node to be inserted into the bfqio_cgroup->group_data
- * list of the containing cgroup's bfqio_cgroup.
* @bfqd_node: node to be inserted into the @bfqd->group_list list
* of the groups active on the same device; used for cleanup.
* @bfqd: the bfq_data for the device this group acts upon.
@@ -663,23 +717,26 @@ enum bfqq_expiration {
* are groups with more than one active @bfq_entity
* (see the comments to the function
* bfq_bfqq_must_not_expire()).
+ * @rq_pos_tree: rbtree sorted by next_request position, used when
+ * determining if two or more queues have interleaving
+ * requests (see bfq_find_close_cooperator()).
*
* Each (device, cgroup) pair has its own bfq_group, i.e., for each cgroup
* there is a set of bfq_groups, each one collecting the lower-level
* entities belonging to the group that are acting on the same device.
*
* Locking works as follows:
- * o @group_node is protected by the bfqio_cgroup lock, and is accessed
- * via RCU from its readers.
* o @bfqd is protected by the queue lock, RCU is used to access it
* from the readers.
* o All the other fields are protected by the @bfqd queue lock.
*/
struct bfq_group {
+ /* must be the first member */
+ struct blkg_policy_data pd;
+
struct bfq_entity entity;
struct bfq_sched_data sched_data;
- struct hlist_node group_node;
struct hlist_node bfqd_node;
void *bfqd;
@@ -690,44 +747,33 @@ struct bfq_group {
struct bfq_entity *my_entity;
int active_entities;
-};
-/**
- * struct bfqio_cgroup - bfq cgroup data structure.
- * @css: subsystem state for bfq in the containing cgroup.
- * @online: flag marked when the subsystem is inserted.
- * @weight: cgroup weight.
- * @ioprio: cgroup ioprio.
- * @ioprio_class: cgroup ioprio_class.
- * @lock: spinlock that protects @ioprio, @ioprio_class and @group_data.
- * @group_data: list containing the bfq_group belonging to this cgroup.
- *
- * @group_data is accessed using RCU, with @lock protecting the updates,
- * @ioprio and @ioprio_class are protected by @lock.
- */
-struct bfqio_cgroup {
- struct cgroup_subsys_state css;
- bool online;
-
- unsigned short weight, ioprio, ioprio_class;
+ struct rb_root rq_pos_tree;
- spinlock_t lock;
- struct hlist_head group_data;
+ struct bfqg_stats stats;
+ struct bfqg_stats dead_stats; /* stats pushed from dead children */
};
+
#else
struct bfq_group {
struct bfq_sched_data sched_data;
struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
struct bfq_queue *async_idle_bfqq;
+
+ struct rb_root rq_pos_tree;
};
#endif
-static inline struct bfq_service_tree *
+static struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity);
+
+static struct bfq_service_tree *
bfq_entity_service_tree(struct bfq_entity *entity)
{
struct bfq_sched_data *sched_data = entity->sched_data;
- unsigned int idx = entity->ioprio_class - 1;
+ struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+ unsigned int idx = bfqq ? bfqq->ioprio_class - 1 :
+ BFQ_DEFAULT_GRP_CLASS;
BUG_ON(idx >= BFQ_IOPRIO_CLASSES);
BUG_ON(sched_data == NULL);
@@ -735,19 +781,18 @@ bfq_entity_service_tree(struct bfq_entity *entity)
return sched_data->service_tree + idx;
}
-static inline struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic,
- bool is_sync)
+static struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
{
return bic->bfqq[is_sync];
}
-static inline void bic_set_bfqq(struct bfq_io_cq *bic,
- struct bfq_queue *bfqq, bool is_sync)
+static void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq,
+ bool is_sync)
{
bic->bfqq[is_sync] = bfqq;
}
-static inline struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
+static struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
{
return bic->icq.q->elevator->elevator_data;
}
@@ -766,8 +811,7 @@ static inline struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
* the function returns NULL, with the queue unlocked, otherwise it
* returns the dereferenced pointer, with the queue locked.
*/
-static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr,
- unsigned long *flags)
+static struct bfq_data *bfq_get_bfqd_locked(void **ptr, unsigned long *flags)
{
struct bfq_data *bfqd;
@@ -776,7 +820,9 @@ static inline struct bfq_data *bfq_get_bfqd_locked(void **ptr,
if (bfqd != NULL) {
spin_lock_irqsave(bfqd->queue->queue_lock, *flags);
- if (*ptr == bfqd)
+ if (ptr == NULL)
+ printk(KERN_CRIT "get_bfqd_locked pointer NULL\n");
+ else if (*ptr == bfqd)
goto out;
spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
}
@@ -787,17 +833,37 @@ out:
return bfqd;
}
-static inline void bfq_put_bfqd_unlock(struct bfq_data *bfqd,
- unsigned long *flags)
+static void bfq_put_bfqd_unlock(struct bfq_data *bfqd, unsigned long *flags)
{
spin_unlock_irqrestore(bfqd->queue->queue_lock, *flags);
}
-static void bfq_check_ioprio_change(struct bfq_io_cq *bic);
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+
+static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
+{
+ struct bfq_entity *group_entity = bfqq->entity.parent;
+
+ if (!group_entity)
+ group_entity = &bfqq->bfqd->root_group->entity;
+
+ return container_of(group_entity, struct bfq_group, entity);
+}
+
+#else
+
+static struct bfq_group *bfq_bfqq_to_bfqg(struct bfq_queue *bfqq)
+{
+ return bfqq->bfqd->root_group;
+}
+
+#endif
+
+static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio);
static void bfq_put_queue(struct bfq_queue *bfqq);
static void bfq_dispatch_insert(struct request_queue *q, struct request *rq);
static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
- struct bfq_group *bfqg, int is_sync,
+ struct bio *bio, int is_sync,
struct bfq_io_cq *bic, gfp_t gfp_mask);
static void bfq_end_wr_async_queues(struct bfq_data *bfqd,
struct bfq_group *bfqg);