author		André Fabian Silva Delgado <emulatorman@parabola.nu>	2015-12-15 14:52:16 -0300
committer	André Fabian Silva Delgado <emulatorman@parabola.nu>	2015-12-15 14:52:16 -0300
commit		8d91c1e411f55d7ea91b1183a2e9f8088fb4d5be (patch)
tree		e9891aa6c295060d065adffd610c4f49ecf884f3 /block/bfq-cgroup.c
parent		a71852147516bc1cb5b0b3cbd13639bfd4022dc8 (diff)
Linux-libre 4.3.2-gnu
Diffstat (limited to 'block/bfq-cgroup.c')
-rw-r--r--	block/bfq-cgroup.c	1282
1 file changed, 553 insertions(+), 729 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index bc34d7a2b..11e2f1d4e 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -13,480 +13,254 @@
* file.
*/
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+#ifdef CONFIG_CGROUP_BFQIO
-/* bfqg stats flags */
-enum bfqg_stats_flags {
- BFQG_stats_waiting = 0,
- BFQG_stats_idling,
- BFQG_stats_empty,
-};
-
-#define BFQG_FLAG_FNS(name) \
-static void bfqg_stats_mark_##name(struct bfqg_stats *stats) \
-{ \
- stats->flags |= (1 << BFQG_stats_##name); \
-} \
-static void bfqg_stats_clear_##name(struct bfqg_stats *stats) \
-{ \
- stats->flags &= ~(1 << BFQG_stats_##name); \
-} \
-static int bfqg_stats_##name(struct bfqg_stats *stats) \
-{ \
- return (stats->flags & (1 << BFQG_stats_##name)) != 0; \
-} \
-
-BFQG_FLAG_FNS(waiting)
-BFQG_FLAG_FNS(idling)
-BFQG_FLAG_FNS(empty)
-#undef BFQG_FLAG_FNS
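/*
 * For illustration only (not part of the patch): BFQG_FLAG_FNS(waiting)
 * expands to the three helpers below, which set, clear and test the
 * BFQG_stats_waiting bit in stats->flags.
 */
static void bfqg_stats_mark_waiting(struct bfqg_stats *stats)
{
	stats->flags |= (1 << BFQG_stats_waiting);
}
static void bfqg_stats_clear_waiting(struct bfqg_stats *stats)
{
	stats->flags &= ~(1 << BFQG_stats_waiting);
}
static int bfqg_stats_waiting(struct bfqg_stats *stats)
{
	return (stats->flags & (1 << BFQG_stats_waiting)) != 0;
}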
-
-/* This should be called with the queue_lock held. */
-static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
-{
- unsigned long long now;
-
- if (!bfqg_stats_waiting(stats))
- return;
-
- now = sched_clock();
- if (time_after64(now, stats->start_group_wait_time))
- blkg_stat_add(&stats->group_wait_time,
- now - stats->start_group_wait_time);
- bfqg_stats_clear_waiting(stats);
-}
-
-/* This should be called with the queue_lock held. */
-static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
- struct bfq_group *curr_bfqg)
-{
- struct bfqg_stats *stats = &bfqg->stats;
-
- if (bfqg_stats_waiting(stats))
- return;
- if (bfqg == curr_bfqg)
- return;
- stats->start_group_wait_time = sched_clock();
- bfqg_stats_mark_waiting(stats);
-}
-
-/* This should be called with the queue_lock held. */
-static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
-{
- unsigned long long now;
-
- if (!bfqg_stats_empty(stats))
- return;
-
- now = sched_clock();
- if (time_after64(now, stats->start_empty_time))
- blkg_stat_add(&stats->empty_time,
- now - stats->start_empty_time);
- bfqg_stats_clear_empty(stats);
-}
-
-static void bfqg_stats_update_dequeue(struct bfq_group *bfqg)
-{
- blkg_stat_add(&bfqg->stats.dequeue, 1);
-}
+static DEFINE_MUTEX(bfqio_mutex);
-static void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
+static bool bfqio_is_removed(struct bfqio_cgroup *bgrp)
{
- struct bfqg_stats *stats = &bfqg->stats;
-
- if (blkg_rwstat_total(&stats->queued))
- return;
-
- /*
- * group is already marked empty. This can happen if bfqq got new
- * request in parent group and moved to this group while being added
- * to service tree. Just ignore the event and move on.
- */
- if (bfqg_stats_empty(stats))
- return;
-
- stats->start_empty_time = sched_clock();
- bfqg_stats_mark_empty(stats);
+ return bgrp ? !bgrp->online : false;
}
-static void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
-{
- struct bfqg_stats *stats = &bfqg->stats;
-
- if (bfqg_stats_idling(stats)) {
- unsigned long long now = sched_clock();
-
- if (time_after64(now, stats->start_idle_time))
- blkg_stat_add(&stats->idle_time,
- now - stats->start_idle_time);
- bfqg_stats_clear_idling(stats);
- }
-}
-
-static void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
-{
- struct bfqg_stats *stats = &bfqg->stats;
-
- stats->start_idle_time = sched_clock();
- bfqg_stats_mark_idling(stats);
-}
-
-static void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
-{
- struct bfqg_stats *stats = &bfqg->stats;
-
- blkg_stat_add(&stats->avg_queue_size_sum,
- blkg_rwstat_total(&stats->queued));
- blkg_stat_add(&stats->avg_queue_size_samples, 1);
- bfqg_stats_update_group_wait_time(stats);
-}
-
-static struct blkcg_policy blkcg_policy_bfq;
-
-/*
- * blk-cgroup policy-related handlers
- * The following functions help in converting between blk-cgroup
- * internal structures and BFQ-specific structures.
- */
-
-static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd)
-{
- return pd ? container_of(pd, struct bfq_group, pd) : NULL;
-}
+static struct bfqio_cgroup bfqio_root_cgroup = {
+ .weight = BFQ_DEFAULT_GRP_WEIGHT,
+ .ioprio = BFQ_DEFAULT_GRP_IOPRIO,
+ .ioprio_class = BFQ_DEFAULT_GRP_CLASS,
+};
-static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
+static inline void bfq_init_entity(struct bfq_entity *entity,
+ struct bfq_group *bfqg)
{
- return pd_to_blkg(&bfqg->pd);
+ entity->weight = entity->new_weight;
+ entity->orig_weight = entity->new_weight;
+ entity->ioprio = entity->new_ioprio;
+ entity->ioprio_class = entity->new_ioprio_class;
+ entity->parent = bfqg->my_entity;
+ entity->sched_data = &bfqg->sched_data;
}
-static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
+static struct bfqio_cgroup *css_to_bfqio(struct cgroup_subsys_state *css)
{
- return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq));
+ return css ? container_of(css, struct bfqio_cgroup, css) : NULL;
}
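/*
 * Illustration (a sketch, not part of the patch): css_to_bfqio() is the
 * usual container_of() downcast from the generic cgroup_subsys_state
 * embedded in struct bfqio_cgroup, e.g.
 *
 *	struct cgroup_subsys_state *css = task_css(current, bfqio_cgrp_id);
 *	struct bfqio_cgroup *bgrp = css_to_bfqio(css);
 *
 * A NULL css yields a NULL bgrp, so failed lookups can be passed through.
 */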
/*
- * bfq_group handlers
- * The following functions help in navigating the bfq_group hierarchy
- * by allowing to find the parent of a bfq_group or the bfq_group
- * associated to a bfq_queue.
+ * Search bgrp's hash table (for now just a list) for the bfq_group
+ * associated with bfqd. Must be called under rcu_read_lock().
*/
-
-static struct bfq_group *bfqg_parent(struct bfq_group *bfqg)
+static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp,
+ struct bfq_data *bfqd)
{
- struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent;
-
- return pblkg ? blkg_to_bfqg(pblkg) : NULL;
-}
-
-static struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
-{
- struct bfq_entity *group_entity = bfqq->entity.parent;
-
- return group_entity ? container_of(group_entity, struct bfq_group,
- entity) :
- bfqq->bfqd->root_group;
-}
-
-/*
- * The following two functions handle get and put of a bfq_group by
- * wrapping the related blk-cgroup hooks.
- */
-
-static void bfqg_get(struct bfq_group *bfqg)
-{
- return blkg_get(bfqg_to_blkg(bfqg));
-}
-
-static void bfqg_put(struct bfq_group *bfqg)
-{
- return blkg_put(bfqg_to_blkg(bfqg));
-}
+ struct bfq_group *bfqg;
+ void *key;
-static void bfqg_stats_update_io_add(struct bfq_group *bfqg,
- struct bfq_queue *bfqq,
- int rw)
-{
- blkg_rwstat_add(&bfqg->stats.queued, rw, 1);
- bfqg_stats_end_empty_time(&bfqg->stats);
- if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
- bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
-}
+ hlist_for_each_entry_rcu(bfqg, &bgrp->group_data, group_node) {
+ key = rcu_dereference(bfqg->bfqd);
+ if (key == bfqd)
+ return bfqg;
+ }
-static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int rw)
-{
- blkg_rwstat_add(&bfqg->stats.queued, rw, -1);
+ return NULL;
}
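/*
 * Locking sketch (illustrative, not part of the patch): the group list
 * is RCU-protected, so a plain lookup must sit in an RCU read-side
 * critical section:
 *
 *	rcu_read_lock();
 *	bfqg = bfqio_lookup_group(bgrp, bfqd);
 *	rcu_read_unlock();
 */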
-static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw)
+static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp,
+ struct bfq_group *bfqg)
{
- blkg_rwstat_add(&bfqg->stats.merged, rw, 1);
-}
+ struct bfq_entity *entity = &bfqg->entity;
-static void bfqg_stats_update_dispatch(struct bfq_group *bfqg,
- uint64_t bytes, int rw)
-{
- blkg_stat_add(&bfqg->stats.sectors, bytes >> 9);
- blkg_rwstat_add(&bfqg->stats.serviced, rw, 1);
- blkg_rwstat_add(&bfqg->stats.service_bytes, rw, bytes);
+ /*
+ * If the weight of the entity has never been set via the sysfs
+ * interface, then bgrp->weight == 0. In this case we initialize
+ * the weight from the current ioprio value. Otherwise, the group
+ * weight, if set, has priority over the ioprio value.
+ */
+ if (bgrp->weight == 0) {
+ entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio);
+ entity->new_ioprio = bgrp->ioprio;
+ } else {
+ if (bgrp->weight < BFQ_MIN_WEIGHT ||
+ bgrp->weight > BFQ_MAX_WEIGHT) {
+ printk(KERN_CRIT "bfq_group_init_entity: "
+ "bgrp->weight %d\n", bgrp->weight);
+ BUG();
+ }
+ entity->new_weight = bgrp->weight;
+ entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight);
+ }
+ entity->orig_weight = entity->weight = entity->new_weight;
+ entity->ioprio = entity->new_ioprio;
+ entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class;
+ entity->my_sched_data = &bfqg->sched_data;
+ bfqg->active_entities = 0;
}
-static void bfqg_stats_update_completion(struct bfq_group *bfqg,
- uint64_t start_time, uint64_t io_start_time, int rw)
+static inline void bfq_group_set_parent(struct bfq_group *bfqg,
+ struct bfq_group *parent)
{
- struct bfqg_stats *stats = &bfqg->stats;
- unsigned long long now = sched_clock();
-
- if (time_after64(now, io_start_time))
- blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
- if (time_after64(io_start_time, start_time))
- blkg_rwstat_add(&stats->wait_time, rw,
- io_start_time - start_time);
-}
+ struct bfq_entity *entity;
-/* @stats = 0 */
-static void bfqg_stats_reset(struct bfqg_stats *stats)
-{
- if (!stats)
- return;
-
- /* queued stats shouldn't be cleared */
- blkg_rwstat_reset(&stats->service_bytes);
- blkg_rwstat_reset(&stats->serviced);
- blkg_rwstat_reset(&stats->merged);
- blkg_rwstat_reset(&stats->service_time);
- blkg_rwstat_reset(&stats->wait_time);
- blkg_stat_reset(&stats->time);
- blkg_stat_reset(&stats->unaccounted_time);
- blkg_stat_reset(&stats->avg_queue_size_sum);
- blkg_stat_reset(&stats->avg_queue_size_samples);
- blkg_stat_reset(&stats->dequeue);
- blkg_stat_reset(&stats->group_wait_time);
- blkg_stat_reset(&stats->idle_time);
- blkg_stat_reset(&stats->empty_time);
-}
+ BUG_ON(parent == NULL);
+ BUG_ON(bfqg == NULL);
-/* @to += @from */
-static void bfqg_stats_merge(struct bfqg_stats *to, struct bfqg_stats *from)
-{
- if (!to || !from)
- return;
-
- /* queued stats shouldn't be cleared */
- blkg_rwstat_merge(&to->service_bytes, &from->service_bytes);
- blkg_rwstat_merge(&to->serviced, &from->serviced);
- blkg_rwstat_merge(&to->merged, &from->merged);
- blkg_rwstat_merge(&to->service_time, &from->service_time);
- blkg_rwstat_merge(&to->wait_time, &from->wait_time);
- blkg_stat_merge(&from->time, &from->time);
- blkg_stat_merge(&to->unaccounted_time, &from->unaccounted_time);
- blkg_stat_merge(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
- blkg_stat_merge(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
- blkg_stat_merge(&to->dequeue, &from->dequeue);
- blkg_stat_merge(&to->group_wait_time, &from->group_wait_time);
- blkg_stat_merge(&to->idle_time, &from->idle_time);
- blkg_stat_merge(&to->empty_time, &from->empty_time);
+ entity = &bfqg->entity;
+ entity->parent = parent->my_entity;
+ entity->sched_data = &parent->sched_data;
}
-/*
- * Transfer @bfqg's stats to its parent's dead_stats so that the ancestors'
- * recursive stats can still account for the amount used by this bfqg after
- * it's gone.
+/**
+ * bfq_group_chain_alloc - allocate a chain of groups.
+ * @bfqd: queue descriptor.
+ * @css: the leaf cgroup_subsys_state this chain starts from.
+ *
+ * Allocate a chain of groups starting from the one belonging to
+ * @css up to the root cgroup. Stop if a cgroup on the chain
+ * to the root already has an allocated group on @bfqd.
*/
-static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
+static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd,
+ struct cgroup_subsys_state *css)
{
- struct bfq_group *parent;
-
- if (!bfqg) /* root_group */
- return;
+ struct bfqio_cgroup *bgrp;
+ struct bfq_group *bfqg, *prev = NULL, *leaf = NULL;
- parent = bfqg_parent(bfqg);
+ for (; css != NULL; css = css->parent) {
+ bgrp = css_to_bfqio(css);
- lockdep_assert_held(bfqg_to_blkg(bfqg)->q->queue_lock);
-
- if (unlikely(!parent))
- return;
+ bfqg = bfqio_lookup_group(bgrp, bfqd);
+ if (bfqg != NULL) {
+ /*
+ * All the cgroups in the path from there to the
+ * root must have a bfq_group for bfqd, so we don't
+ * need any more allocations.
+ */
+ break;
+ }
- bfqg_stats_merge(&parent->dead_stats, &bfqg->stats);
- bfqg_stats_merge(&parent->dead_stats, &bfqg->dead_stats);
- bfqg_stats_reset(&bfqg->stats);
- bfqg_stats_reset(&bfqg->dead_stats);
-}
+ bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC);
+ if (bfqg == NULL)
+ goto cleanup;
-static void bfq_init_entity(struct bfq_entity *entity,
- struct bfq_group *bfqg)
-{
- struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+ bfq_group_init_entity(bgrp, bfqg);
+ bfqg->my_entity = &bfqg->entity;
- entity->weight = entity->new_weight;
- entity->orig_weight = entity->new_weight;
- if (bfqq) {
- bfqq->ioprio = bfqq->new_ioprio;
- bfqq->ioprio_class = bfqq->new_ioprio_class;
- bfqg_get(bfqg);
+ if (leaf == NULL) {
+ leaf = bfqg;
+ prev = leaf;
+ } else {
+ bfq_group_set_parent(prev, bfqg);
+ /*
+ * Build a list of allocated nodes using the bfqd
+ * field, which is still unused and will be
+ * initialized only after the node is connected.
+ */
+ prev->bfqd = bfqg;
+ prev = bfqg;
+ }
}
- entity->parent = bfqg->my_entity;
- entity->sched_data = &bfqg->sched_data;
-}
-
-static void bfqg_stats_init(struct bfqg_stats *stats)
-{
- blkg_rwstat_init(&stats->service_bytes);
- blkg_rwstat_init(&stats->serviced);
- blkg_rwstat_init(&stats->merged);
- blkg_rwstat_init(&stats->service_time);
- blkg_rwstat_init(&stats->wait_time);
- blkg_rwstat_init(&stats->queued);
-
- blkg_stat_init(&stats->sectors);
- blkg_stat_init(&stats->time);
-
- blkg_stat_init(&stats->unaccounted_time);
- blkg_stat_init(&stats->avg_queue_size_sum);
- blkg_stat_init(&stats->avg_queue_size_samples);
- blkg_stat_init(&stats->dequeue);
- blkg_stat_init(&stats->group_wait_time);
- blkg_stat_init(&stats->idle_time);
- blkg_stat_init(&stats->empty_time);
-}
-
-static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
- {
- return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
- }
-
-static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
-{
- return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
-}
-
-static void bfq_cpd_init(const struct blkcg *blkcg)
-{
- struct bfq_group_data *d =
- cpd_to_bfqgd(blkcg->pd[blkcg_policy_bfq.plid]);
-
- d->weight = BFQ_DEFAULT_GRP_WEIGHT;
-}
-
-static void bfq_pd_init(struct blkcg_gq *blkg)
-{
- struct bfq_group *bfqg = blkg_to_bfqg(blkg);
- struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
- struct bfq_entity *entity = &bfqg->entity;
- struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);
- entity->orig_weight = entity->weight = entity->new_weight = d->weight;
- entity->my_sched_data = &bfqg->sched_data;
- bfqg->my_entity = entity; /*
- * the root_group's will be set to NULL
- * in bfq_init_queue()
- */
- bfqg->bfqd = bfqd;
- bfqg->active_entities = 0;
- bfqg->rq_pos_tree = RB_ROOT;
+ return leaf;
- /* if the root_group does not exist, we are handling it right now */
- if (bfqd->root_group && bfqg != bfqd->root_group)
- hlist_add_head(&bfqg->bfqd_node, &bfqd->group_list);
+cleanup:
+ while (leaf != NULL) {
+ prev = leaf;
+ leaf = leaf->bfqd;
+ kfree(prev);
+ }
- bfqg_stats_init(&bfqg->stats);
- bfqg_stats_init(&bfqg->dead_stats);
+ return NULL;
}
-/* offset delta from bfqg->stats to bfqg->dead_stats */
-static const int dead_stats_off_delta = offsetof(struct bfq_group, dead_stats) -
- offsetof(struct bfq_group, stats);
-
-/* to be used by recursive prfill, sums live and dead stats recursively */
-static u64 bfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off)
+/**
+ * bfq_group_chain_link - link an allocated group chain to a cgroup
+ * hierarchy.
+ * @bfqd: the queue descriptor.
+ * @css: the leaf cgroup_subsys_state to start from.
+ * @leaf: the leaf group (to be associated to @css).
+ *
+ * Try to link a chain of groups to a cgroup hierarchy, connecting the
+ * nodes bottom-up, so we can be sure that when we find a cgroup in the
+ * hierarchy that already has a group associated to @bfqd, all the nodes
+ * in the path to the root cgroup have one too.
+ *
+ * On locking: the queue lock protects the hierarchy (there is a hierarchy
+ * per device) while the bfqio_cgroup lock protects the list of groups
+ * belonging to the same cgroup.
+ */
+static void bfq_group_chain_link(struct bfq_data *bfqd,
+ struct cgroup_subsys_state *css,
+ struct bfq_group *leaf)
{
- u64 sum = 0;
+ struct bfqio_cgroup *bgrp;
+ struct bfq_group *bfqg, *next, *prev = NULL;
+ unsigned long flags;
- sum += blkg_stat_recursive_sum(pd, off);
- sum += blkg_stat_recursive_sum(pd, off + dead_stats_off_delta);
- return sum;
-}
+ assert_spin_locked(bfqd->queue->queue_lock);
-/* to be used by recursive prfill, sums live and dead rwstats recursively */
-static struct blkg_rwstat bfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd,
- int off)
-{
- struct blkg_rwstat a, b;
+ for (; css != NULL && leaf != NULL; css = css->parent) {
+ bgrp = css_to_bfqio(css);
+ next = leaf->bfqd;
- a = blkg_rwstat_recursive_sum(pd, off);
- b = blkg_rwstat_recursive_sum(pd, off + dead_stats_off_delta);
- blkg_rwstat_merge(&a, &b);
- return a;
-}
+ bfqg = bfqio_lookup_group(bgrp, bfqd);
+ BUG_ON(bfqg != NULL);
-static void bfq_pd_reset_stats(struct blkcg_gq *blkg)
-{
- struct bfq_group *bfqg = blkg_to_bfqg(blkg);
+ spin_lock_irqsave(&bgrp->lock, flags);
- bfqg_stats_reset(&bfqg->stats);
- bfqg_stats_reset(&bfqg->dead_stats);
-}
+ rcu_assign_pointer(leaf->bfqd, bfqd);
+ hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data);
+ hlist_add_head(&leaf->bfqd_node, &bfqd->group_list);
-static void bfq_group_set_parent(struct bfq_group *bfqg,
- struct bfq_group *parent)
-{
- struct bfq_entity *entity;
+ spin_unlock_irqrestore(&bgrp->lock, flags);
- BUG_ON(!parent);
- BUG_ON(!bfqg);
- BUG_ON(bfqg == parent);
+ prev = leaf;
+ leaf = next;
+ }
- entity = &bfqg->entity;
- entity->parent = parent->my_entity;
- entity->sched_data = &parent->sched_data;
+ BUG_ON(css == NULL && leaf != NULL);
+ if (css != NULL && prev != NULL) {
+ bgrp = css_to_bfqio(css);
+ bfqg = bfqio_lookup_group(bgrp, bfqd);
+ bfq_group_set_parent(prev, bfqg);
+ }
}
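/*
 * Locking recap (illustrative): both the linking above and the lookups
 * in bfq_group_chain_alloc() run with the queue lock held, so the
 * per-device hierarchy is never observed half-linked.
 */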
+/**
+ * bfq_find_alloc_group - return the group associated to @bfqd in @css.
+ * @bfqd: queue descriptor.
+ * @css: cgroup_subsys_state of the cgroup being searched for.
+ *
+ * Return a group associated to @bfqd in @css, allocating one if
+ * necessary. When a group is returned all the cgroups in the path
+ * to the root have a group associated to @bfqd.
+ *
+ * If the allocation fails, return the root group: this breaks guarantees
+ * but is a safe fallback. If this loss becomes a problem it can be
+ * mitigated using the equivalent weight (given by the product of the
+ * weights of the groups in the path from @css to the root) in the
+ * root scheduler.
+ *
+ * We allocate all the missing nodes in the path from the leaf cgroup
+ * to the root and we connect the nodes only after all the allocations
+ * have been successful.
+ */
static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
- struct blkcg *blkcg)
+ struct cgroup_subsys_state *css)
{
- struct request_queue *q = bfqd->queue;
- struct bfq_group *bfqg = NULL, *parent;
- struct bfq_entity *entity = NULL;
+ struct bfqio_cgroup *bgrp = css_to_bfqio(css);
+ struct bfq_group *bfqg;
- assert_spin_locked(bfqd->queue->queue_lock);
+ bfqg = bfqio_lookup_group(bgrp, bfqd);
+ if (bfqg != NULL)
+ return bfqg;
- /* avoid lookup for the common case where there's no blkcg */
- if (blkcg == &blkcg_root) {
+ bfqg = bfq_group_chain_alloc(bfqd, css);
+ if (bfqg != NULL)
+ bfq_group_chain_link(bfqd, css, bfqg);
+ else
bfqg = bfqd->root_group;
- } else {
- struct blkcg_gq *blkg;
-
- blkg = blkg_lookup_create(blkcg, q);
- if (!IS_ERR(blkg))
- bfqg = blkg_to_bfqg(blkg);
- else /* fallback to root_group */
- bfqg = bfqd->root_group;
- }
-
- BUG_ON(!bfqg);
-
- /*
- * Update chain of bfq_groups as we might be handling a leaf group
- * which, along with some of its relatives, has not been hooked yet
- * to the private hierarchy of BFQ.
- */
- entity = &bfqg->entity;
- for_each_entity(entity) {
- bfqg = container_of(entity, struct bfq_group, entity);
- BUG_ON(!bfqg);
- if (bfqg != bfqd->root_group) {
- parent = bfqg_parent(bfqg);
- if (!parent)
- parent = bfqd->root_group;
- BUG_ON(!parent);
- bfq_group_set_parent(bfqg, parent);
- }
- }
return bfqg;
}
-static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
-
/**
* bfq_bfqq_move - migrate @bfqq to @bfqg.
* @bfqd: queue descriptor.
@@ -522,7 +296,6 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bfq_deactivate_bfqq(bfqd, bfqq, 0);
} else if (entity->on_st)
bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
- bfqg_put(bfqq_group(bfqq));
/*
* Here we use a reference to bfqg. We don't need a refcounter
@@ -531,15 +304,11 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
*/
entity->parent = bfqg->my_entity;
entity->sched_data = &bfqg->sched_data;
- bfqg_get(bfqg);
- if (busy) {
- bfq_pos_tree_add_move(bfqd, bfqq);
- if (resume)
- bfq_activate_bfqq(bfqd, bfqq);
- }
+ if (busy && resume)
+ bfq_activate_bfqq(bfqd, bfqq);
- if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
+ if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver)
bfq_schedule_dispatch(bfqd);
}
@@ -547,9 +316,9 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
* __bfq_bic_change_cgroup - move @bic to @cgroup.
* @bfqd: the queue descriptor.
* @bic: the bic to move.
- * @blkcg: the blk-cgroup to move to.
+ * @cgroup: the cgroup to move to.
*
- * Move bic to blkcg, assuming that bfqd->queue is locked; the caller
+ * Move bic to cgroup, assuming that bfqd->queue is locked; the caller
* has to make sure that the reference to cgroup is valid across the call.
*
* NOTE: an alternative approach might have been to store the current
@@ -558,17 +327,18 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
*/
static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
struct bfq_io_cq *bic,
- struct blkcg *blkcg)
+ struct cgroup_subsys_state *css)
{
struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
- struct bfq_group *bfqg;
struct bfq_entity *entity;
+ struct bfq_group *bfqg;
+ struct bfqio_cgroup *bgrp;
- lockdep_assert_held(bfqd->queue->queue_lock);
+ bgrp = css_to_bfqio(css);
- bfqg = bfq_find_alloc_group(bfqd, blkcg);
- if (async_bfqq) {
+ bfqg = bfq_find_alloc_group(bfqd, css);
+ if (async_bfqq != NULL) {
entity = &async_bfqq->entity;
if (entity->sched_data != &bfqg->sched_data) {
@@ -580,7 +350,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
}
}
- if (sync_bfqq) {
+ if (sync_bfqq != NULL) {
entity = &sync_bfqq->entity;
if (entity->sched_data != &bfqg->sched_data)
bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
@@ -589,39 +359,74 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
return bfqg;
}
-static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
+/**
+ * bfq_bic_change_cgroup - move @bic to @cgroup.
+ * @bic: the bic being migrated.
+ * @cgroup: the destination cgroup.
+ *
+ * When the task owning @bic is moved to @cgroup, @bic is immediately
+ * moved into its new parent group.
+ */
+static void bfq_bic_change_cgroup(struct bfq_io_cq *bic,
+ struct cgroup_subsys_state *css)
+{
+ struct bfq_data *bfqd;
+ unsigned long uninitialized_var(flags);
+
+ bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
+ &flags);
+ if (bfqd != NULL) {
+ __bfq_bic_change_cgroup(bfqd, bic, css);
+ bfq_put_bfqd_unlock(bfqd, &flags);
+ }
+}
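/*
 * Pattern note (a sketch): bfq_get_bfqd_locked() is assumed to
 * rcu-dereference the bfqd pointer and return it with the queue lock
 * held, or NULL if the device is going away; bfq_put_bfqd_unlock()
 * drops the lock. The same pairing guards bfq_destroy_group() below
 * against racing with device destruction.
 */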
+
+/**
+ * bfq_bic_update_cgroup - update the cgroup of @bic.
+ * @bic: the @bic to update.
+ *
+ * Make sure that @bic is enqueued in the cgroup of the current task.
+ * We need this in addition to moving bics during the cgroup attach
+ * phase because the task owning @bic could be at its first disk
+ * access, or we may have ended up in the root cgroup as the result
+ * of a memory allocation failure; here we try to move to the right
+ * group.
+ *
+ * Must be called under the queue lock. It is safe to use the returned
+ * value even after the rcu_read_unlock() as the migration/destruction
+ * paths act under the queue lock too. IOW it is impossible to race with
+ * group migration/destruction and end up with an invalid group as:
+ * a) here cgroup has not yet been destroyed, nor its destroy callback
+ * has started execution, as current holds a reference to it,
+ * b) if it is destroyed after rcu_read_unlock() [after current is
+ * migrated to a different cgroup] its attach() callback will have
+ * taken care of removing all the references to the old cgroup data.
+ */
+static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic)
{
struct bfq_data *bfqd = bic_to_bfqd(bic);
- struct blkcg *blkcg;
- struct bfq_group *bfqg = NULL;
- uint64_t id;
+ struct bfq_group *bfqg;
+ struct cgroup_subsys_state *css;
+
+ BUG_ON(bfqd == NULL);
rcu_read_lock();
- blkcg = bio_blkcg(bio);
- id = blkcg->css.serial_nr;
+ css = task_css(current, bfqio_cgrp_id);
+ bfqg = __bfq_bic_change_cgroup(bfqd, bic, css);
rcu_read_unlock();
- /*
- * Check whether blkcg has changed. The condition may trigger
- * spuriously on a newly created cic but there's no harm.
- */
- if (unlikely(!bfqd) || likely(bic->blkcg_id == id))
- return;
-
- bfqg = __bfq_bic_change_cgroup(bfqd, bic, blkcg);
- BUG_ON(!bfqg);
- bic->blkcg_id = id;
+ return bfqg;
}
/**
* bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
* @st: the service tree being flushed.
*/
-static void bfq_flush_idle_tree(struct bfq_service_tree *st)
+static inline void bfq_flush_idle_tree(struct bfq_service_tree *st)
{
struct bfq_entity *entity = st->first_idle;
- for (; entity ; entity = st->first_idle)
+ for (; entity != NULL; entity = st->first_idle)
__bfq_deactivate_entity(entity, 0);
}
@@ -630,12 +435,12 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st)
* @bfqd: the device data structure with the root group.
* @entity: the entity to move.
*/
-static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
- struct bfq_entity *entity)
+static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
+ struct bfq_entity *entity)
{
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
- BUG_ON(!bfqq);
+ BUG_ON(bfqq == NULL);
bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
return;
}
@@ -649,9 +454,9 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
*
* Needs queue_lock to be taken and reference to be valid over the call.
*/
-static void bfq_reparent_active_entities(struct bfq_data *bfqd,
- struct bfq_group *bfqg,
- struct bfq_service_tree *st)
+static inline void bfq_reparent_active_entities(struct bfq_data *bfqd,
+ struct bfq_group *bfqg,
+ struct bfq_service_tree *st)
{
struct rb_root *active = &st->active;
struct bfq_entity *entity = NULL;
@@ -659,10 +464,10 @@ static void bfq_reparent_active_entities(struct bfq_data *bfqd,
if (!RB_EMPTY_ROOT(&st->active))
entity = bfq_entity_of(rb_first(active));
- for (; entity ; entity = bfq_entity_of(rb_first(active)))
+ for (; entity != NULL; entity = bfq_entity_of(rb_first(active)))
bfq_reparent_leaf_entity(bfqd, entity);
- if (bfqg->sched_data.in_service_entity)
+ if (bfqg->sched_data.in_service_entity != NULL)
bfq_reparent_leaf_entity(bfqd,
bfqg->sched_data.in_service_entity);
@@ -671,21 +476,20 @@ static void bfq_reparent_active_entities(struct bfq_data *bfqd,
/**
* bfq_destroy_group - destroy @bfqg.
+ * @bgrp: the bfqio_cgroup containing @bfqg.
* @bfqg: the group being destroyed.
*
* Destroy @bfqg, making sure that it is not referenced from its parent.
- * blkio already grabs the queue_lock for us, so no need to use RCU-based magic
*/
-static void bfq_pd_offline(struct blkcg_gq *blkg)
+static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg)
{
+ struct bfq_data *bfqd;
struct bfq_service_tree *st;
- struct bfq_group *bfqg = blkg_to_bfqg(blkg);
- struct bfq_data *bfqd = bfqg->bfqd;
struct bfq_entity *entity = bfqg->my_entity;
+ unsigned long uninitialized_var(flags);
int i;
- if (!entity) /* root group */
- return;
+ hlist_del(&bfqg->group_node);
/*
* Empty all service_trees belonging to this group before
@@ -714,19 +518,37 @@ static void bfq_pd_offline(struct blkcg_gq *blkg)
* There is no need to put the sync queues, as the
* scheduler has taken no reference.
*/
- bfq_reparent_active_entities(bfqd, bfqg, st);
+ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
+ if (bfqd != NULL) {
+ bfq_reparent_active_entities(bfqd, bfqg, st);
+ bfq_put_bfqd_unlock(bfqd, &flags);
+ }
BUG_ON(!RB_EMPTY_ROOT(&st->active));
BUG_ON(!RB_EMPTY_ROOT(&st->idle));
}
- BUG_ON(bfqg->sched_data.next_in_service);
- BUG_ON(bfqg->sched_data.in_service_entity);
+ BUG_ON(bfqg->sched_data.next_in_service != NULL);
+ BUG_ON(bfqg->sched_data.in_service_entity != NULL);
- hlist_del(&bfqg->bfqd_node);
- __bfq_deactivate_entity(entity, 0);
- bfq_put_async_queues(bfqd, bfqg);
- BUG_ON(entity->tree);
+ /*
+ * We may race with device destruction, take extra care when
+ * dereferencing bfqg->bfqd.
+ */
+ bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
+ if (bfqd != NULL) {
+ hlist_del(&bfqg->bfqd_node);
+ __bfq_deactivate_entity(entity, 0);
+ bfq_put_async_queues(bfqd, bfqg);
+ bfq_put_bfqd_unlock(bfqd, &flags);
+ }
+ BUG_ON(entity->tree != NULL);
- bfqg_stats_xfer_dead(bfqg);
+ /*
+ * No need to defer the kfree() to the end of the RCU grace
+ * period: we are called from the destroy() callback of our
+ * cgroup, so we can be sure that no one is a) still using
+ * this cgroup or b) doing lookups in it.
+ */
+ kfree(bfqg);
}
static void bfq_end_wr_async(struct bfq_data *bfqd)
@@ -773,309 +595,312 @@ static void bfq_disconnect_groups(struct bfq_data *bfqd)
}
}
-static u64 bfqio_cgroup_weight_read(struct cgroup_subsys_state *css,
- struct cftype *cftype)
+static inline void bfq_free_root_group(struct bfq_data *bfqd)
{
- struct blkcg *blkcg = css_to_blkcg(css);
- struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
- int ret = -EINVAL;
+ struct bfqio_cgroup *bgrp = &bfqio_root_cgroup;
+ struct bfq_group *bfqg = bfqd->root_group;
+
+ bfq_put_async_queues(bfqd, bfqg);
- spin_lock_irq(&blkcg->lock);
- ret = bfqgd->weight;
- spin_unlock_irq(&blkcg->lock);
+ spin_lock_irq(&bgrp->lock);
+ hlist_del_rcu(&bfqg->group_node);
+ spin_unlock_irq(&bgrp->lock);
- return ret;
+ /*
+ * No need to synchronize_rcu() here: since the device is gone
+ * there cannot be any read-side access to its root_group.
+ */
+ kfree(bfqg);
}
-static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css,
- struct cftype *cftype,
- u64 val)
+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
{
- struct blkcg *blkcg = css_to_blkcg(css);
- struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
- struct blkcg_gq *blkg;
- int ret = -EINVAL;
-
- if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
- return ret;
-
- ret = 0;
- spin_lock_irq(&blkcg->lock);
- bfqgd->weight = (unsigned short)val;
- hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
- struct bfq_group *bfqg = blkg_to_bfqg(blkg);
- if (!bfqg)
- continue;
- /*
- * Setting the prio_changed flag of the entity
- * to 1 with new_weight == weight would re-set
- * the value of the weight to its ioprio mapping.
- * Set the flag only if necessary.
- */
- if ((unsigned short)val != bfqg->entity.new_weight) {
- bfqg->entity.new_weight = (unsigned short)val;
- /*
- * Make sure that the above new value has been
- * stored in bfqg->entity.new_weight before
- * setting the prio_changed flag. In fact,
- * this flag may be read asynchronously (in
- * critical sections protected by a different
- * lock than that held here), and finding this
- * flag set may cause the execution of the code
- * for updating parameters whose value may
- * depend also on bfqg->entity.new_weight (in
- * __bfq_entity_update_weight_prio).
- * This barrier makes sure that the new value
- * of bfqg->entity.new_weight is correctly
- * seen in that code.
- */
- smp_wmb();
- bfqg->entity.prio_changed = 1;
- }
- }
- spin_unlock_irq(&blkcg->lock);
+ struct bfq_group *bfqg;
+ struct bfqio_cgroup *bgrp;
+ int i;
- return ret;
-}
+ bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node);
+ if (bfqg == NULL)
+ return NULL;
-static int bfqg_print_stat(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
- &blkcg_policy_bfq, seq_cft(sf)->private, false);
- return 0;
-}
+ bfqg->entity.parent = NULL;
+ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
+ bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
-static int bfqg_print_rwstat(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
- &blkcg_policy_bfq, seq_cft(sf)->private, true);
- return 0;
+ bgrp = &bfqio_root_cgroup;
+ spin_lock_irq(&bgrp->lock);
+ rcu_assign_pointer(bfqg->bfqd, bfqd);
+ hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data);
+ spin_unlock_irq(&bgrp->lock);
+
+ return bfqg;
}
-static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
-{
- u64 sum = bfqg_stat_pd_recursive_sum(pd, off);
+#define SHOW_FUNCTION(__VAR) \
+static u64 bfqio_cgroup_##__VAR##_read(struct cgroup_subsys_state *css, \
+ struct cftype *cftype) \
+{ \
+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); \
+ u64 ret = -ENODEV; \
+ \
+ mutex_lock(&bfqio_mutex); \
+ if (bfqio_is_removed(bgrp)) \
+ goto out_unlock; \
+ \
+ spin_lock_irq(&bgrp->lock); \
+ ret = bgrp->__VAR; \
+ spin_unlock_irq(&bgrp->lock); \
+ \
+out_unlock: \
+ mutex_unlock(&bfqio_mutex); \
+ return ret; \
+}
+
+SHOW_FUNCTION(weight);
+SHOW_FUNCTION(ioprio);
+SHOW_FUNCTION(ioprio_class);
+#undef SHOW_FUNCTION
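/*
 * For illustration (not part of the patch): SHOW_FUNCTION(weight)
 * expands to bfqio_cgroup_weight_read(), which returns bgrp->weight
 * under bgrp->lock, or -ENODEV once the cgroup has gone offline:
 *
 *	mutex_lock(&bfqio_mutex);
 *	if (bfqio_is_removed(bgrp))
 *		goto out_unlock;
 *	spin_lock_irq(&bgrp->lock);
 *	ret = bgrp->weight;
 *	spin_unlock_irq(&bgrp->lock);
 */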
+
+#define STORE_FUNCTION(__VAR, __MIN, __MAX) \
+static int bfqio_cgroup_##__VAR##_write(struct cgroup_subsys_state *css,\
+ struct cftype *cftype, \
+ u64 val) \
+{ \
+ struct bfqio_cgroup *bgrp = css_to_bfqio(css); \
+ struct bfq_group *bfqg; \
+ int ret = -EINVAL; \
+ \
+ if (val < (__MIN) || val > (__MAX)) \
+ return ret; \
+ \
+ ret = -ENODEV; \
+ mutex_lock(&bfqio_mutex); \
+ if (bfqio_is_removed(bgrp)) \
+ goto out_unlock; \
+ ret = 0; \
+ \
+ spin_lock_irq(&bgrp->lock); \
+ bgrp->__VAR = (unsigned short)val; \
+ hlist_for_each_entry(bfqg, &bgrp->group_data, group_node) { \
+ /* \
+ * Setting the ioprio_changed flag of the entity \
+ * to 1 with new_##__VAR == ##__VAR would re-set \
+ * the value of the weight to its ioprio mapping. \
+ * Set the flag only if necessary. \
+ */ \
+ if ((unsigned short)val != bfqg->entity.new_##__VAR) { \
+ bfqg->entity.new_##__VAR = (unsigned short)val; \
+ /* \
+ * Make sure that the above new value has been \
+ * stored in bfqg->entity.new_##__VAR before \
+ * setting the ioprio_changed flag. In fact, \
+ * this flag may be read asynchronously (in \
+ * critical sections protected by a different \
+ * lock than that held here), and finding this \
+ * flag set may cause the execution of the code \
+ * for updating parameters whose value may \
+ * depend also on bfqg->entity.new_##__VAR (in \
+ * __bfq_entity_update_weight_prio). \
+ * This barrier makes sure that the new value \
+ * of bfqg->entity.new_##__VAR is correctly \
+ * seen in that code. \
+ */ \
+ smp_wmb(); \
+ bfqg->entity.ioprio_changed = 1; \
+ } \
+ } \
+ spin_unlock_irq(&bgrp->lock); \
+ \
+out_unlock: \
+ mutex_unlock(&bfqio_mutex); \
+ return ret; \
+}
+
+STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT);
+STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1);
+STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE);
+#undef STORE_FUNCTION
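/*
 * Illustrative usage from userspace (hedged: the exact file names carry
 * the controller prefix and the mount point varies by distribution):
 *
 *	# mount -t cgroup -o bfqio none /sys/fs/cgroup/bfqio
 *	# mkdir /sys/fs/cgroup/bfqio/slow
 *	# echo 100 > /sys/fs/cgroup/bfqio/slow/bfqio.weight
 *
 * Writes outside [BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT] fail with -EINVAL;
 * writes to an offlined group fail with -ENODEV.
 */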
- return __blkg_prfill_u64(sf, pd, sum);
-}
+static struct cftype bfqio_files[] = {
+ {
+ .name = "weight",
+ .read_u64 = bfqio_cgroup_weight_read,
+ .write_u64 = bfqio_cgroup_weight_write,
+ },
+ {
+ .name = "ioprio",
+ .read_u64 = bfqio_cgroup_ioprio_read,
+ .write_u64 = bfqio_cgroup_ioprio_write,
+ },
+ {
+ .name = "ioprio_class",
+ .read_u64 = bfqio_cgroup_ioprio_class_read,
+ .write_u64 = bfqio_cgroup_ioprio_class_write,
+ },
+ { }, /* terminate */
+};
-static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
+static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys_state
+ *parent_css)
{
- struct blkg_rwstat sum = bfqg_rwstat_pd_recursive_sum(pd, off);
+ struct bfqio_cgroup *bgrp;
- return __blkg_prfill_rwstat(sf, pd, &sum);
-}
+ if (parent_css != NULL) {
+ bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL);
+ if (bgrp == NULL)
+ return ERR_PTR(-ENOMEM);
+ } else
+ bgrp = &bfqio_root_cgroup;
-static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- bfqg_prfill_stat_recursive, &blkcg_policy_bfq,
- seq_cft(sf)->private, false);
- return 0;
+ spin_lock_init(&bgrp->lock);
+ INIT_HLIST_HEAD(&bgrp->group_data);
+ bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO;
+ bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS;
+
+ return &bgrp->css;
}
-static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
+/*
+ * We cannot support shared io contexts, as we have no means to support
+ * two tasks with the same ioc in two different groups without major rework
+ * of the main bic/bfqq data structures. For now we allow a task to change
+ * its cgroup only if it's the only owner of its ioc; the drawback of this
+ * behavior is that a group containing a task that forked using CLONE_IO
+ * will not be destroyed until the tasks sharing the ioc die.
+ */
+static int bfqio_can_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
- seq_cft(sf)->private, true);
- return 0;
+ struct task_struct *task;
+ struct io_context *ioc;
+ int ret = 0;
+
+ cgroup_taskset_for_each(task, tset) {
+ /*
+ * task_lock() is needed to avoid races with
+ * exit_io_context()
+ */
+ task_lock(task);
+ ioc = task->io_context;
+ if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1)
+ /*
+ * ioc == NULL means that the task is either too
+ * young or exiting: if it still has no ioc, the
+ * ioc can't be shared; if the task is exiting, the
+ * attach will fail anyway, no matter what we
+ * return here.
+ */
+ ret = -EINVAL;
+ task_unlock(task);
+ if (ret)
+ break;
+ }
+
+ return ret;
}
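/*
 * Example of the case rejected above (illustrative): two tasks created
 * with clone(..., CLONE_IO) share a single io_context, so
 * atomic_read(&ioc->nr_tasks) > 1 and neither task can be moved to a
 * different bfqio cgroup on its own.
 */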
-static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
+static void bfqio_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
{
- struct bfq_group *bfqg = pd_to_bfqg(pd);
- u64 samples = blkg_stat_read(&bfqg->stats.avg_queue_size_samples);
- u64 v = 0;
+ struct task_struct *task;
+ struct io_context *ioc;
+ struct io_cq *icq;
- if (samples) {
- v = blkg_stat_read(&bfqg->stats.avg_queue_size_sum);
- v = div64_u64(v, samples);
+ /*
+ * IMPORTANT NOTE: The move of more than one process at a time to a
+ * new group has not yet been tested.
+ */
+ cgroup_taskset_for_each(task, tset) {
+ ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
+ if (ioc) {
+ /*
+ * Handle cgroup change here.
+ */
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(icq, &ioc->icq_list, ioc_node)
+ if (!strncmp(
+ icq->q->elevator->type->elevator_name,
+ "bfq", ELV_NAME_MAX))
+ bfq_bic_change_cgroup(icq_to_bic(icq),
+ css);
+ rcu_read_unlock();
+ put_io_context(ioc);
+ }
}
- __blkg_prfill_u64(sf, pd, v);
- return 0;
}
-/* print avg_queue_size */
-static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
+static void bfqio_destroy(struct cgroup_subsys_state *css)
{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- bfqg_prfill_avg_queue_size, &blkcg_policy_bfq,
- 0, false);
- return 0;
+ struct bfqio_cgroup *bgrp = css_to_bfqio(css);
+ struct hlist_node *tmp;
+ struct bfq_group *bfqg;
+
+ /*
+ * Since we are destroying the cgroup, there are no more tasks
+ * referencing it, and all the RCU grace periods that may have
+ * referenced it are ended (as the destruction of the parent
+ * cgroup is RCU-safe); bgrp->group_data will not be accessed by
+ * anything else and we don't need any synchronization.
+ */
+ hlist_for_each_entry_safe(bfqg, tmp, &bgrp->group_data, group_node)
+ bfq_destroy_group(bgrp, bfqg);
+
+ BUG_ON(!hlist_empty(&bgrp->group_data));
+
+ kfree(bgrp);
}
-static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
+static int bfqio_css_online(struct cgroup_subsys_state *css)
{
- int ret;
+ struct bfqio_cgroup *bgrp = css_to_bfqio(css);
- ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
- if (ret)
- return NULL;
+ mutex_lock(&bfqio_mutex);
+ bgrp->online = true;
+ mutex_unlock(&bfqio_mutex);
- return blkg_to_bfqg(bfqd->queue->root_blkg);
+ return 0;
}
-static struct cftype bfqio_files[] = {
- {
- .name = "bfq.weight",
- .read_u64 = bfqio_cgroup_weight_read,
- .write_u64 = bfqio_cgroup_weight_write,
- },
- /* statistics, cover only the tasks in the bfqg */
- {
- .name = "bfq.time",
- .private = offsetof(struct bfq_group, stats.time),
- .seq_show = bfqg_print_stat,
- },
- {
- .name = "bfq.sectors",
- .private = offsetof(struct bfq_group, stats.sectors),
- .seq_show = bfqg_print_stat,
- },
- {
- .name = "bfq.io_service_bytes",
- .private = offsetof(struct bfq_group, stats.service_bytes),
- .seq_show = bfqg_print_rwstat,
- },
- {
- .name = "bfq.io_serviced",
- .private = offsetof(struct bfq_group, stats.serviced),
- .seq_show = bfqg_print_rwstat,
- },
- {
- .name = "bfq.io_service_time",
- .private = offsetof(struct bfq_group, stats.service_time),
- .seq_show = bfqg_print_rwstat,
- },
- {
- .name = "bfq.io_wait_time",
- .private = offsetof(struct bfq_group, stats.wait_time),
- .seq_show = bfqg_print_rwstat,
- },
- {
- .name = "bfq.io_merged",
- .private = offsetof(struct bfq_group, stats.merged),
- .seq_show = bfqg_print_rwstat,
- },
- {
- .name = "bfq.io_queued",
- .private = offsetof(struct bfq_group, stats.queued),
- .seq_show = bfqg_print_rwstat,
- },
+static void bfqio_css_offline(struct cgroup_subsys_state *css)
+{
+ struct bfqio_cgroup *bgrp = css_to_bfqio(css);
- /* the same statistics which cover the bfqg and its descendants */
- {
- .name = "bfq.time_recursive",
- .private = offsetof(struct bfq_group, stats.time),
- .seq_show = bfqg_print_stat_recursive,
- },
- {
- .name = "bfq.sectors_recursive",
- .private = offsetof(struct bfq_group, stats.sectors),
- .seq_show = bfqg_print_stat_recursive,
- },
- {
- .name = "bfq.io_service_bytes_recursive",
- .private = offsetof(struct bfq_group, stats.service_bytes),
- .seq_show = bfqg_print_rwstat_recursive,
- },
- {
- .name = "bfq.io_serviced_recursive",
- .private = offsetof(struct bfq_group, stats.serviced),
- .seq_show = bfqg_print_rwstat_recursive,
- },
- {
- .name = "bfq.io_service_time_recursive",
- .private = offsetof(struct bfq_group, stats.service_time),
- .seq_show = bfqg_print_rwstat_recursive,
- },
- {
- .name = "bfq.io_wait_time_recursive",
- .private = offsetof(struct bfq_group, stats.wait_time),
- .seq_show = bfqg_print_rwstat_recursive,
- },
- {
- .name = "bfq.io_merged_recursive",
- .private = offsetof(struct bfq_group, stats.merged),
- .seq_show = bfqg_print_rwstat_recursive,
- },
- {
- .name = "bfq.io_queued_recursive",
- .private = offsetof(struct bfq_group, stats.queued),
- .seq_show = bfqg_print_rwstat_recursive,
- },
- {
- .name = "bfq.avg_queue_size",
- .seq_show = bfqg_print_avg_queue_size,
- },
- {
- .name = "bfq.group_wait_time",
- .private = offsetof(struct bfq_group, stats.group_wait_time),
- .seq_show = bfqg_print_stat,
- },
- {
- .name = "bfq.idle_time",
- .private = offsetof(struct bfq_group, stats.idle_time),
- .seq_show = bfqg_print_stat,
- },
- {
- .name = "bfq.empty_time",
- .private = offsetof(struct bfq_group, stats.empty_time),
- .seq_show = bfqg_print_stat,
- },
- {
- .name = "bfq.dequeue",
- .private = offsetof(struct bfq_group, stats.dequeue),
- .seq_show = bfqg_print_stat,
- },
- {
- .name = "bfq.unaccounted_time",
- .private = offsetof(struct bfq_group, stats.unaccounted_time),
- .seq_show = bfqg_print_stat,
- },
- { } /* terminate */
-};
+ mutex_lock(&bfqio_mutex);
+ bgrp->online = false;
+ mutex_unlock(&bfqio_mutex);
+}
-static struct blkcg_policy blkcg_policy_bfq = {
- .pd_size = sizeof(struct bfq_group),
- .cpd_size = sizeof(struct bfq_group_data),
- .cftypes = bfqio_files,
- .pd_init_fn = bfq_pd_init,
- .cpd_init_fn = bfq_cpd_init,
- .pd_offline_fn = bfq_pd_offline,
- .pd_reset_stats_fn = bfq_pd_reset_stats,
+struct cgroup_subsys bfqio_cgrp_subsys = {
+ .css_alloc = bfqio_create,
+ .css_online = bfqio_css_online,
+ .css_offline = bfqio_css_offline,
+ .can_attach = bfqio_can_attach,
+ .attach = bfqio_attach,
+ .css_free = bfqio_destroy,
+ .legacy_cftypes = bfqio_files,
};
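/*
 * Lifecycle sketch (illustrative): css_alloc -> css_online sets
 * bgrp->online, css_offline clears it (making reads and writes fail
 * with -ENODEV), and css_free tears down the remaining groups and
 * frees the bfqio_cgroup.
 */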
-
#else
-
-static void bfq_init_entity(struct bfq_entity *entity,
- struct bfq_group *bfqg)
+static inline void bfq_init_entity(struct bfq_entity *entity,
+ struct bfq_group *bfqg)
{
- struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
entity->weight = entity->new_weight;
entity->orig_weight = entity->new_weight;
- if (bfqq) {
- bfqq->ioprio = bfqq->new_ioprio;
- bfqq->ioprio_class = bfqq->new_ioprio_class;
- }
+ entity->ioprio = entity->new_ioprio;
+ entity->ioprio_class = entity->new_ioprio_class;
entity->sched_data = &bfqg->sched_data;
}
-static struct bfq_group *
-bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
+static inline struct bfq_group *
+bfq_bic_update_cgroup(struct bfq_io_cq *bic)
{
struct bfq_data *bfqd = bic_to_bfqd(bic);
return bfqd->root_group;
}
-static void bfq_bfqq_move(struct bfq_data *bfqd,
- struct bfq_queue *bfqq,
- struct bfq_entity *entity,
- struct bfq_group *bfqg)
+static inline void bfq_bfqq_move(struct bfq_data *bfqd,
+ struct bfq_queue *bfqq,
+ struct bfq_entity *entity,
+ struct bfq_group *bfqg)
{
}
@@ -1084,24 +909,23 @@ static void bfq_end_wr_async(struct bfq_data *bfqd)
bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}
-static void bfq_disconnect_groups(struct bfq_data *bfqd)
+static inline void bfq_disconnect_groups(struct bfq_data *bfqd)
{
bfq_put_async_queues(bfqd, bfqd->root_group);
}
-static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
- struct blkcg *blkcg)
+static inline void bfq_free_root_group(struct bfq_data *bfqd)
{
- return bfqd->root_group;
+ kfree(bfqd->root_group);
}
-static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
{
struct bfq_group *bfqg;
int i;
bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
- if (!bfqg)
+ if (bfqg == NULL)
return NULL;
for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)