author    André Fabian Silva Delgado <emulatorman@parabola.nu>  2015-12-15 14:52:16 -0300
committer André Fabian Silva Delgado <emulatorman@parabola.nu>  2015-12-15 14:52:16 -0300
commit    8d91c1e411f55d7ea91b1183a2e9f8088fb4d5be (patch)
tree      e9891aa6c295060d065adffd610c4f49ecf884f3 /block/bfq-cgroup.c
parent    a71852147516bc1cb5b0b3cbd13639bfd4022dc8 (diff)
Linux-libre 4.3.2-gnu
Diffstat (limited to 'block/bfq-cgroup.c')
-rw-r--r--  block/bfq-cgroup.c  1282
1 file changed, 553 insertions(+), 729 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index bc34d7a2b..11e2f1d4e 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -13,480 +13,254 @@
  * file.
  */
 
-#ifdef CONFIG_BFQ_GROUP_IOSCHED
+#ifdef CONFIG_CGROUP_BFQIO
 
-/* bfqg stats flags */
-enum bfqg_stats_flags {
-	BFQG_stats_waiting = 0,
-	BFQG_stats_idling,
-	BFQG_stats_empty,
-};
-
-#define BFQG_FLAG_FNS(name)	\
-static void bfqg_stats_mark_##name(struct bfqg_stats *stats)	\
-{	\
-	stats->flags |= (1 << BFQG_stats_##name);	\
-}	\
-static void bfqg_stats_clear_##name(struct bfqg_stats *stats)	\
-{	\
-	stats->flags &= ~(1 << BFQG_stats_##name);	\
-}	\
-static int bfqg_stats_##name(struct bfqg_stats *stats)	\
-{	\
-	return (stats->flags & (1 << BFQG_stats_##name)) != 0;	\
-}	\
-
-BFQG_FLAG_FNS(waiting)
-BFQG_FLAG_FNS(idling)
-BFQG_FLAG_FNS(empty)
-#undef BFQG_FLAG_FNS
-
-/* This should be called with the queue_lock held. */
-static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
-{
-	unsigned long long now;
-
-	if (!bfqg_stats_waiting(stats))
-		return;
-
-	now = sched_clock();
-	if (time_after64(now, stats->start_group_wait_time))
-		blkg_stat_add(&stats->group_wait_time,
-			      now - stats->start_group_wait_time);
-	bfqg_stats_clear_waiting(stats);
-}
-
-/* This should be called with the queue_lock held. */
-static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg,
-						 struct bfq_group *curr_bfqg)
-{
-	struct bfqg_stats *stats = &bfqg->stats;
-
-	if (bfqg_stats_waiting(stats))
-		return;
-	if (bfqg == curr_bfqg)
-		return;
-	stats->start_group_wait_time = sched_clock();
-	bfqg_stats_mark_waiting(stats);
-}
-
-/* This should be called with the queue_lock held. */
-static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
-{
-	unsigned long long now;
-
-	if (!bfqg_stats_empty(stats))
-		return;
-
-	now = sched_clock();
-	if (time_after64(now, stats->start_empty_time))
-		blkg_stat_add(&stats->empty_time,
-			      now - stats->start_empty_time);
-	bfqg_stats_clear_empty(stats);
-}
-
-static void bfqg_stats_update_dequeue(struct bfq_group *bfqg)
-{
-	blkg_stat_add(&bfqg->stats.dequeue, 1);
-}
+static DEFINE_MUTEX(bfqio_mutex);
 
-static void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg)
+static bool bfqio_is_removed(struct bfqio_cgroup *bgrp)
 {
-	struct bfqg_stats *stats = &bfqg->stats;
-
-	if (blkg_rwstat_total(&stats->queued))
-		return;
-
-	/*
-	 * group is already marked empty. This can happen if bfqq got new
-	 * request in parent group and moved to this group while being added
-	 * to service tree. Just ignore the event and move on.
-	 */
-	if (bfqg_stats_empty(stats))
-		return;
-
-	stats->start_empty_time = sched_clock();
-	bfqg_stats_mark_empty(stats);
+	return bgrp ? !bgrp->online : false;
 }
 
-static void bfqg_stats_update_idle_time(struct bfq_group *bfqg)
-{
-	struct bfqg_stats *stats = &bfqg->stats;
-
-	if (bfqg_stats_idling(stats)) {
-		unsigned long long now = sched_clock();
-
-		if (time_after64(now, stats->start_idle_time))
-			blkg_stat_add(&stats->idle_time,
-				      now - stats->start_idle_time);
-		bfqg_stats_clear_idling(stats);
-	}
-}
-
-static void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg)
-{
-	struct bfqg_stats *stats = &bfqg->stats;
-
-	stats->start_idle_time = sched_clock();
-	bfqg_stats_mark_idling(stats);
-}
-
-static void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg)
-{
-	struct bfqg_stats *stats = &bfqg->stats;
-
-	blkg_stat_add(&stats->avg_queue_size_sum,
-		      blkg_rwstat_total(&stats->queued));
-	blkg_stat_add(&stats->avg_queue_size_samples, 1);
-	bfqg_stats_update_group_wait_time(stats);
-}
-
-static struct blkcg_policy blkcg_policy_bfq;
-
-/*
- * blk-cgroup policy-related handlers
- * The following functions help in converting between blk-cgroup
- * internal structures and BFQ-specific structures.
- */
-
-static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd)
-{
-	return pd ? container_of(pd, struct bfq_group, pd) : NULL;
-}
+static struct bfqio_cgroup bfqio_root_cgroup = {
+	.weight = BFQ_DEFAULT_GRP_WEIGHT,
+	.ioprio = BFQ_DEFAULT_GRP_IOPRIO,
+	.ioprio_class = BFQ_DEFAULT_GRP_CLASS,
+};
 
-static struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg)
+static inline void bfq_init_entity(struct bfq_entity *entity,
+				   struct bfq_group *bfqg)
 {
-	return pd_to_blkg(&bfqg->pd);
+	entity->weight = entity->new_weight;
+	entity->orig_weight = entity->new_weight;
+	entity->ioprio = entity->new_ioprio;
+	entity->ioprio_class = entity->new_ioprio_class;
+	entity->parent = bfqg->my_entity;
+	entity->sched_data = &bfqg->sched_data;
 }
 
-static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg)
+static struct bfqio_cgroup *css_to_bfqio(struct cgroup_subsys_state *css)
 {
-	return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq));
+	return css ? container_of(css, struct bfqio_cgroup, css) : NULL;
 }
 
 /*
- * bfq_group handlers
- * The following functions help in navigating the bfq_group hierarchy
- * by allowing to find the parent of a bfq_group or the bfq_group
- * associated to a bfq_queue.
+ * Search the bfq_group for bfqd into the hash table (by now only a list)
+ * of bgrp. Must be called under rcu_read_lock().
  */
-
-static struct bfq_group *bfqg_parent(struct bfq_group *bfqg)
+static struct bfq_group *bfqio_lookup_group(struct bfqio_cgroup *bgrp,
+					    struct bfq_data *bfqd)
 {
-	struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent;
-
-	return pblkg ? blkg_to_bfqg(pblkg) : NULL;
-}
-
-static struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
-{
-	struct bfq_entity *group_entity = bfqq->entity.parent;
-
-	return group_entity ? container_of(group_entity, struct bfq_group,
-					   entity) :
-			      bfqq->bfqd->root_group;
-}
-
-/*
- * The following two functions handle get and put of a bfq_group by
- * wrapping the related blk-cgroup hooks.
- */
-
-static void bfqg_get(struct bfq_group *bfqg)
-{
-	return blkg_get(bfqg_to_blkg(bfqg));
-}
-
-static void bfqg_put(struct bfq_group *bfqg)
-{
-	return blkg_put(bfqg_to_blkg(bfqg));
-}
+	struct bfq_group *bfqg;
+	void *key;
 
-static void bfqg_stats_update_io_add(struct bfq_group *bfqg,
-				     struct bfq_queue *bfqq,
-				     int rw)
-{
-	blkg_rwstat_add(&bfqg->stats.queued, rw, 1);
-	bfqg_stats_end_empty_time(&bfqg->stats);
-	if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue))
-		bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq));
-}
+	hlist_for_each_entry_rcu(bfqg, &bgrp->group_data, group_node) {
+		key = rcu_dereference(bfqg->bfqd);
+		if (key == bfqd)
+			return bfqg;
+	}
 
-static void bfqg_stats_update_io_remove(struct bfq_group *bfqg, int rw)
-{
-	blkg_rwstat_add(&bfqg->stats.queued, rw, -1);
+	return NULL;
 }
 
-static void bfqg_stats_update_io_merged(struct bfq_group *bfqg, int rw)
+static inline void bfq_group_init_entity(struct bfqio_cgroup *bgrp,
+					 struct bfq_group *bfqg)
 {
-	blkg_rwstat_add(&bfqg->stats.merged, rw, 1);
-}
+	struct bfq_entity *entity = &bfqg->entity;
 
-static void bfqg_stats_update_dispatch(struct bfq_group *bfqg,
-				       uint64_t bytes, int rw)
-{
-	blkg_stat_add(&bfqg->stats.sectors, bytes >> 9);
-	blkg_rwstat_add(&bfqg->stats.serviced, rw, 1);
-	blkg_rwstat_add(&bfqg->stats.service_bytes, rw, bytes);
+	/*
+	 * If the weight of the entity has never been set via the sysfs
+	 * interface, then bgrp->weight == 0. In this case we initialize
+	 * the weight from the current ioprio value. Otherwise, the group
+	 * weight, if set, has priority over the ioprio value.
+	 */
+	if (bgrp->weight == 0) {
+		entity->new_weight = bfq_ioprio_to_weight(bgrp->ioprio);
+		entity->new_ioprio = bgrp->ioprio;
+	} else {
+		if (bgrp->weight < BFQ_MIN_WEIGHT ||
+		    bgrp->weight > BFQ_MAX_WEIGHT) {
+			printk(KERN_CRIT "bfq_group_init_entity: "
+			       "bgrp->weight %d\n", bgrp->weight);
+			BUG();
+		}
+		entity->new_weight = bgrp->weight;
+		entity->new_ioprio = bfq_weight_to_ioprio(bgrp->weight);
+	}
+	entity->orig_weight = entity->weight = entity->new_weight;
+	entity->ioprio = entity->new_ioprio;
+	entity->ioprio_class = entity->new_ioprio_class = bgrp->ioprio_class;
+	entity->my_sched_data = &bfqg->sched_data;
+	bfqg->active_entities = 0;
 }
 
-static void bfqg_stats_update_completion(struct bfq_group *bfqg,
-			uint64_t start_time, uint64_t io_start_time, int rw)
+static inline void bfq_group_set_parent(struct bfq_group *bfqg,
+					struct bfq_group *parent)
 {
-	struct bfqg_stats *stats = &bfqg->stats;
-	unsigned long long now = sched_clock();
-
-	if (time_after64(now, io_start_time))
-		blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
-	if (time_after64(io_start_time, start_time))
-		blkg_rwstat_add(&stats->wait_time, rw,
-				io_start_time - start_time);
-}
+	struct bfq_entity *entity;
 
-/* @stats = 0 */
-static void bfqg_stats_reset(struct bfqg_stats *stats)
-{
-	if (!stats)
-		return;
-
-	/* queued stats shouldn't be cleared */
-	blkg_rwstat_reset(&stats->service_bytes);
-	blkg_rwstat_reset(&stats->serviced);
-	blkg_rwstat_reset(&stats->merged);
-	blkg_rwstat_reset(&stats->service_time);
-	blkg_rwstat_reset(&stats->wait_time);
-	blkg_stat_reset(&stats->time);
-	blkg_stat_reset(&stats->unaccounted_time);
-	blkg_stat_reset(&stats->avg_queue_size_sum);
-	blkg_stat_reset(&stats->avg_queue_size_samples);
-	blkg_stat_reset(&stats->dequeue);
-	blkg_stat_reset(&stats->group_wait_time);
-	blkg_stat_reset(&stats->idle_time);
-	blkg_stat_reset(&stats->empty_time);
-}
+	BUG_ON(parent == NULL);
+	BUG_ON(bfqg == NULL);
 
-/* @to += @from */
-static void bfqg_stats_merge(struct bfqg_stats *to, struct bfqg_stats *from)
-{
-	if (!to || !from)
-		return;
-
-	/* queued stats shouldn't be cleared */
-	blkg_rwstat_merge(&to->service_bytes, &from->service_bytes);
-	blkg_rwstat_merge(&to->serviced, &from->serviced);
-	blkg_rwstat_merge(&to->merged, &from->merged);
-	blkg_rwstat_merge(&to->service_time, &from->service_time);
-	blkg_rwstat_merge(&to->wait_time, &from->wait_time);
-	blkg_stat_merge(&from->time, &from->time);
-	blkg_stat_merge(&to->unaccounted_time, &from->unaccounted_time);
-	blkg_stat_merge(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
-	blkg_stat_merge(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
-	blkg_stat_merge(&to->dequeue, &from->dequeue);
-	blkg_stat_merge(&to->group_wait_time, &from->group_wait_time);
-	blkg_stat_merge(&to->idle_time, &from->idle_time);
-	blkg_stat_merge(&to->empty_time, &from->empty_time);
+	entity = &bfqg->entity;
+	entity->parent = parent->my_entity;
+	entity->sched_data = &parent->sched_data;
 }
 
-/*
- * Transfer @bfqg's stats to its parent's dead_stats so that the ancestors'
- * recursive stats can still account for the amount used by this bfqg after
- * it's gone.
+/**
+ * bfq_group_chain_alloc - allocate a chain of groups.
+ * @bfqd: queue descriptor.
+ * @css: the leaf cgroup_subsys_state this chain starts from.
+ *
+ * Allocate a chain of groups starting from the one belonging to
+ * @cgroup up to the root cgroup. Stop if a cgroup on the chain
+ * to the root has already an allocated group on @bfqd.
  */
-static void bfqg_stats_xfer_dead(struct bfq_group *bfqg)
+static struct bfq_group *bfq_group_chain_alloc(struct bfq_data *bfqd,
+					       struct cgroup_subsys_state *css)
 {
-	struct bfq_group *parent;
-
-	if (!bfqg) /* root_group */
-		return;
+	struct bfqio_cgroup *bgrp;
+	struct bfq_group *bfqg, *prev = NULL, *leaf = NULL;
 
-	parent = bfqg_parent(bfqg);
+	for (; css != NULL; css = css->parent) {
+		bgrp = css_to_bfqio(css);
 
-	lockdep_assert_held(bfqg_to_blkg(bfqg)->q->queue_lock);
+		bfqg = bfqio_lookup_group(bgrp, bfqd);
+		if (bfqg != NULL) {
+			/*
+			 * All the cgroups in the path from there to the
+			 * root must have a bfq_group for bfqd, so we don't
+			 * need any more allocations.
+			 */
+			break;
+		}
 
-	if (unlikely(!parent))
-		return;
+		bfqg = kzalloc(sizeof(*bfqg), GFP_ATOMIC);
+		if (bfqg == NULL)
+			goto cleanup;
 
-	bfqg_stats_merge(&parent->dead_stats, &bfqg->stats);
-	bfqg_stats_merge(&parent->dead_stats, &bfqg->dead_stats);
-	bfqg_stats_reset(&bfqg->stats);
-	bfqg_stats_reset(&bfqg->dead_stats);
-}
+		bfq_group_init_entity(bgrp, bfqg);
+		bfqg->my_entity = &bfqg->entity;
 
-static void bfq_init_entity(struct bfq_entity *entity,
-			    struct bfq_group *bfqg)
-{
-	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+		if (leaf == NULL) {
+			leaf = bfqg;
+			prev = leaf;
+		} else {
+			bfq_group_set_parent(prev, bfqg);
+			/*
+			 * Build a list of allocated nodes using the bfqd
+			 * filed, that is still unused and will be
+			 * initialized only after the node will be
+			 * connected.
+			 */
+			prev->bfqd = bfqg;
+			prev = bfqg;
+		}
+	}
 
-	entity->weight = entity->new_weight;
-	entity->orig_weight = entity->new_weight;
-	if (bfqq) {
-		bfqq->ioprio = bfqq->new_ioprio;
-		bfqq->ioprio_class = bfqq->new_ioprio_class;
-		bfqg_get(bfqg);
-	}
-	entity->parent = bfqg->my_entity;
-	entity->sched_data = &bfqg->sched_data;
-}
-
-static void bfqg_stats_init(struct bfqg_stats *stats)
-{
-	blkg_rwstat_init(&stats->service_bytes);
-	blkg_rwstat_init(&stats->serviced);
-	blkg_rwstat_init(&stats->merged);
-	blkg_rwstat_init(&stats->service_time);
-	blkg_rwstat_init(&stats->wait_time);
-	blkg_rwstat_init(&stats->queued);
-
-	blkg_stat_init(&stats->sectors);
-	blkg_stat_init(&stats->time);
-
-	blkg_stat_init(&stats->unaccounted_time);
-	blkg_stat_init(&stats->avg_queue_size_sum);
-	blkg_stat_init(&stats->avg_queue_size_samples);
-	blkg_stat_init(&stats->dequeue);
-	blkg_stat_init(&stats->group_wait_time);
-	blkg_stat_init(&stats->idle_time);
-	blkg_stat_init(&stats->empty_time);
-}
-
-static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd)
- {
-	return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL;
- }
-
-static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg)
-{
-	return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq));
-}
-
-static void bfq_cpd_init(const struct blkcg *blkcg)
-{
-	struct bfq_group_data *d =
-		cpd_to_bfqgd(blkcg->pd[blkcg_policy_bfq.plid]);
-
-	d->weight = BFQ_DEFAULT_GRP_WEIGHT;
-}
-
-static void bfq_pd_init(struct blkcg_gq *blkg)
-{
-	struct bfq_group *bfqg = blkg_to_bfqg(blkg);
-	struct bfq_data *bfqd = blkg->q->elevator->elevator_data;
-	struct bfq_entity *entity = &bfqg->entity;
-	struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg);
-	entity->orig_weight = entity->weight = entity->new_weight = d->weight;
-	entity->my_sched_data = &bfqg->sched_data;
-	bfqg->my_entity = entity; /*
-				   * the root_group's will be set to NULL
-				   * in bfq_init_queue()
-				   */
-	bfqg->bfqd = bfqd;
-	bfqg->active_entities = 0;
-	bfqg->rq_pos_tree = RB_ROOT;
+	return leaf;
 
-	/* if the root_group does not exist, we are handling it right now */
-	if (bfqd->root_group && bfqg != bfqd->root_group)
-		hlist_add_head(&bfqg->bfqd_node, &bfqd->group_list);
+cleanup:
+	while (leaf != NULL) {
+		prev = leaf;
+		leaf = leaf->bfqd;
+		kfree(prev);
+	}
 
-	bfqg_stats_init(&bfqg->stats);
-	bfqg_stats_init(&bfqg->dead_stats);
+	return NULL;
 }
 
-/* offset delta from bfqg->stats to bfqg->dead_stats */
-static const int dead_stats_off_delta = offsetof(struct bfq_group, dead_stats) -
-					offsetof(struct bfq_group, stats);
-
-/* to be used by recursive prfill, sums live and dead stats recursively */
-static u64 bfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off)
+/**
+ * bfq_group_chain_link - link an allocated group chain to a cgroup
+ *                        hierarchy.
+ * @bfqd: the queue descriptor.
+ * @css: the leaf cgroup_subsys_state to start from.
+ * @leaf: the leaf group (to be associated to @cgroup).
+ *
+ * Try to link a chain of groups to a cgroup hierarchy, connecting the
+ * nodes bottom-up, so we can be sure that when we find a cgroup in the
+ * hierarchy that already as a group associated to @bfqd all the nodes
+ * in the path to the root cgroup have one too.
+ *
+ * On locking: the queue lock protects the hierarchy (there is a hierarchy
+ * per device) while the bfqio_cgroup lock protects the list of groups
+ * belonging to the same cgroup.
+ */
+static void bfq_group_chain_link(struct bfq_data *bfqd,
+				 struct cgroup_subsys_state *css,
+				 struct bfq_group *leaf)
 {
-	u64 sum = 0;
+	struct bfqio_cgroup *bgrp;
+	struct bfq_group *bfqg, *next, *prev = NULL;
+	unsigned long flags;
 
-	sum += blkg_stat_recursive_sum(pd, off);
-	sum += blkg_stat_recursive_sum(pd, off + dead_stats_off_delta);
-	return sum;
-}
+	assert_spin_locked(bfqd->queue->queue_lock);
 
-/* to be used by recursive prfill, sums live and dead rwstats recursively */
-static struct blkg_rwstat bfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd,
-						       int off)
-{
-	struct blkg_rwstat a, b;
+	for (; css != NULL && leaf != NULL; css = css->parent) {
+		bgrp = css_to_bfqio(css);
+		next = leaf->bfqd;
 
-	a = blkg_rwstat_recursive_sum(pd, off);
-	b = blkg_rwstat_recursive_sum(pd, off + dead_stats_off_delta);
-	blkg_rwstat_merge(&a, &b);
-	return a;
-}
+		bfqg = bfqio_lookup_group(bgrp, bfqd);
+		BUG_ON(bfqg != NULL);
 
-static void bfq_pd_reset_stats(struct blkcg_gq *blkg)
-{
-	struct bfq_group *bfqg = blkg_to_bfqg(blkg);
+		spin_lock_irqsave(&bgrp->lock, flags);
 
-	bfqg_stats_reset(&bfqg->stats);
-	bfqg_stats_reset(&bfqg->dead_stats);
-}
+		rcu_assign_pointer(leaf->bfqd, bfqd);
+		hlist_add_head_rcu(&leaf->group_node, &bgrp->group_data);
+		hlist_add_head(&leaf->bfqd_node, &bfqd->group_list);
 
-static void bfq_group_set_parent(struct bfq_group *bfqg,
-				 struct bfq_group *parent)
-{
-	struct bfq_entity *entity;
+		spin_unlock_irqrestore(&bgrp->lock, flags);
 
-	BUG_ON(!parent);
-	BUG_ON(!bfqg);
-	BUG_ON(bfqg == parent);
+		prev = leaf;
+		leaf = next;
+	}
 
-	entity = &bfqg->entity;
-	entity->parent = parent->my_entity;
-	entity->sched_data = &parent->sched_data;
+	BUG_ON(css == NULL && leaf != NULL);
+	if (css != NULL && prev != NULL) {
+		bgrp = css_to_bfqio(css);
+		bfqg = bfqio_lookup_group(bgrp, bfqd);
+		bfq_group_set_parent(prev, bfqg);
+	}
 }
 
+/**
+ * bfq_find_alloc_group - return the group associated to @bfqd in @cgroup.
+ * @bfqd: queue descriptor.
+ * @cgroup: cgroup being searched for.
+ *
+ * Return a group associated to @bfqd in @cgroup, allocating one if
+ * necessary. When a group is returned all the cgroups in the path
+ * to the root have a group associated to @bfqd.
+ *
+ * If the allocation fails, return the root group: this breaks guarantees
+ * but is a safe fallback. If this loss becomes a problem it can be
+ * mitigated using the equivalent weight (given by the product of the
+ * weights of the groups in the path from @group to the root) in the
+ * root scheduler.
+ *
+ * We allocate all the missing nodes in the path from the leaf cgroup
+ * to the root and we connect the nodes only after all the allocations
+ * have been successful.
+ */
 static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
-					      struct blkcg *blkcg)
+					      struct cgroup_subsys_state *css)
 {
-	struct request_queue *q = bfqd->queue;
-	struct bfq_group *bfqg = NULL, *parent;
-	struct bfq_entity *entity = NULL;
+	struct bfqio_cgroup *bgrp = css_to_bfqio(css);
+	struct bfq_group *bfqg;
 
-	assert_spin_locked(bfqd->queue->queue_lock);
+	bfqg = bfqio_lookup_group(bgrp, bfqd);
+	if (bfqg != NULL)
+		return bfqg;
 
-	/* avoid lookup for the common case where there's no blkcg */
-	if (blkcg == &blkcg_root) {
+	bfqg = bfq_group_chain_alloc(bfqd, css);
+	if (bfqg != NULL)
+		bfq_group_chain_link(bfqd, css, bfqg);
+	else
 		bfqg = bfqd->root_group;
-	} else {
-		struct blkcg_gq *blkg;
-
-		blkg = blkg_lookup_create(blkcg, q);
-		if (!IS_ERR(blkg))
-			bfqg = blkg_to_bfqg(blkg);
-		else /* fallback to root_group */
-			bfqg = bfqd->root_group;
-	}
-
-	BUG_ON(!bfqg);
-
-	/*
-	 * Update chain of bfq_groups as we might be handling a leaf group
-	 * which, along with some of its relatives, has not been hooked yet
-	 * to the private hierarchy of BFQ.
-	 */
-	entity = &bfqg->entity;
-	for_each_entity(entity) {
-		bfqg = container_of(entity, struct bfq_group, entity);
-		BUG_ON(!bfqg);
-		if (bfqg != bfqd->root_group) {
-			parent = bfqg_parent(bfqg);
-			if (!parent)
-				parent = bfqd->root_group;
-			BUG_ON(!parent);
-			bfq_group_set_parent(bfqg, parent);
-		}
-	}
 
 	return bfqg;
 }
 
-static void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
-
 /**
  * bfq_bfqq_move - migrate @bfqq to @bfqg.
  * @bfqd: queue descriptor.
@@ -522,7 +296,6 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 		bfq_deactivate_bfqq(bfqd, bfqq, 0);
 	} else if (entity->on_st)
 		bfq_put_idle_entity(bfq_entity_service_tree(entity), entity);
-	bfqg_put(bfqq_group(bfqq));
 
 	/*
 	 * Here we use a reference to bfqg. We don't need a refcounter
@@ -531,15 +304,11 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 	 */
 	entity->parent = bfqg->my_entity;
 	entity->sched_data = &bfqg->sched_data;
-	bfqg_get(bfqg);
 
-	if (busy) {
-		bfq_pos_tree_add_move(bfqd, bfqq);
-		if (resume)
-			bfq_activate_bfqq(bfqd, bfqq);
-	}
+	if (busy && resume)
+		bfq_activate_bfqq(bfqd, bfqq);
 
-	if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
+	if (bfqd->in_service_queue == NULL && !bfqd->rq_in_driver)
 		bfq_schedule_dispatch(bfqd);
 }
 
@@ -547,9 +316,9 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  * __bfq_bic_change_cgroup - move @bic to @cgroup.
  * @bfqd: the queue descriptor.
  * @bic: the bic to move.
- * @blkcg: the blk-cgroup to move to.
+ * @cgroup: the cgroup to move to.
  *
- * Move bic to blkcg, assuming that bfqd->queue is locked; the caller
+ * Move bic to cgroup, assuming that bfqd->queue is locked; the caller
  * has to make sure that the reference to cgroup is valid across the call.
 *
 * NOTE: an alternative approach might have been to store the current
@@ -558,17 +327,18 @@ static void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
  */
 static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
 						 struct bfq_io_cq *bic,
-						 struct blkcg *blkcg)
+						 struct cgroup_subsys_state *css)
 {
 	struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0);
 	struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1);
-	struct bfq_group *bfqg;
 	struct bfq_entity *entity;
+	struct bfq_group *bfqg;
+	struct bfqio_cgroup *bgrp;
 
-	lockdep_assert_held(bfqd->queue->queue_lock);
+	bgrp = css_to_bfqio(css);
 
-	bfqg = bfq_find_alloc_group(bfqd, blkcg);
-	if (async_bfqq) {
+	bfqg = bfq_find_alloc_group(bfqd, css);
+	if (async_bfqq != NULL) {
 		entity = &async_bfqq->entity;
 
 		if (entity->sched_data != &bfqg->sched_data) {
@@ -580,7 +350,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
 		}
 	}
 
-	if (sync_bfqq) {
+	if (sync_bfqq != NULL) {
 		entity = &sync_bfqq->entity;
 		if (entity->sched_data != &bfqg->sched_data)
 			bfq_bfqq_move(bfqd, sync_bfqq, entity, bfqg);
@@ -589,39 +359,74 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
 	return bfqg;
 }
 
-static void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
+/**
+ * bfq_bic_change_cgroup - move @bic to @cgroup.
+ * @bic: the bic being migrated.
+ * @cgroup: the destination cgroup.
+ *
+ * When the task owning @bic is moved to @cgroup, @bic is immediately
+ * moved into its new parent group.
+ */
+static void bfq_bic_change_cgroup(struct bfq_io_cq *bic,
+				  struct cgroup_subsys_state *css)
+{
+	struct bfq_data *bfqd;
+	unsigned long uninitialized_var(flags);
+
+	bfqd = bfq_get_bfqd_locked(&(bic->icq.q->elevator->elevator_data),
+				   &flags);
+	if (bfqd != NULL) {
+		__bfq_bic_change_cgroup(bfqd, bic, css);
+		bfq_put_bfqd_unlock(bfqd, &flags);
+	}
+}
+
+/**
+ * bfq_bic_update_cgroup - update the cgroup of @bic.
+ * @bic: the @bic to update.
+ *
+ * Make sure that @bic is enqueued in the cgroup of the current task.
+ * We need this in addition to moving bics during the cgroup attach
+ * phase because the task owning @bic could be at its first disk
+ * access or we may end up in the root cgroup as the result of a
+ * memory allocation failure and here we try to move to the right
+ * group.
+ *
+ * Must be called under the queue lock. It is safe to use the returned
+ * value even after the rcu_read_unlock() as the migration/destruction
+ * paths act under the queue lock too. IOW it is impossible to race with
+ * group migration/destruction and end up with an invalid group as:
+ *   a) here cgroup has not yet been destroyed, nor its destroy callback
+ *      has started execution, as current holds a reference to it,
+ *   b) if it is destroyed after rcu_read_unlock() [after current is
+ *      migrated to a different cgroup] its attach() callback will have
+ *      taken care of remove all the references to the old cgroup data.
+ */
+static struct bfq_group *bfq_bic_update_cgroup(struct bfq_io_cq *bic)
 {
 	struct bfq_data *bfqd = bic_to_bfqd(bic);
-	struct blkcg *blkcg;
-	struct bfq_group *bfqg = NULL;
-	uint64_t id;
+	struct bfq_group *bfqg;
+	struct cgroup_subsys_state *css;
+
+	BUG_ON(bfqd == NULL);
 
 	rcu_read_lock();
-	blkcg = bio_blkcg(bio);
-	id = blkcg->css.serial_nr;
+	css = task_css(current, bfqio_cgrp_id);
+	bfqg = __bfq_bic_change_cgroup(bfqd, bic, css);
 	rcu_read_unlock();
 
-	/*
-	 * Check whether blkcg has changed. The condition may trigger
-	 * spuriously on a newly created cic but there's no harm.
-	 */
-	if (unlikely(!bfqd) || likely(bic->blkcg_id == id))
-		return;
-
-	bfqg = __bfq_bic_change_cgroup(bfqd, bic, blkcg);
-	BUG_ON(!bfqg);
-	bic->blkcg_id = id;
+	return bfqg;
 }
 
 /**
  * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st.
  * @st: the service tree being flushed.
  */
-static void bfq_flush_idle_tree(struct bfq_service_tree *st)
+static inline void bfq_flush_idle_tree(struct bfq_service_tree *st)
 {
 	struct bfq_entity *entity = st->first_idle;
 
-	for (; entity ; entity = st->first_idle)
+	for (; entity != NULL; entity = st->first_idle)
 		__bfq_deactivate_entity(entity, 0);
 }
 
@@ -630,12 +435,12 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st)
  * @bfqd: the device data structure with the root group.
  * @entity: the entity to move.
  */
-static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
-				     struct bfq_entity *entity)
+static inline void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
+					    struct bfq_entity *entity)
 {
 	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
 
-	BUG_ON(!bfqq);
+	BUG_ON(bfqq == NULL);
 	bfq_bfqq_move(bfqd, bfqq, entity, bfqd->root_group);
 	return;
 }
@@ -649,9 +454,9 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd,
  *
  * Needs queue_lock to be taken and reference to be valid over the call.
  */
-static void bfq_reparent_active_entities(struct bfq_data *bfqd,
-					 struct bfq_group *bfqg,
-					 struct bfq_service_tree *st)
+static inline void bfq_reparent_active_entities(struct bfq_data *bfqd,
+						struct bfq_group *bfqg,
+						struct bfq_service_tree *st)
 {
 	struct rb_root *active = &st->active;
 	struct bfq_entity *entity = NULL;
@@ -659,10 +464,10 @@ static void bfq_reparent_active_entities(struct bfq_data *bfqd,
 	if (!RB_EMPTY_ROOT(&st->active))
 		entity = bfq_entity_of(rb_first(active));
 
-	for (; entity ; entity = bfq_entity_of(rb_first(active)))
+	for (; entity != NULL; entity = bfq_entity_of(rb_first(active)))
 		bfq_reparent_leaf_entity(bfqd, entity);
 
-	if (bfqg->sched_data.in_service_entity)
+	if (bfqg->sched_data.in_service_entity != NULL)
 		bfq_reparent_leaf_entity(bfqd,
 					 bfqg->sched_data.in_service_entity);
 
@@ -671,21 +476,20 @@ static void bfq_reparent_active_entities(struct bfq_data *bfqd,
 /**
  * bfq_destroy_group - destroy @bfqg.
+ * @bgrp: the bfqio_cgroup containing @bfqg.
  * @bfqg: the group being destroyed.
  *
  * Destroy @bfqg, making sure that it is not referenced from its parent.
- * blkio already grabs the queue_lock for us, so no need to use RCU-based magic
  */
-static void bfq_pd_offline(struct blkcg_gq *blkg)
+static void bfq_destroy_group(struct bfqio_cgroup *bgrp, struct bfq_group *bfqg)
 {
+	struct bfq_data *bfqd;
 	struct bfq_service_tree *st;
-	struct bfq_group *bfqg = blkg_to_bfqg(blkg);
-	struct bfq_data *bfqd = bfqg->bfqd;
 	struct bfq_entity *entity = bfqg->my_entity;
+	unsigned long uninitialized_var(flags);
 	int i;
 
-	if (!entity) /* root group */
-		return;
+	hlist_del(&bfqg->group_node);
 
 	/*
 	 * Empty all service_trees belonging to this group before
@@ -714,19 +518,37 @@ static void bfq_pd_offline(struct blkcg_gq *blkg)
 	 * There is no need to put the sync queues, as the
 	 * scheduler has taken no reference.
 	 */
-	bfq_reparent_active_entities(bfqd, bfqg, st);
+	bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
+	if (bfqd != NULL) {
+		bfq_reparent_active_entities(bfqd, bfqg, st);
+		bfq_put_bfqd_unlock(bfqd, &flags);
+	}
 	BUG_ON(!RB_EMPTY_ROOT(&st->active));
 	BUG_ON(!RB_EMPTY_ROOT(&st->idle));
 	}
 
-	BUG_ON(bfqg->sched_data.next_in_service);
-	BUG_ON(bfqg->sched_data.in_service_entity);
+	BUG_ON(bfqg->sched_data.next_in_service != NULL);
+	BUG_ON(bfqg->sched_data.in_service_entity != NULL);
 
-	hlist_del(&bfqg->bfqd_node);
-	__bfq_deactivate_entity(entity, 0);
-	bfq_put_async_queues(bfqd, bfqg);
-	BUG_ON(entity->tree);
+	/*
+	 * We may race with device destruction, take extra care when
+	 * dereferencing bfqg->bfqd.
+	 */
+	bfqd = bfq_get_bfqd_locked(&bfqg->bfqd, &flags);
+	if (bfqd != NULL) {
+		hlist_del(&bfqg->bfqd_node);
+		__bfq_deactivate_entity(entity, 0);
+		bfq_put_async_queues(bfqd, bfqg);
+		bfq_put_bfqd_unlock(bfqd, &flags);
+	}
+	BUG_ON(entity->tree != NULL);
 
-	bfqg_stats_xfer_dead(bfqg);
+	/*
+	 * No need to defer the kfree() to the end of the RCU grace
+	 * period: we are called from the destroy() callback of our
+	 * cgroup, so we can be sure that no one is a) still using
+	 * this cgroup or b) doing lookups in it.
+	 */
+	kfree(bfqg);
 }
 
 static void bfq_end_wr_async(struct bfq_data *bfqd)
@@ -773,309 +595,312 @@ static void bfq_disconnect_groups(struct bfq_data *bfqd)
 	}
 }
 
-static u64 bfqio_cgroup_weight_read(struct cgroup_subsys_state *css,
-				    struct cftype *cftype)
+static inline void bfq_free_root_group(struct bfq_data *bfqd)
 {
-	struct blkcg *blkcg = css_to_blkcg(css);
-	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
-	int ret = -EINVAL;
+	struct bfqio_cgroup *bgrp = &bfqio_root_cgroup;
+	struct bfq_group *bfqg = bfqd->root_group;
+
+	bfq_put_async_queues(bfqd, bfqg);
 
-	spin_lock_irq(&blkcg->lock);
-	ret = bfqgd->weight;
-	spin_unlock_irq(&blkcg->lock);
+	spin_lock_irq(&bgrp->lock);
+	hlist_del_rcu(&bfqg->group_node);
+	spin_unlock_irq(&bgrp->lock);
 
-	return ret;
+	/*
+	 * No need to synchronize_rcu() here: since the device is gone
+	 * there cannot be any read-side access to its root_group.
+	 */
+	kfree(bfqg);
 }
 
-static int bfqio_cgroup_weight_write(struct cgroup_subsys_state *css,
-				     struct cftype *cftype,
-				     u64 val)
+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
 {
-	struct blkcg *blkcg = css_to_blkcg(css);
-	struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
-	struct blkcg_gq *blkg;
-	int ret = -EINVAL;
-
-	if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT)
-		return ret;
-
-	ret = 0;
-	spin_lock_irq(&blkcg->lock);
-	bfqgd->weight = (unsigned short)val;
-	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
-		struct bfq_group *bfqg = blkg_to_bfqg(blkg);
-		if (!bfqg)
-			continue;
-		/*
-		 * Setting the prio_changed flag of the entity
-		 * to 1 with new_weight == weight would re-set
-		 * the value of the weight to its ioprio mapping.
-		 * Set the flag only if necessary.
-		 */
-		if ((unsigned short)val != bfqg->entity.new_weight) {
-			bfqg->entity.new_weight = (unsigned short)val;
-			/*
-			 * Make sure that the above new value has been
-			 * stored in bfqg->entity.new_weight before
-			 * setting the prio_changed flag. In fact,
-			 * this flag may be read asynchronously (in
-			 * critical sections protected by a different
-			 * lock than that held here), and finding this
-			 * flag set may cause the execution of the code
-			 * for updating parameters whose value may
-			 * depend also on bfqg->entity.new_weight (in
-			 * __bfq_entity_update_weight_prio).
-			 * This barrier makes sure that the new value
-			 * of bfqg->entity.new_weight is correctly
-			 * seen in that code.
-			 */
-			smp_wmb();
-			bfqg->entity.prio_changed = 1;
-		}
-	}
-	spin_unlock_irq(&blkcg->lock);
+	struct bfq_group *bfqg;
+	struct bfqio_cgroup *bgrp;
+	int i;
 
-	return ret;
-}
+	bfqg = kzalloc_node(sizeof(*bfqg), GFP_KERNEL, node);
+	if (bfqg == NULL)
+		return NULL;
 
-static int bfqg_print_stat(struct seq_file *sf, void *v)
-{
-	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
-			  &blkcg_policy_bfq, seq_cft(sf)->private, false);
-	return 0;
-}
+	bfqg->entity.parent = NULL;
+	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)
+		bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT;
 
-static int bfqg_print_rwstat(struct seq_file *sf, void *v)
-{
-	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
-			  &blkcg_policy_bfq, seq_cft(sf)->private, true);
-	return 0;
+	bgrp = &bfqio_root_cgroup;
+	spin_lock_irq(&bgrp->lock);
+	rcu_assign_pointer(bfqg->bfqd, bfqd);
+	hlist_add_head_rcu(&bfqg->group_node, &bgrp->group_data);
+	spin_unlock_irq(&bgrp->lock);
+
+	return bfqg;
 }
 
-static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
-				      struct blkg_policy_data *pd, int off)
-{
-	u64 sum = bfqg_stat_pd_recursive_sum(pd, off);
+#define SHOW_FUNCTION(__VAR)	\
+static u64 bfqio_cgroup_##__VAR##_read(struct cgroup_subsys_state *css,	\
+				       struct cftype *cftype)	\
+{	\
+	struct bfqio_cgroup *bgrp = css_to_bfqio(css);	\
+	u64 ret = -ENODEV;	\
+	\
+	mutex_lock(&bfqio_mutex);	\
+	if (bfqio_is_removed(bgrp))	\
+		goto out_unlock;	\
+	\
+	spin_lock_irq(&bgrp->lock);	\
+	ret = bgrp->__VAR;	\
+	spin_unlock_irq(&bgrp->lock);	\
+	\
+out_unlock:	\
+	mutex_unlock(&bfqio_mutex);	\
+	return ret;	\
+}
+
+SHOW_FUNCTION(weight);
+SHOW_FUNCTION(ioprio);
+SHOW_FUNCTION(ioprio_class);
+#undef SHOW_FUNCTION
+
+#define STORE_FUNCTION(__VAR, __MIN, __MAX)	\
+static int bfqio_cgroup_##__VAR##_write(struct cgroup_subsys_state *css,\
+					struct cftype *cftype,	\
+					u64 val)	\
+{	\
+	struct bfqio_cgroup *bgrp = css_to_bfqio(css);	\
+	struct bfq_group *bfqg;	\
+	int ret = -EINVAL;	\
+	\
+	if (val < (__MIN) || val > (__MAX))	\
+		return ret;	\
+	\
+	ret = -ENODEV;	\
+	mutex_lock(&bfqio_mutex);	\
+	if (bfqio_is_removed(bgrp))	\
+		goto out_unlock;	\
+	ret = 0;	\
+	\
+	spin_lock_irq(&bgrp->lock);	\
+	bgrp->__VAR = (unsigned short)val;	\
+	hlist_for_each_entry(bfqg, &bgrp->group_data, group_node) {	\
+		/*	\
+		 * Setting the ioprio_changed flag of the entity	\
+		 * to 1 with new_##__VAR == ##__VAR would re-set	\
+		 * the value of the weight to its ioprio mapping.	\
+		 * Set the flag only if necessary.	\
+		 */	\
+		if ((unsigned short)val != bfqg->entity.new_##__VAR) {	\
+			bfqg->entity.new_##__VAR = (unsigned short)val;	\
+			/*	\
+			 * Make sure that the above new value has been	\
+			 * stored in bfqg->entity.new_##__VAR before	\
+			 * setting the ioprio_changed flag. In fact,	\
+			 * this flag may be read asynchronously (in	\
+			 * critical sections protected by a different	\
+			 * lock than that held here), and finding this	\
+			 * flag set may cause the execution of the code	\
+			 * for updating parameters whose value may	\
+			 * depend also on bfqg->entity.new_##__VAR (in	\
+			 * __bfq_entity_update_weight_prio).	\
+			 * This barrier makes sure that the new value	\
+			 * of bfqg->entity.new_##__VAR is correctly	\
+			 * seen in that code.	\
+			 */	\
+			smp_wmb();	\
+			bfqg->entity.ioprio_changed = 1;	\
+		}	\
+	}	\
+	spin_unlock_irq(&bgrp->lock);	\
+	\
+out_unlock:	\
+	mutex_unlock(&bfqio_mutex);	\
+	return ret;	\
+}
+
+STORE_FUNCTION(weight, BFQ_MIN_WEIGHT, BFQ_MAX_WEIGHT);
+STORE_FUNCTION(ioprio, 0, IOPRIO_BE_NR - 1);
+STORE_FUNCTION(ioprio_class, IOPRIO_CLASS_RT, IOPRIO_CLASS_IDLE);
+#undef STORE_FUNCTION
 
-	return __blkg_prfill_u64(sf, pd, sum);
-}
+static struct cftype bfqio_files[] = {
+	{
+		.name = "weight",
+		.read_u64 = bfqio_cgroup_weight_read,
+		.write_u64 = bfqio_cgroup_weight_write,
+	},
+	{
+		.name = "ioprio",
+		.read_u64 = bfqio_cgroup_ioprio_read,
+		.write_u64 = bfqio_cgroup_ioprio_write,
+	},
+	{
+		.name = "ioprio_class",
+		.read_u64 = bfqio_cgroup_ioprio_class_read,
+		.write_u64 = bfqio_cgroup_ioprio_class_write,
+	},
+	{ },	/* terminate */
+};
 
-static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
-					struct blkg_policy_data *pd, int off)
+static struct cgroup_subsys_state *bfqio_create(struct cgroup_subsys_state
+						*parent_css)
 {
-	struct blkg_rwstat sum = bfqg_rwstat_pd_recursive_sum(pd, off);
+	struct bfqio_cgroup *bgrp;
 
-	return __blkg_prfill_rwstat(sf, pd, &sum);
-}
+	if (parent_css != NULL) {
+		bgrp = kzalloc(sizeof(*bgrp), GFP_KERNEL);
+		if (bgrp == NULL)
+			return ERR_PTR(-ENOMEM);
+	} else
+		bgrp = &bfqio_root_cgroup;
 
-static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
-{
-	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
-			  bfqg_prfill_stat_recursive, &blkcg_policy_bfq,
-			  seq_cft(sf)->private, false);
-	return 0;
+	spin_lock_init(&bgrp->lock);
+	INIT_HLIST_HEAD(&bgrp->group_data);
+	bgrp->ioprio = BFQ_DEFAULT_GRP_IOPRIO;
+	bgrp->ioprio_class = BFQ_DEFAULT_GRP_CLASS;
+
+	return &bgrp->css;
 }
 
-static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
+/*
+ * We cannot support shared io contexts, as we have no means to support
+ * two tasks with the same ioc in two different groups without major rework
+ * of the main bic/bfqq data structures. By now we allow a task to change
+ * its cgroup only if it's the only owner of its ioc; the drawback of this
+ * behavior is that a group containing a task that forked using CLONE_IO
+ * will not be destroyed until the tasks sharing the ioc die.
+ */
+static int bfqio_can_attach(struct cgroup_subsys_state *css,
+			    struct cgroup_taskset *tset)
 {
-	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
-			  bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
-			  seq_cft(sf)->private, true);
-	return 0;
+	struct task_struct *task;
+	struct io_context *ioc;
+	int ret = 0;
+
+	cgroup_taskset_for_each(task, tset) {
+		/*
+		 * task_lock() is needed to avoid races with
+		 * exit_io_context()
+		 */
+		task_lock(task);
+		ioc = task->io_context;
+		if (ioc != NULL && atomic_read(&ioc->nr_tasks) > 1)
+			/*
+			 * ioc == NULL means that the task is either too
+			 * young or exiting: if it has still no ioc the
+			 * ioc can't be shared, if the task is exiting the
+			 * attach will fail anyway, no matter what we
+			 * return here.
+			 */
+			ret = -EINVAL;
+		task_unlock(task);
+		if (ret)
+			break;
+	}
+
+	return ret;
 }
 
-static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf,
-				      struct blkg_policy_data *pd, int off)
+static void bfqio_attach(struct cgroup_subsys_state *css,
+			 struct cgroup_taskset *tset)
 {
-	struct bfq_group *bfqg = pd_to_bfqg(pd);
-	u64 samples = blkg_stat_read(&bfqg->stats.avg_queue_size_samples);
-	u64 v = 0;
+	struct task_struct *task;
+	struct io_context *ioc;
+	struct io_cq *icq;
 
-	if (samples) {
-		v = blkg_stat_read(&bfqg->stats.avg_queue_size_sum);
-		v = div64_u64(v, samples);
+	/*
+	 * IMPORTANT NOTE: The move of more than one process at a time to a
+	 * new group has not yet been tested.
+	 */
+	cgroup_taskset_for_each(task, tset) {
+		ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
+		if (ioc) {
+			/*
+			 * Handle cgroup change here.
+			 */
+			rcu_read_lock();
+			hlist_for_each_entry_rcu(icq, &ioc->icq_list, ioc_node)
+				if (!strncmp(
+					icq->q->elevator->type->elevator_name,
+					"bfq", ELV_NAME_MAX))
+					bfq_bic_change_cgroup(icq_to_bic(icq),
+							      css);
+			rcu_read_unlock();
+			put_io_context(ioc);
+		}
 	}
-	__blkg_prfill_u64(sf, pd, v);
-	return 0;
 }
 
-/* print avg_queue_size */
-static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v)
+static void bfqio_destroy(struct cgroup_subsys_state *css)
 {
-	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
-			  bfqg_prfill_avg_queue_size, &blkcg_policy_bfq,
-			  0, false);
-	return 0;
+	struct bfqio_cgroup *bgrp = css_to_bfqio(css);
+	struct hlist_node *tmp;
+	struct bfq_group *bfqg;
+
+	/*
+	 * Since we are destroying the cgroup, there are no more tasks
+	 * referencing it, and all the RCU grace periods that may have
+	 * referenced it are ended (as the destruction of the parent
+	 * cgroup is RCU-safe); bgrp->group_data will not be accessed by
+	 * anything else and we don't need any synchronization.
+	 */
+	hlist_for_each_entry_safe(bfqg, tmp, &bgrp->group_data, group_node)
+		bfq_destroy_group(bgrp, bfqg);
+
+	BUG_ON(!hlist_empty(&bgrp->group_data));
+
+	kfree(bgrp);
 }
 
-static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
+static int bfqio_css_online(struct cgroup_subsys_state *css)
 {
-	int ret;
+	struct bfqio_cgroup *bgrp = css_to_bfqio(css);
 
-	ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
-	if (ret)
-		return NULL;
+	mutex_lock(&bfqio_mutex);
+	bgrp->online = true;
+	mutex_unlock(&bfqio_mutex);
 
-	return blkg_to_bfqg(bfqd->queue->root_blkg);
+	return 0;
 }
 
-static struct cftype bfqio_files[] = {
-	{
-		.name = "bfq.weight",
-		.read_u64 = bfqio_cgroup_weight_read,
-		.write_u64 = bfqio_cgroup_weight_write,
-	},
-	/* statistics, cover only the tasks in the bfqg */
-	{
-		.name = "bfq.time",
-		.private = offsetof(struct bfq_group, stats.time),
-		.seq_show = bfqg_print_stat,
-	},
-	{
-		.name = "bfq.sectors",
-		.private = offsetof(struct bfq_group, stats.sectors),
-		.seq_show = bfqg_print_stat,
-	},
-	{
-		.name = "bfq.io_service_bytes",
-		.private = offsetof(struct bfq_group, stats.service_bytes),
-		.seq_show = bfqg_print_rwstat,
-	},
-	{
-		.name = "bfq.io_serviced",
-		.private = offsetof(struct bfq_group, stats.serviced),
-		.seq_show = bfqg_print_rwstat,
-	},
-	{
-		.name = "bfq.io_service_time",
-		.private = offsetof(struct bfq_group, stats.service_time),
-		.seq_show = bfqg_print_rwstat,
-	},
-	{
-		.name = "bfq.io_wait_time",
-		.private = offsetof(struct bfq_group, stats.wait_time),
-		.seq_show = bfqg_print_rwstat,
-	},
-	{
-		.name = "bfq.io_merged",
-		.private = offsetof(struct bfq_group, stats.merged),
-		.seq_show = bfqg_print_rwstat,
-	},
-	{
-		.name = "bfq.io_queued",
-		.private = offsetof(struct bfq_group, stats.queued),
-		.seq_show = bfqg_print_rwstat,
-	},
+static void bfqio_css_offline(struct cgroup_subsys_state *css)
+{
+	struct bfqio_cgroup *bgrp = css_to_bfqio(css);
 
-	/* the same statictics which cover the bfqg and its descendants */
-	{
-		.name = "bfq.time_recursive",
-		.private = offsetof(struct bfq_group, stats.time),
-		.seq_show = bfqg_print_stat_recursive,
-	},
-	{
-		.name = "bfq.sectors_recursive",
-		.private = offsetof(struct bfq_group, stats.sectors),
-		.seq_show = bfqg_print_stat_recursive,
-	},
-	{
-		.name = "bfq.io_service_bytes_recursive",
-		.private = offsetof(struct bfq_group, stats.service_bytes),
-		.seq_show = bfqg_print_rwstat_recursive,
-	},
-	{
-		.name = "bfq.io_serviced_recursive",
-		.private = offsetof(struct bfq_group, stats.serviced),
-		.seq_show = bfqg_print_rwstat_recursive,
-	},
-	{
-		.name = "bfq.io_service_time_recursive",
-		.private = offsetof(struct bfq_group, stats.service_time),
-		.seq_show = bfqg_print_rwstat_recursive,
-	},
-	{
-		.name = "bfq.io_wait_time_recursive",
-		.private = offsetof(struct bfq_group, stats.wait_time),
-		.seq_show = bfqg_print_rwstat_recursive,
-	},
-	{
-		.name = "bfq.io_merged_recursive",
-		.private = offsetof(struct bfq_group, stats.merged),
-		.seq_show = bfqg_print_rwstat_recursive,
-	},
-	{
-		.name = "bfq.io_queued_recursive",
-		.private = offsetof(struct bfq_group, stats.queued),
-		.seq_show = bfqg_print_rwstat_recursive,
-	},
-	{
-		.name = "bfq.avg_queue_size",
-		.seq_show = bfqg_print_avg_queue_size,
-	},
-	{
-		.name = "bfq.group_wait_time",
-		.private = offsetof(struct bfq_group, stats.group_wait_time),
-		.seq_show = bfqg_print_stat,
-	},
-	{
-		.name = "bfq.idle_time",
-		.private = offsetof(struct bfq_group, stats.idle_time),
-		.seq_show = bfqg_print_stat,
-	},
-	{
-		.name = "bfq.empty_time",
-		.private = offsetof(struct bfq_group, stats.empty_time),
-		.seq_show = bfqg_print_stat,
-	},
-	{
-		.name = "bfq.dequeue",
-		.private = offsetof(struct bfq_group, stats.dequeue),
-		.seq_show = bfqg_print_stat,
-	},
-	{
-		.name = "bfq.unaccounted_time",
-		.private = offsetof(struct bfq_group, stats.unaccounted_time),
-		.seq_show = bfqg_print_stat,
-	},
-	{ }	/* terminate */
-};
+	mutex_lock(&bfqio_mutex);
+	bgrp->online = false;
+	mutex_unlock(&bfqio_mutex);
+}
 
-static struct blkcg_policy blkcg_policy_bfq = {
-	.pd_size = sizeof(struct bfq_group),
-	.cpd_size = sizeof(struct bfq_group_data),
-	.cftypes = bfqio_files,
-	.pd_init_fn = bfq_pd_init,
-	.cpd_init_fn = bfq_cpd_init,
-	.pd_offline_fn = bfq_pd_offline,
-	.pd_reset_stats_fn = bfq_pd_reset_stats,
+struct cgroup_subsys bfqio_cgrp_subsys = {
+	.css_alloc = bfqio_create,
+	.css_online = bfqio_css_online,
+	.css_offline = bfqio_css_offline,
+	.can_attach = bfqio_can_attach,
+	.attach = bfqio_attach,
+	.css_free = bfqio_destroy,
+	.legacy_cftypes = bfqio_files,
 };
-
 #else
-
-static void bfq_init_entity(struct bfq_entity *entity,
-			    struct bfq_group *bfqg)
+static inline void bfq_init_entity(struct bfq_entity *entity,
+				   struct bfq_group *bfqg)
 {
-	struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
-
 	entity->weight = entity->new_weight;
 	entity->orig_weight = entity->new_weight;
-	if (bfqq) {
-		bfqq->ioprio = bfqq->new_ioprio;
-		bfqq->ioprio_class = bfqq->new_ioprio_class;
-	}
+	entity->ioprio = entity->new_ioprio;
+	entity->ioprio_class = entity->new_ioprio_class;
 	entity->sched_data = &bfqg->sched_data;
 }
 
-static struct bfq_group *
-bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
+static inline struct bfq_group *
+bfq_bic_update_cgroup(struct bfq_io_cq *bic)
 {
 	struct bfq_data *bfqd = bic_to_bfqd(bic);
 	return bfqd->root_group;
 }
 
-static void bfq_bfqq_move(struct bfq_data *bfqd,
-			  struct bfq_queue *bfqq,
-			  struct bfq_entity *entity,
-			  struct bfq_group *bfqg)
+static inline void bfq_bfqq_move(struct bfq_data *bfqd,
+				 struct bfq_queue *bfqq,
+				 struct bfq_entity *entity,
+				 struct bfq_group *bfqg)
 {
 }
 
@@ -1084,24 +909,23 @@ static void bfq_end_wr_async(struct bfq_data *bfqd)
 	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
 }
 
-static void bfq_disconnect_groups(struct bfq_data *bfqd)
+static inline void bfq_disconnect_groups(struct bfq_data *bfqd)
 {
 	bfq_put_async_queues(bfqd, bfqd->root_group);
 }
 
-static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
-					      struct blkcg *blkcg)
+static inline void bfq_free_root_group(struct bfq_data *bfqd)
 {
-	return bfqd->root_group;
+	kfree(bfqd->root_group);
}
 
-static struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
+static struct bfq_group *bfq_alloc_root_group(struct bfq_data *bfqd, int node)
 {
 	struct bfq_group *bfqg;
 	int i;
 
 	bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node);
-	if (!bfqg)
+	if (bfqg == NULL)
 		return NULL;
 
 	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++)