diff options
author | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-09-08 01:01:14 -0300 |
---|---|---|
committer | André Fabian Silva Delgado <emulatorman@parabola.nu> | 2015-09-08 01:01:14 -0300 |
commit | e5fd91f1ef340da553f7a79da9540c3db711c937 (patch) | |
tree | b11842027dc6641da63f4bcc524f8678263304a3 /block/blk-cgroup.c | |
parent | 2a9b0348e685a63d97486f6749622b61e9e3292f (diff) |
Linux-libre 4.2-gnu
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r-- | block/blk-cgroup.c | 279 |
1 files changed, 189 insertions, 90 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 6817e2896..d6283b3f5 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -9,29 +9,45 @@ * * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> * Nauman Rafique <nauman@google.com> + * + * For policy-specific per-blkcg data: + * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it> + * Arianna Avanzini <avanzini.arianna@gmail.com> */ #include <linux/ioprio.h> #include <linux/kdev_t.h> #include <linux/module.h> #include <linux/err.h> #include <linux/blkdev.h> +#include <linux/backing-dev.h> #include <linux/slab.h> #include <linux/genhd.h> #include <linux/delay.h> #include <linux/atomic.h> -#include "blk-cgroup.h" +#include <linux/blk-cgroup.h> #include "blk.h" #define MAX_KEY_LEN 100 +/* + * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation. + * blkcg_pol_register_mutex nests outside of it and synchronizes entire + * policy [un]register operations including cgroup file additions / + * removals. Putting cgroup file registration outside blkcg_pol_mutex + * allows grabbing it from cgroup callbacks. + */ +static DEFINE_MUTEX(blkcg_pol_register_mutex); static DEFINE_MUTEX(blkcg_pol_mutex); -struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT, - .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, }; +struct blkcg blkcg_root; EXPORT_SYMBOL_GPL(blkcg_root); +struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css; + static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; +static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ + static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { @@ -179,6 +195,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct blkcg_gq *new_blkg) { struct blkcg_gq *blkg; + struct bdi_writeback_congested *wb_congested; int i, ret; WARN_ON_ONCE(!rcu_read_lock_held()); @@ -190,22 +207,30 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, goto err_free_blkg; } + wb_congested = wb_congested_get_create(&q->backing_dev_info, + blkcg->css.id, GFP_ATOMIC); + if (!wb_congested) { + ret = -ENOMEM; + goto err_put_css; + } + /* allocate */ if (!new_blkg) { new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); if (unlikely(!new_blkg)) { ret = -ENOMEM; - goto err_put_css; + goto err_put_congested; } } blkg = new_blkg; + blkg->wb_congested = wb_congested; /* link parent */ if (blkcg_parent(blkcg)) { blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false); if (WARN_ON_ONCE(!blkg->parent)) { ret = -EINVAL; - goto err_put_css; + goto err_put_congested; } blkg_get(blkg->parent); } @@ -235,18 +260,15 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, blkg->online = true; spin_unlock(&blkcg->lock); - if (!ret) { - if (blkcg == &blkcg_root) { - q->root_blkg = blkg; - q->root_rl.blkg = blkg; - } + if (!ret) return blkg; - } /* @blkg failed fully initialized, use the usual release path */ blkg_put(blkg); return ERR_PTR(ret); +err_put_congested: + wb_congested_put(wb_congested); err_put_css: css_put(&blkcg->css); err_free_blkg: @@ -340,15 +362,6 @@ static void blkg_destroy(struct blkcg_gq *blkg) rcu_assign_pointer(blkcg->blkg_hint, NULL); /* - * If root blkg is destroyed. Just clear the pointer since root_rl - * does not take reference on root blkg. - */ - if (blkcg == &blkcg_root) { - blkg->q->root_blkg = NULL; - blkg->q->root_rl.blkg = NULL; - } - - /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. */ @@ -402,6 +415,8 @@ void __blkg_release_rcu(struct rcu_head *rcu_head) if (blkg->parent) blkg_put(blkg->parent); + wb_congested_put(blkg->wb_congested); + blkg_free(blkg); } EXPORT_SYMBOL_GPL(__blkg_release_rcu); @@ -448,20 +463,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, struct blkcg_gq *blkg; int i; - /* - * XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex - * which ends up putting cgroup's internal cgroup_tree_mutex under - * it; however, cgroup_tree_mutex is nested above cgroup file - * active protection and grabbing blkcg_pol_mutex from a cgroup - * file operation creates a possible circular dependency. cgroup - * internal locking is planned to go through further simplification - * and this issue should go away soon. For now, let's trylock - * blkcg_pol_mutex and restart the write on failure. - * - * http://lkml.kernel.org/g/5363C04B.4010400@oracle.com - */ - if (!mutex_trylock(&blkcg_pol_mutex)) - return restart_syscall(); + mutex_lock(&blkcg_pol_mutex); spin_lock_irq(&blkcg->lock); /* @@ -813,20 +815,35 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) } spin_unlock_irq(&blkcg->lock); + + wb_blkcg_offline(blkcg); } static void blkcg_css_free(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); - if (blkcg != &blkcg_root) + mutex_lock(&blkcg_pol_mutex); + list_del(&blkcg->all_blkcgs_node); + mutex_unlock(&blkcg_pol_mutex); + + if (blkcg != &blkcg_root) { + int i; + + for (i = 0; i < BLKCG_MAX_POLS; i++) + kfree(blkcg->pd[i]); kfree(blkcg); + } } static struct cgroup_subsys_state * blkcg_css_alloc(struct cgroup_subsys_state *parent_css) { struct blkcg *blkcg; + struct cgroup_subsys_state *ret; + int i; + + mutex_lock(&blkcg_pol_mutex); if (!parent_css) { blkcg = &blkcg_root; @@ -834,17 +851,54 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) } blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); - if (!blkcg) - return ERR_PTR(-ENOMEM); + if (!blkcg) { + ret = ERR_PTR(-ENOMEM); + goto free_blkcg; + } + + for (i = 0; i < BLKCG_MAX_POLS ; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + struct blkcg_policy_data *cpd; + + /* + * If the policy hasn't been attached yet, wait for it + * to be attached before doing anything else. Otherwise, + * check if the policy requires any specific per-cgroup + * data: if it does, allocate and initialize it. + */ + if (!pol || !pol->cpd_size) + continue; + + BUG_ON(blkcg->pd[i]); + cpd = kzalloc(pol->cpd_size, GFP_KERNEL); + if (!cpd) { + ret = ERR_PTR(-ENOMEM); + goto free_pd_blkcg; + } + blkcg->pd[i] = cpd; + cpd->plid = i; + pol->cpd_init_fn(blkcg); + } - blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; - blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT; done: spin_lock_init(&blkcg->lock); INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); INIT_HLIST_HEAD(&blkcg->blkg_list); +#ifdef CONFIG_CGROUP_WRITEBACK + INIT_LIST_HEAD(&blkcg->cgwb_list); +#endif + list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); + mutex_unlock(&blkcg_pol_mutex); return &blkcg->css; + +free_pd_blkcg: + for (i--; i >= 0; i--) + kfree(blkcg->pd[i]); +free_blkcg: + kfree(blkcg); + mutex_unlock(&blkcg_pol_mutex); + return ret; } /** @@ -859,9 +913,45 @@ done: */ int blkcg_init_queue(struct request_queue *q) { - might_sleep(); + struct blkcg_gq *new_blkg, *blkg; + bool preloaded; + int ret; + + new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); + if (!new_blkg) + return -ENOMEM; + + preloaded = !radix_tree_preload(GFP_KERNEL); + + /* + * Make sure the root blkg exists and count the existing blkgs. As + * @q is bypassing at this point, blkg_lookup_create() can't be + * used. Open code insertion. + */ + rcu_read_lock(); + spin_lock_irq(q->queue_lock); + blkg = blkg_create(&blkcg_root, q, new_blkg); + spin_unlock_irq(q->queue_lock); + rcu_read_unlock(); + + if (preloaded) + radix_tree_preload_end(); - return blk_throtl_init(q); + if (IS_ERR(blkg)) { + kfree(new_blkg); + return PTR_ERR(blkg); + } + + q->root_blkg = blkg; + q->root_rl.blkg = blkg; + + ret = blk_throtl_init(q); + if (ret) { + spin_lock_irq(q->queue_lock); + blkg_destroy_all(q); + spin_unlock_irq(q->queue_lock); + } + return ret; } /** @@ -962,52 +1052,21 @@ int blkcg_activate_policy(struct request_queue *q, const struct blkcg_policy *pol) { LIST_HEAD(pds); - struct blkcg_gq *blkg, *new_blkg; - struct blkg_policy_data *pd, *n; + struct blkcg_gq *blkg; + struct blkg_policy_data *pd, *nd; int cnt = 0, ret; - bool preloaded; if (blkcg_policy_enabled(q, pol)) return 0; - /* preallocations for root blkg */ - new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); - if (!new_blkg) - return -ENOMEM; - + /* count and allocate policy_data for all existing blkgs */ blk_queue_bypass_start(q); - - preloaded = !radix_tree_preload(GFP_KERNEL); - - /* - * Make sure the root blkg exists and count the existing blkgs. As - * @q is bypassing at this point, blkg_lookup_create() can't be - * used. Open code it. - */ spin_lock_irq(q->queue_lock); - - rcu_read_lock(); - blkg = __blkg_lookup(&blkcg_root, q, false); - if (blkg) - blkg_free(new_blkg); - else - blkg = blkg_create(&blkcg_root, q, new_blkg); - rcu_read_unlock(); - - if (preloaded) - radix_tree_preload_end(); - - if (IS_ERR(blkg)) { - ret = PTR_ERR(blkg); - goto out_unlock; - } - list_for_each_entry(blkg, &q->blkg_list, q_node) cnt++; - spin_unlock_irq(q->queue_lock); - /* allocate policy_data for all existing blkgs */ + /* allocate per-blkg policy data for all existing blkgs */ while (cnt--) { pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); if (!pd) { @@ -1018,7 +1077,7 @@ int blkcg_activate_policy(struct request_queue *q, } /* - * Install the allocated pds. With @q bypassing, no new blkg + * Install the allocated pds and cpds. With @q bypassing, no new blkg * should have been created while the queue lock was dropped. */ spin_lock_irq(q->queue_lock); @@ -1049,7 +1108,7 @@ out_unlock: spin_unlock_irq(q->queue_lock); out_free: blk_queue_bypass_end(q); - list_for_each_entry_safe(pd, n, &pds, alloc_node) + list_for_each_entry_safe(pd, nd, &pds, alloc_node) kfree(pd); return ret; } @@ -1076,10 +1135,6 @@ void blkcg_deactivate_policy(struct request_queue *q, __clear_bit(pol->plid, q->blkcg_pols); - /* if no policy is left, no need for blkgs - shoot them down */ - if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS)) - blkg_destroy_all(q); - list_for_each_entry(blkg, &q->blkg_list, q_node) { /* grab blkcg lock too while removing @pd from @blkg */ spin_lock(&blkg->blkcg->lock); @@ -1109,11 +1164,13 @@ EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); */ int blkcg_policy_register(struct blkcg_policy *pol) { + struct blkcg *blkcg; int i, ret; if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data))) return -EINVAL; + mutex_lock(&blkcg_pol_register_mutex); mutex_lock(&blkcg_pol_mutex); /* find an empty slot */ @@ -1122,19 +1179,49 @@ int blkcg_policy_register(struct blkcg_policy *pol) if (!blkcg_policy[i]) break; if (i >= BLKCG_MAX_POLS) - goto out_unlock; + goto err_unlock; - /* register and update blkgs */ + /* register @pol */ pol->plid = i; - blkcg_policy[i] = pol; + blkcg_policy[pol->plid] = pol; + + /* allocate and install cpd's */ + if (pol->cpd_size) { + list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { + struct blkcg_policy_data *cpd; + + cpd = kzalloc(pol->cpd_size, GFP_KERNEL); + if (!cpd) { + mutex_unlock(&blkcg_pol_mutex); + goto err_free_cpds; + } + + blkcg->pd[pol->plid] = cpd; + cpd->plid = pol->plid; + pol->cpd_init_fn(blkcg); + } + } + + mutex_unlock(&blkcg_pol_mutex); /* everything is in place, add intf files for the new policy */ if (pol->cftypes) WARN_ON(cgroup_add_legacy_cftypes(&blkio_cgrp_subsys, pol->cftypes)); - ret = 0; -out_unlock: + mutex_unlock(&blkcg_pol_register_mutex); + return 0; + +err_free_cpds: + if (pol->cpd_size) { + list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { + kfree(blkcg->pd[pol->plid]); + blkcg->pd[pol->plid] = NULL; + } + } + blkcg_policy[pol->plid] = NULL; +err_unlock: mutex_unlock(&blkcg_pol_mutex); + mutex_unlock(&blkcg_pol_register_mutex); return ret; } EXPORT_SYMBOL_GPL(blkcg_policy_register); @@ -1147,7 +1234,9 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register); */ void blkcg_policy_unregister(struct blkcg_policy *pol) { - mutex_lock(&blkcg_pol_mutex); + struct blkcg *blkcg; + + mutex_lock(&blkcg_pol_register_mutex); if (WARN_ON(blkcg_policy[pol->plid] != pol)) goto out_unlock; @@ -1156,9 +1245,19 @@ void blkcg_policy_unregister(struct blkcg_policy *pol) if (pol->cftypes) cgroup_rm_cftypes(pol->cftypes); - /* unregister and update blkgs */ + /* remove cpds and unregister */ + mutex_lock(&blkcg_pol_mutex); + + if (pol->cpd_size) { + list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { + kfree(blkcg->pd[pol->plid]); + blkcg->pd[pol->plid] = NULL; + } + } blkcg_policy[pol->plid] = NULL; -out_unlock: + mutex_unlock(&blkcg_pol_mutex); +out_unlock: + mutex_unlock(&blkcg_pol_register_mutex); } EXPORT_SYMBOL_GPL(blkcg_policy_unregister); |