summary | refs | log | tree | commit | diff
path: root/block/blk-cgroup.c
diff options
context:
space:
mode:
author André Fabian Silva Delgado <emulatorman@parabola.nu> 2015-09-08 01:01:14 -0300
committer André Fabian Silva Delgado <emulatorman@parabola.nu> 2015-09-08 01:01:14 -0300
commit e5fd91f1ef340da553f7a79da9540c3db711c937 (patch)
tree b11842027dc6641da63f4bcc524f8678263304a3 /block/blk-cgroup.c
parent 2a9b0348e685a63d97486f6749622b61e9e3292f (diff)
Linux-libre 4.2-gnu
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r-- block/blk-cgroup.c 279
1 files changed, 189 insertions, 90 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 6817e2896..d6283b3f5 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -9,29 +9,45 @@
*
* Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
* Nauman Rafique <nauman@google.com>
+ *
+ * For policy-specific per-blkcg data:
+ * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
+ * Arianna Avanzini <avanzini.arianna@gmail.com>
*/
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
-#include "blk-cgroup.h"
+#include <linux/blk-cgroup.h>
#include "blk.h"
#define MAX_KEY_LEN 100
+/*
+ * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
+ * blkcg_pol_register_mutex nests outside of it and synchronizes entire
+ * policy [un]register operations including cgroup file additions /
+ * removals. Putting cgroup file registration outside blkcg_pol_mutex
+ * allows grabbing it from cgroup callbacks.
+ */
+static DEFINE_MUTEX(blkcg_pol_register_mutex);
static DEFINE_MUTEX(blkcg_pol_mutex);
-struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT,
- .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, };
+struct blkcg blkcg_root;
EXPORT_SYMBOL_GPL(blkcg_root);
+struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css;
+
static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
+static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */
+
static bool blkcg_policy_enabled(struct request_queue *q,
const struct blkcg_policy *pol)
{
@@ -179,6 +195,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
struct blkcg_gq *new_blkg)
{
struct blkcg_gq *blkg;
+ struct bdi_writeback_congested *wb_congested;
int i, ret;
WARN_ON_ONCE(!rcu_read_lock_held());
@@ -190,22 +207,30 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
goto err_free_blkg;
}
+ wb_congested = wb_congested_get_create(&q->backing_dev_info,
+ blkcg->css.id, GFP_ATOMIC);
+ if (!wb_congested) {
+ ret = -ENOMEM;
+ goto err_put_css;
+ }
+
/* allocate */
if (!new_blkg) {
new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
if (unlikely(!new_blkg)) {
ret = -ENOMEM;
- goto err_put_css;
+ goto err_put_congested;
}
}
blkg = new_blkg;
+ blkg->wb_congested = wb_congested;
/* link parent */
if (blkcg_parent(blkcg)) {
blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
if (WARN_ON_ONCE(!blkg->parent)) {
ret = -EINVAL;
- goto err_put_css;
+ goto err_put_congested;
}
blkg_get(blkg->parent);
}
@@ -235,18 +260,15 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg->online = true;
spin_unlock(&blkcg->lock);
- if (!ret) {
- if (blkcg == &blkcg_root) {
- q->root_blkg = blkg;
- q->root_rl.blkg = blkg;
- }
+ if (!ret)
return blkg;
- }
/* @blkg failed fully initialized, use the usual release path */
blkg_put(blkg);
return ERR_PTR(ret);
+err_put_congested:
+ wb_congested_put(wb_congested);
err_put_css:
css_put(&blkcg->css);
err_free_blkg:
@@ -340,15 +362,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
rcu_assign_pointer(blkcg->blkg_hint, NULL);
/*
- * If root blkg is destroyed. Just clear the pointer since root_rl
- * does not take reference on root blkg.
- */
- if (blkcg == &blkcg_root) {
- blkg->q->root_blkg = NULL;
- blkg->q->root_rl.blkg = NULL;
- }
-
- /*
* Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed.
*/
@@ -402,6 +415,8 @@ void __blkg_release_rcu(struct rcu_head *rcu_head)
if (blkg->parent)
blkg_put(blkg->parent);
+ wb_congested_put(blkg->wb_congested);
+
blkg_free(blkg);
}
EXPORT_SYMBOL_GPL(__blkg_release_rcu);
@@ -448,20 +463,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
struct blkcg_gq *blkg;
int i;
- /*
- * XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex
- * which ends up putting cgroup's internal cgroup_tree_mutex under
- * it; however, cgroup_tree_mutex is nested above cgroup file
- * active protection and grabbing blkcg_pol_mutex from a cgroup
- * file operation creates a possible circular dependency. cgroup
- * internal locking is planned to go through further simplification
- * and this issue should go away soon. For now, let's trylock
- * blkcg_pol_mutex and restart the write on failure.
- *
- * http://lkml.kernel.org/g/5363C04B.4010400@oracle.com
- */
- if (!mutex_trylock(&blkcg_pol_mutex))
- return restart_syscall();
+ mutex_lock(&blkcg_pol_mutex);
spin_lock_irq(&blkcg->lock);
/*
@@ -813,20 +815,35 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
}
spin_unlock_irq(&blkcg->lock);
+
+ wb_blkcg_offline(blkcg);
}
static void blkcg_css_free(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);
- if (blkcg != &blkcg_root)
+ mutex_lock(&blkcg_pol_mutex);
+ list_del(&blkcg->all_blkcgs_node);
+ mutex_unlock(&blkcg_pol_mutex);
+
+ if (blkcg != &blkcg_root) {
+ int i;
+
+ for (i = 0; i < BLKCG_MAX_POLS; i++)
+ kfree(blkcg->pd[i]);
kfree(blkcg);
+ }
}
static struct cgroup_subsys_state *
blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct blkcg *blkcg;
+ struct cgroup_subsys_state *ret;
+ int i;
+
+ mutex_lock(&blkcg_pol_mutex);
if (!parent_css) {
blkcg = &blkcg_root;
@@ -834,17 +851,54 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
}
blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
- if (!blkcg)
- return ERR_PTR(-ENOMEM);
+ if (!blkcg) {
+ ret = ERR_PTR(-ENOMEM);
+ goto free_blkcg;
+ }
+
+ for (i = 0; i < BLKCG_MAX_POLS ; i++) {
+ struct blkcg_policy *pol = blkcg_policy[i];
+ struct blkcg_policy_data *cpd;
+
+ /*
+ * If the policy hasn't been attached yet, wait for it
+ * to be attached before doing anything else. Otherwise,
+ * check if the policy requires any specific per-cgroup
+ * data: if it does, allocate and initialize it.
+ */
+ if (!pol || !pol->cpd_size)
+ continue;
+
+ BUG_ON(blkcg->pd[i]);
+ cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
+ if (!cpd) {
+ ret = ERR_PTR(-ENOMEM);
+ goto free_pd_blkcg;
+ }
+ blkcg->pd[i] = cpd;
+ cpd->plid = i;
+ pol->cpd_init_fn(blkcg);
+ }
- blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
- blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT;
done:
spin_lock_init(&blkcg->lock);
INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
INIT_HLIST_HEAD(&blkcg->blkg_list);
+#ifdef CONFIG_CGROUP_WRITEBACK
+ INIT_LIST_HEAD(&blkcg->cgwb_list);
+#endif
+ list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
+ mutex_unlock(&blkcg_pol_mutex);
return &blkcg->css;
+
+free_pd_blkcg:
+ for (i--; i >= 0; i--)
+ kfree(blkcg->pd[i]);
+free_blkcg:
+ kfree(blkcg);
+ mutex_unlock(&blkcg_pol_mutex);
+ return ret;
}
/**
@@ -859,9 +913,45 @@ done:
*/
int blkcg_init_queue(struct request_queue *q)
{
- might_sleep();
+ struct blkcg_gq *new_blkg, *blkg;
+ bool preloaded;
+ int ret;
+
+ new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
+ if (!new_blkg)
+ return -ENOMEM;
+
+ preloaded = !radix_tree_preload(GFP_KERNEL);
+
+ /*
+ * Make sure the root blkg exists and count the existing blkgs. As
+ * @q is bypassing at this point, blkg_lookup_create() can't be
+ * used. Open code insertion.
+ */
+ rcu_read_lock();
+ spin_lock_irq(q->queue_lock);
+ blkg = blkg_create(&blkcg_root, q, new_blkg);
+ spin_unlock_irq(q->queue_lock);
+ rcu_read_unlock();
+
+ if (preloaded)
+ radix_tree_preload_end();
- return blk_throtl_init(q);
+ if (IS_ERR(blkg)) {
+ kfree(new_blkg);
+ return PTR_ERR(blkg);
+ }
+
+ q->root_blkg = blkg;
+ q->root_rl.blkg = blkg;
+
+ ret = blk_throtl_init(q);
+ if (ret) {
+ spin_lock_irq(q->queue_lock);
+ blkg_destroy_all(q);
+ spin_unlock_irq(q->queue_lock);
+ }
+ return ret;
}
/**
@@ -962,52 +1052,21 @@ int blkcg_activate_policy(struct request_queue *q,
const struct blkcg_policy *pol)
{
LIST_HEAD(pds);
- struct blkcg_gq *blkg, *new_blkg;
- struct blkg_policy_data *pd, *n;
+ struct blkcg_gq *blkg;
+ struct blkg_policy_data *pd, *nd;
int cnt = 0, ret;
- bool preloaded;
if (blkcg_policy_enabled(q, pol))
return 0;
- /* preallocations for root blkg */
- new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
- if (!new_blkg)
- return -ENOMEM;
-
+ /* count and allocate policy_data for all existing blkgs */
blk_queue_bypass_start(q);
-
- preloaded = !radix_tree_preload(GFP_KERNEL);
-
- /*
- * Make sure the root blkg exists and count the existing blkgs. As
- * @q is bypassing at this point, blkg_lookup_create() can't be
- * used. Open code it.
- */
spin_lock_irq(q->queue_lock);
-
- rcu_read_lock();
- blkg = __blkg_lookup(&blkcg_root, q, false);
- if (blkg)
- blkg_free(new_blkg);
- else
- blkg = blkg_create(&blkcg_root, q, new_blkg);
- rcu_read_unlock();
-
- if (preloaded)
- radix_tree_preload_end();
-
- if (IS_ERR(blkg)) {
- ret = PTR_ERR(blkg);
- goto out_unlock;
- }
-
list_for_each_entry(blkg, &q->blkg_list, q_node)
cnt++;
-
spin_unlock_irq(q->queue_lock);
- /* allocate policy_data for all existing blkgs */
+ /* allocate per-blkg policy data for all existing blkgs */
while (cnt--) {
pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
if (!pd) {
@@ -1018,7 +1077,7 @@ int blkcg_activate_policy(struct request_queue *q,
}
/*
- * Install the allocated pds. With @q bypassing, no new blkg
+ * Install the allocated pds and cpds. With @q bypassing, no new blkg
* should have been created while the queue lock was dropped.
*/
spin_lock_irq(q->queue_lock);
@@ -1049,7 +1108,7 @@ out_unlock:
spin_unlock_irq(q->queue_lock);
out_free:
blk_queue_bypass_end(q);
- list_for_each_entry_safe(pd, n, &pds, alloc_node)
+ list_for_each_entry_safe(pd, nd, &pds, alloc_node)
kfree(pd);
return ret;
}
@@ -1076,10 +1135,6 @@ void blkcg_deactivate_policy(struct request_queue *q,
__clear_bit(pol->plid, q->blkcg_pols);
- /* if no policy is left, no need for blkgs - shoot them down */
- if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS))
- blkg_destroy_all(q);
-
list_for_each_entry(blkg, &q->blkg_list, q_node) {
/* grab blkcg lock too while removing @pd from @blkg */
spin_lock(&blkg->blkcg->lock);
@@ -1109,11 +1164,13 @@ EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
*/
int blkcg_policy_register(struct blkcg_policy *pol)
{
+ struct blkcg *blkcg;
int i, ret;
if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data)))
return -EINVAL;
+ mutex_lock(&blkcg_pol_register_mutex);
mutex_lock(&blkcg_pol_mutex);
/* find an empty slot */
@@ -1122,19 +1179,49 @@ int blkcg_policy_register(struct blkcg_policy *pol)
if (!blkcg_policy[i])
break;
if (i >= BLKCG_MAX_POLS)
- goto out_unlock;
+ goto err_unlock;
- /* register and update blkgs */
+ /* register @pol */
pol->plid = i;
- blkcg_policy[i] = pol;
+ blkcg_policy[pol->plid] = pol;
+
+ /* allocate and install cpd's */
+ if (pol->cpd_size) {
+ list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
+ struct blkcg_policy_data *cpd;
+
+ cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
+ if (!cpd) {
+ mutex_unlock(&blkcg_pol_mutex);
+ goto err_free_cpds;
+ }
+
+ blkcg->pd[pol->plid] = cpd;
+ cpd->plid = pol->plid;
+ pol->cpd_init_fn(blkcg);
+ }
+ }
+
+ mutex_unlock(&blkcg_pol_mutex);
/* everything is in place, add intf files for the new policy */
if (pol->cftypes)
WARN_ON(cgroup_add_legacy_cftypes(&blkio_cgrp_subsys,
pol->cftypes));
- ret = 0;
-out_unlock:
+ mutex_unlock(&blkcg_pol_register_mutex);
+ return 0;
+
+err_free_cpds:
+ if (pol->cpd_size) {
+ list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
+ kfree(blkcg->pd[pol->plid]);
+ blkcg->pd[pol->plid] = NULL;
+ }
+ }
+ blkcg_policy[pol->plid] = NULL;
+err_unlock:
mutex_unlock(&blkcg_pol_mutex);
+ mutex_unlock(&blkcg_pol_register_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(blkcg_policy_register);
@@ -1147,7 +1234,9 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register);
*/
void blkcg_policy_unregister(struct blkcg_policy *pol)
{
- mutex_lock(&blkcg_pol_mutex);
+ struct blkcg *blkcg;
+
+ mutex_lock(&blkcg_pol_register_mutex);
if (WARN_ON(blkcg_policy[pol->plid] != pol))
goto out_unlock;
@@ -1156,9 +1245,19 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
if (pol->cftypes)
cgroup_rm_cftypes(pol->cftypes);
- /* unregister and update blkgs */
+ /* remove cpds and unregister */
+ mutex_lock(&blkcg_pol_mutex);
+
+ if (pol->cpd_size) {
+ list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
+ kfree(blkcg->pd[pol->plid]);
+ blkcg->pd[pol->plid] = NULL;
+ }
+ }
blkcg_policy[pol->plid] = NULL;
-out_unlock:
+
mutex_unlock(&blkcg_pol_mutex);
+out_unlock:
+ mutex_unlock(&blkcg_pol_register_mutex);
}
EXPORT_SYMBOL_GPL(blkcg_policy_unregister);