summary refs log tree commit diff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r-- block/bio.c 62
-rw-r--r-- block/blk-core.c 26
-rw-r--r-- block/blk-map.c 47
-rw-r--r-- block/blk-mq-sysfs.c 15
-rw-r--r-- block/blk-mq.c 187
-rw-r--r-- block/blk-mq.h 1
-rw-r--r-- block/blk-settings.c 12
-rw-r--r-- block/blk-sysfs.c 8
-rw-r--r-- block/cfq-iosched.c 45
-rw-r--r-- block/compat_ioctl.c 4
-rw-r--r-- block/ioctl.c 4
-rw-r--r-- block/partition-generic.c 19
12 files changed, 253 insertions, 177 deletions
diff --git a/block/bio.c b/block/bio.c
index cf7591551..807d25e46 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -296,13 +296,19 @@ void bio_reset(struct bio *bio)
}
EXPORT_SYMBOL(bio_reset);
-static void bio_chain_endio(struct bio *bio)
+static struct bio *__bio_chain_endio(struct bio *bio)
{
struct bio *parent = bio->bi_private;
- parent->bi_error = bio->bi_error;
- bio_endio(parent);
+ if (!parent->bi_error)
+ parent->bi_error = bio->bi_error;
bio_put(bio);
+ return parent;
+}
+
+static void bio_chain_endio(struct bio *bio)
+{
+ bio_endio(__bio_chain_endio(bio));
}
/*
@@ -1333,7 +1339,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
* release the pages we didn't map into the bio, if any
*/
while (j < page_limit)
- page_cache_release(pages[j++]);
+ put_page(pages[j++]);
}
kfree(pages);
@@ -1359,7 +1365,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
for (j = 0; j < nr_pages; j++) {
if (!pages[j])
break;
- page_cache_release(pages[j]);
+ put_page(pages[j]);
}
out:
kfree(pages);
@@ -1379,7 +1385,7 @@ static void __bio_unmap_user(struct bio *bio)
if (bio_data_dir(bio) == READ)
set_page_dirty_lock(bvec->bv_page);
- page_cache_release(bvec->bv_page);
+ put_page(bvec->bv_page);
}
bio_put(bio);
@@ -1609,8 +1615,8 @@ static void bio_release_pages(struct bio *bio)
* the BIO and the offending pages and re-dirty the pages in process context.
*
* It is expected that bio_check_pages_dirty() will wholly own the BIO from
- * here on. It will run one page_cache_release() against each page and will
- * run one bio_put() against the BIO.
+ * here on. It will run one put_page() against each page and will run one
+ * bio_put() against the BIO.
*/
static void bio_dirty_fn(struct work_struct *work);
@@ -1652,7 +1658,7 @@ void bio_check_pages_dirty(struct bio *bio)
struct page *page = bvec->bv_page;
if (PageDirty(page) || PageCompound(page)) {
- page_cache_release(page);
+ put_page(page);
bvec->bv_page = NULL;
} else {
nr_clean_pages++;
@@ -1742,29 +1748,25 @@ static inline bool bio_remaining_done(struct bio *bio)
**/
void bio_endio(struct bio *bio)
{
- while (bio) {
- if (unlikely(!bio_remaining_done(bio)))
- break;
+again:
+ if (!bio_remaining_done(bio))
+ return;
- /*
- * Need to have a real endio function for chained bios,
- * otherwise various corner cases will break (like stacking
- * block devices that save/restore bi_end_io) - however, we want
- * to avoid unbounded recursion and blowing the stack. Tail call
- * optimization would handle this, but compiling with frame
- * pointers also disables gcc's sibling call optimization.
- */
- if (bio->bi_end_io == bio_chain_endio) {
- struct bio *parent = bio->bi_private;
- parent->bi_error = bio->bi_error;
- bio_put(bio);
- bio = parent;
- } else {
- if (bio->bi_end_io)
- bio->bi_end_io(bio);
- bio = NULL;
- }
+ /*
+ * Need to have a real endio function for chained bios, otherwise
+ * various corner cases will break (like stacking block devices that
+ * save/restore bi_end_io) - however, we want to avoid unbounded
+ * recursion and blowing the stack. Tail call optimization would
+ * handle this, but compiling with frame pointers also disables
+ * gcc's sibling call optimization.
+ */
+ if (bio->bi_end_io == bio_chain_endio) {
+ bio = __bio_chain_endio(bio);
+ goto again;
}
+
+ if (bio->bi_end_io)
+ bio->bi_end_io(bio);
}
EXPORT_SYMBOL(bio_endio);
diff --git a/block/blk-core.c b/block/blk-core.c
index f98581f9e..88d6e981e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -708,7 +708,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
goto fail_id;
q->backing_dev_info.ra_pages =
- (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+ (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
q->backing_dev_info.capabilities = BDI_CAP_CGROUP_WRITEBACK;
q->backing_dev_info.name = "block";
q->node = node_id;
@@ -3534,6 +3534,30 @@ void blk_post_runtime_resume(struct request_queue *q, int err)
spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL(blk_post_runtime_resume);
+
+/**
+ * blk_set_runtime_active - Force runtime status of the queue to be active
+ * @q: the queue of the device
+ *
+ * If the device is left runtime suspended during system suspend the resume
+ * hook typically resumes the device and corrects runtime status
+ * accordingly. However, that does not affect the queue runtime PM status
+ * which is still "suspended". This prevents processing requests from the
+ * queue.
+ *
+ * This function can be used in driver's resume hook to correct queue
+ * runtime PM status and re-enable peeking requests from the queue. It
+ * should be called before first request is added to the queue.
+ */
+void blk_set_runtime_active(struct request_queue *q)
+{
+ spin_lock_irq(q->queue_lock);
+ q->rpm_status = RPM_ACTIVE;
+ pm_runtime_mark_last_busy(q->dev);
+ pm_request_autosuspend(q->dev);
+ spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(blk_set_runtime_active);
#endif
int __init blk_dev_init(void)
diff --git a/block/blk-map.c b/block/blk-map.c
index a54f0543b..b9f88b775 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -9,24 +9,6 @@
#include "blk.h"
-static bool iovec_gap_to_prv(struct request_queue *q,
- struct iovec *prv, struct iovec *cur)
-{
- unsigned long prev_end;
-
- if (!queue_virt_boundary(q))
- return false;
-
- if (prv->iov_base == NULL && prv->iov_len == 0)
- /* prv is not set - don't check */
- return false;
-
- prev_end = (unsigned long)(prv->iov_base + prv->iov_len);
-
- return (((unsigned long)cur->iov_base & queue_virt_boundary(q)) ||
- prev_end & queue_virt_boundary(q));
-}
-
int blk_rq_append_bio(struct request_queue *q, struct request *rq,
struct bio *bio)
{
@@ -125,31 +107,18 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
struct rq_map_data *map_data,
const struct iov_iter *iter, gfp_t gfp_mask)
{
- struct iovec iov, prv = {.iov_base = NULL, .iov_len = 0};
- bool copy = (q->dma_pad_mask & iter->count) || map_data;
+ bool copy = false;
+ unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
struct bio *bio = NULL;
struct iov_iter i;
int ret;
- if (!iter || !iter->count)
- return -EINVAL;
-
- iov_for_each(iov, i, *iter) {
- unsigned long uaddr = (unsigned long) iov.iov_base;
-
- if (!iov.iov_len)
- return -EINVAL;
-
- /*
- * Keep going so we check length of all segments
- */
- if ((uaddr & queue_dma_alignment(q)) ||
- iovec_gap_to_prv(q, &prv, &iov))
- copy = true;
-
- prv.iov_base = iov.iov_base;
- prv.iov_len = iov.iov_len;
- }
+ if (map_data)
+ copy = true;
+ else if (iov_iter_alignment(iter) & align)
+ copy = true;
+ else if (queue_virt_boundary(q))
+ copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);
i = *iter;
do {
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 1cf18784c..4ea4dd8a1 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -408,19 +408,22 @@ void blk_mq_unregister_disk(struct gendisk *disk)
blk_mq_enable_hotplug();
}
+void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx)
+{
+ kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
+}
+
static void blk_mq_sysfs_init(struct request_queue *q)
{
- struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
- int i;
+ int cpu;
kobject_init(&q->mq_kobj, &blk_mq_ktype);
- queue_for_each_hw_ctx(q, hctx, i)
- kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
-
- queue_for_each_ctx(q, ctx, i)
+ for_each_possible_cpu(cpu) {
+ ctx = per_cpu_ptr(q->queue_ctx, cpu);
kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
+ }
}
int blk_mq_register_disk(struct gendisk *disk)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 56c0a726b..1699baf39 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -544,7 +544,10 @@ EXPORT_SYMBOL(blk_mq_abort_requeue_list);
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
{
- return tags->rqs[tag];
+ if (tag < tags->nr_tags)
+ return tags->rqs[tag];
+
+ return NULL;
}
EXPORT_SYMBOL(blk_mq_tag_to_rq);
@@ -1744,31 +1747,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
return -1;
}
-static int blk_mq_init_hw_queues(struct request_queue *q,
- struct blk_mq_tag_set *set)
-{
- struct blk_mq_hw_ctx *hctx;
- unsigned int i;
-
- /*
- * Initialize hardware queues
- */
- queue_for_each_hw_ctx(q, hctx, i) {
- if (blk_mq_init_hctx(q, set, hctx, i))
- break;
- }
-
- if (i == q->nr_hw_queues)
- return 0;
-
- /*
- * Init failed
- */
- blk_mq_exit_hw_queues(q, set, i);
-
- return 1;
-}
-
static void blk_mq_init_cpu_queues(struct request_queue *q,
unsigned int nr_hw_queues)
{
@@ -1820,12 +1798,14 @@ static void blk_mq_map_swqueue(struct request_queue *q,
/*
* Map software to hardware queues
*/
- queue_for_each_ctx(q, ctx, i) {
+ for_each_possible_cpu(i) {
/* If the cpu isn't online, the cpu is mapped to first hctx */
if (!cpumask_test_cpu(i, online_mask))
continue;
+ ctx = per_cpu_ptr(q->queue_ctx, i);
hctx = q->mq_ops->map_queue(q, i);
+
cpumask_set_cpu(i, hctx->cpumask);
ctx->index_hw = hctx->nr_ctx;
hctx->ctxs[hctx->nr_ctx++] = ctx;
@@ -1974,56 +1954,93 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
}
EXPORT_SYMBOL(blk_mq_init_queue);
-struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
- struct request_queue *q)
+static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+ struct request_queue *q)
{
- struct blk_mq_hw_ctx **hctxs;
- struct blk_mq_ctx __percpu *ctx;
- unsigned int *map;
- int i;
-
- ctx = alloc_percpu(struct blk_mq_ctx);
- if (!ctx)
- return ERR_PTR(-ENOMEM);
-
- hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
- set->numa_node);
-
- if (!hctxs)
- goto err_percpu;
-
- map = blk_mq_make_queue_map(set);
- if (!map)
- goto err_map;
+ int i, j;
+ struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
+ blk_mq_sysfs_unregister(q);
for (i = 0; i < set->nr_hw_queues; i++) {
- int node = blk_mq_hw_queue_to_node(map, i);
+ int node;
+
+ if (hctxs[i])
+ continue;
+ node = blk_mq_hw_queue_to_node(q->mq_map, i);
hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
GFP_KERNEL, node);
if (!hctxs[i])
- goto err_hctxs;
+ break;
if (!zalloc_cpumask_var_node(&hctxs[i]->cpumask, GFP_KERNEL,
- node))
- goto err_hctxs;
+ node)) {
+ kfree(hctxs[i]);
+ hctxs[i] = NULL;
+ break;
+ }
atomic_set(&hctxs[i]->nr_active, 0);
hctxs[i]->numa_node = node;
hctxs[i]->queue_num = i;
+
+ if (blk_mq_init_hctx(q, set, hctxs[i], i)) {
+ free_cpumask_var(hctxs[i]->cpumask);
+ kfree(hctxs[i]);
+ hctxs[i] = NULL;
+ break;
+ }
+ blk_mq_hctx_kobj_init(hctxs[i]);
+ }
+ for (j = i; j < q->nr_hw_queues; j++) {
+ struct blk_mq_hw_ctx *hctx = hctxs[j];
+
+ if (hctx) {
+ if (hctx->tags) {
+ blk_mq_free_rq_map(set, hctx->tags, j);
+ set->tags[j] = NULL;
+ }
+ blk_mq_exit_hctx(q, set, hctx, j);
+ free_cpumask_var(hctx->cpumask);
+ kobject_put(&hctx->kobj);
+ kfree(hctx->ctxs);
+ kfree(hctx);
+ hctxs[j] = NULL;
+
+ }
}
+ q->nr_hw_queues = i;
+ blk_mq_sysfs_register(q);
+}
+
+struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
+ struct request_queue *q)
+{
+ /* mark the queue as mq asap */
+ q->mq_ops = set->ops;
+
+ q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
+ if (!q->queue_ctx)
+ return ERR_PTR(-ENOMEM);
+
+ q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)),
+ GFP_KERNEL, set->numa_node);
+ if (!q->queue_hw_ctx)
+ goto err_percpu;
+
+ q->mq_map = blk_mq_make_queue_map(set);
+ if (!q->mq_map)
+ goto err_map;
+
+ blk_mq_realloc_hw_ctxs(set, q);
+ if (!q->nr_hw_queues)
+ goto err_hctxs;
INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
q->nr_queues = nr_cpu_ids;
- q->nr_hw_queues = set->nr_hw_queues;
- q->mq_map = map;
- q->queue_ctx = ctx;
- q->queue_hw_ctx = hctxs;
-
- q->mq_ops = set->ops;
q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
if (!(set->flags & BLK_MQ_F_SG_MERGE))
@@ -2050,9 +2067,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
- if (blk_mq_init_hw_queues(q, set))
- goto err_hctxs;
-
get_online_cpus();
mutex_lock(&all_q_mutex);
@@ -2066,17 +2080,11 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
return q;
err_hctxs:
- kfree(map);
- for (i = 0; i < set->nr_hw_queues; i++) {
- if (!hctxs[i])
- break;
- free_cpumask_var(hctxs[i]->cpumask);
- kfree(hctxs[i]);
- }
+ kfree(q->mq_map);
err_map:
- kfree(hctxs);
+ kfree(q->queue_hw_ctx);
err_percpu:
- free_percpu(ctx);
+ free_percpu(q->queue_ctx);
return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(blk_mq_init_allocated_queue);
@@ -2284,9 +2292,13 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
set->nr_hw_queues = 1;
set->queue_depth = min(64U, set->queue_depth);
}
+ /*
+ * There is no use for more h/w queues than cpus.
+ */
+ if (set->nr_hw_queues > nr_cpu_ids)
+ set->nr_hw_queues = nr_cpu_ids;
- set->tags = kmalloc_node(set->nr_hw_queues *
- sizeof(struct blk_mq_tags *),
+ set->tags = kzalloc_node(nr_cpu_ids * sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node);
if (!set->tags)
return -ENOMEM;
@@ -2309,7 +2321,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
{
int i;
- for (i = 0; i < set->nr_hw_queues; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
if (set->tags[i])
blk_mq_free_rq_map(set, set->tags[i], i);
}
@@ -2330,6 +2342,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
ret = 0;
queue_for_each_hw_ctx(q, hctx, i) {
+ if (!hctx->tags)
+ continue;
ret = blk_mq_tag_update_depth(hctx->tags, nr);
if (ret)
break;
@@ -2341,6 +2355,35 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
return ret;
}
+void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
+{
+ struct request_queue *q;
+
+ if (nr_hw_queues > nr_cpu_ids)
+ nr_hw_queues = nr_cpu_ids;
+ if (nr_hw_queues < 1 || nr_hw_queues == set->nr_hw_queues)
+ return;
+
+ list_for_each_entry(q, &set->tag_list, tag_set_list)
+ blk_mq_freeze_queue(q);
+
+ set->nr_hw_queues = nr_hw_queues;
+ list_for_each_entry(q, &set->tag_list, tag_set_list) {
+ blk_mq_realloc_hw_ctxs(set, q);
+
+ if (q->nr_hw_queues > 1)
+ blk_queue_make_request(q, blk_mq_make_request);
+ else
+ blk_queue_make_request(q, blk_sq_make_request);
+
+ blk_mq_queue_reinit(q, cpu_online_mask);
+ }
+
+ list_for_each_entry(q, &set->tag_list, tag_set_list)
+ blk_mq_unfreeze_queue(q);
+}
+EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
+
void blk_mq_disable_hotplug(void)
{
mutex_lock(&all_q_mutex);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index eaede8e45..9087b1103 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -57,6 +57,7 @@ extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
*/
extern int blk_mq_sysfs_register(struct request_queue *q);
extern void blk_mq_sysfs_unregister(struct request_queue *q);
+extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
extern void blk_mq_rq_timed_out(struct request *req, bool reserved);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index c7bb666aa..331e4eee0 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -239,8 +239,8 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
struct queue_limits *limits = &q->limits;
unsigned int max_sectors;
- if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) {
- max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
+ if ((max_hw_sectors << 9) < PAGE_SIZE) {
+ max_hw_sectors = 1 << (PAGE_SHIFT - 9);
printk(KERN_INFO "%s: set to minimum %d\n",
__func__, max_hw_sectors);
}
@@ -329,8 +329,8 @@ EXPORT_SYMBOL(blk_queue_max_segments);
**/
void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size)
{
- if (max_size < PAGE_CACHE_SIZE) {
- max_size = PAGE_CACHE_SIZE;
+ if (max_size < PAGE_SIZE) {
+ max_size = PAGE_SIZE;
printk(KERN_INFO "%s: set to minimum %d\n",
__func__, max_size);
}
@@ -760,8 +760,8 @@ EXPORT_SYMBOL_GPL(blk_queue_dma_drain);
**/
void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask)
{
- if (mask < PAGE_CACHE_SIZE - 1) {
- mask = PAGE_CACHE_SIZE - 1;
+ if (mask < PAGE_SIZE - 1) {
+ mask = PAGE_SIZE - 1;
printk(KERN_INFO "%s: set to minimum %lx\n",
__func__, mask);
}
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index dd9376305..995b58d46 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -76,7 +76,7 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
static ssize_t queue_ra_show(struct request_queue *q, char *page)
{
unsigned long ra_kb = q->backing_dev_info.ra_pages <<
- (PAGE_CACHE_SHIFT - 10);
+ (PAGE_SHIFT - 10);
return queue_var_show(ra_kb, (page));
}
@@ -90,7 +90,7 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
if (ret < 0)
return ret;
- q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
+ q->backing_dev_info.ra_pages = ra_kb >> (PAGE_SHIFT - 10);
return ret;
}
@@ -117,7 +117,7 @@ static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
if (blk_queue_cluster(q))
return queue_var_show(queue_max_segment_size(q), (page));
- return queue_var_show(PAGE_CACHE_SIZE, (page));
+ return queue_var_show(PAGE_SIZE, (page));
}
static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
@@ -198,7 +198,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
{
unsigned long max_sectors_kb,
max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1,
- page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
+ page_kb = 1 << (PAGE_SHIFT - 10);
ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
if (ret < 0)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1f9093e90..4a349787b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -632,6 +632,13 @@ static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg)
return pblkg ? blkg_to_cfqg(pblkg) : NULL;
}
+static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
+ struct cfq_group *ancestor)
+{
+ return cgroup_is_descendant(cfqg_to_blkg(cfqg)->blkcg->css.cgroup,
+ cfqg_to_blkg(ancestor)->blkcg->css.cgroup);
+}
+
static inline void cfqg_get(struct cfq_group *cfqg)
{
return blkg_get(cfqg_to_blkg(cfqg));
@@ -758,6 +765,11 @@ static void cfqg_stats_xfer_dead(struct cfq_group *cfqg)
#else /* CONFIG_CFQ_GROUP_IOSCHED */
static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) { return NULL; }
+static inline bool cfqg_is_descendant(struct cfq_group *cfqg,
+ struct cfq_group *ancestor)
+{
+ return true;
+}
static inline void cfqg_get(struct cfq_group *cfqg) { }
static inline void cfqg_put(struct cfq_group *cfqg) { }
@@ -2897,6 +2909,7 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
static void cfq_arm_slice_timer(struct cfq_data *cfqd)
{
struct cfq_queue *cfqq = cfqd->active_queue;
+ struct cfq_rb_root *st = cfqq->service_tree;
struct cfq_io_cq *cic;
unsigned long sl, group_idle = 0;
@@ -2947,8 +2960,13 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
return;
}
- /* There are other queues in the group, don't do group idle */
- if (group_idle && cfqq->cfqg->nr_cfqq > 1)
+ /*
+ * There are other queues in the group or this is the only group and
+ * it has too big thinktime, don't do group idle.
+ */
+ if (group_idle &&
+ (cfqq->cfqg->nr_cfqq > 1 ||
+ cfq_io_thinktime_big(cfqd, &st->ttime, true)))
return;
cfq_mark_cfqq_wait_request(cfqq);
@@ -3947,16 +3965,27 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
return true;
- if (new_cfqq->cfqg != cfqq->cfqg)
+ /*
+ * Treat ancestors of current cgroup the same way as current cgroup.
+ * For anybody else we disallow preemption to guarantee service
+ * fairness among cgroups.
+ */
+ if (!cfqg_is_descendant(cfqq->cfqg, new_cfqq->cfqg))
return false;
if (cfq_slice_used(cfqq))
return true;
+ /*
+ * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
+ */
+ if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
+ return true;
+
+ WARN_ON_ONCE(cfqq->ioprio_class != new_cfqq->ioprio_class);
/* Allow preemption only if we are idling on sync-noidle tree */
if (cfqd->serving_wl_type == SYNC_NOIDLE_WORKLOAD &&
cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD &&
- new_cfqq->service_tree->count == 2 &&
RB_EMPTY_ROOT(&cfqq->sort_list))
return true;
@@ -3967,12 +3996,6 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending)
return true;
- /*
- * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
- */
- if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
- return true;
-
/* An idle queue should not be idle now for some reason */
if (RB_EMPTY_ROOT(&cfqq->sort_list) && !cfq_should_idle(cfqd, cfqq))
return true;
@@ -4052,7 +4075,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
* idle timer unplug to continue working.
*/
if (cfq_cfqq_wait_request(cfqq)) {
- if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
+ if (blk_rq_bytes(rq) > PAGE_SIZE ||
cfqd->busy_queues > 1) {
cfq_del_timer(cfqd, cfqq);
cfq_clear_cfqq_wait_request(cfqq);
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index f678c733d..556826ac7 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -710,7 +710,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return -EINVAL;
bdi = blk_get_backing_dev_info(bdev);
return compat_put_long(arg,
- (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
+ (bdi->ra_pages * PAGE_SIZE) / 512);
case BLKROGET: /* compatible */
return compat_put_int(arg, bdev_read_only(bdev) != 0);
case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */
@@ -729,7 +729,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
bdi = blk_get_backing_dev_info(bdev);
- bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
+ bdi->ra_pages = (arg * 512) / PAGE_SIZE;
return 0;
case BLKGETSIZE:
size = i_size_read(bdev->bd_inode);
diff --git a/block/ioctl.c b/block/ioctl.c
index d8996bbd7..4ff1f92f8 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -550,7 +550,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
if (!arg)
return -EINVAL;
bdi = blk_get_backing_dev_info(bdev);
- return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
+ return put_long(arg, (bdi->ra_pages * PAGE_SIZE) / 512);
case BLKROGET:
return put_int(arg, bdev_read_only(bdev) != 0);
case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
@@ -578,7 +578,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
if(!capable(CAP_SYS_ADMIN))
return -EACCES;
bdi = blk_get_backing_dev_info(bdev);
- bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
+ bdi->ra_pages = (arg * 512) / PAGE_SIZE;
return 0;
case BLKBSZSET:
return blkdev_bszset(bdev, mode, argp);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index cfcfe1b0e..d7eb77e1e 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -217,10 +217,21 @@ static void part_release(struct device *dev)
kfree(p);
}
+static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ struct hd_struct *part = dev_to_part(dev);
+
+ add_uevent_var(env, "PARTN=%u", part->partno);
+ if (part->info && part->info->volname[0])
+ add_uevent_var(env, "PARTNAME=%s", part->info->volname);
+ return 0;
+}
+
struct device_type part_type = {
.name = "partition",
.groups = part_attr_groups,
.release = part_release,
+ .uevent = part_uevent,
};
static void delete_partition_rcu_cb(struct rcu_head *head)
@@ -562,8 +573,8 @@ static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
- return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
- NULL);
+ return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)),
+ NULL);
}
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
@@ -580,9 +591,9 @@ unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
if (PageError(page))
goto fail;
p->v = page;
- return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_CACHE_SHIFT - 9)) - 1)) << 9);
+ return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9);
fail:
- page_cache_release(page);
+ put_page(page);
}
p->v = NULL;
return NULL;