Diffstat (limited to 'block')
-rw-r--r--  block/bfq-iosched.c    |  11
-rw-r--r--  block/bio-integrity.c  |   8
-rw-r--r--  block/bio.c            | 131
-rw-r--r--  block/blk-merge.c      |  48
-rw-r--r--  block/blk-mq-sysfs.c   |  15
-rw-r--r--  block/blk-mq.c         |   2
-rw-r--r--  block/blk-sysfs.c      |   3
7 files changed, 160 insertions, 58 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 7d45ac451745..93863c6173e6 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3314,7 +3314,12 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* whether bfqq is being weight-raised, because
* bfq_symmetric_scenario() does not take into account also
* weight-raised queues (see comments on
- * bfq_weights_tree_add()).
+ * bfq_weights_tree_add()). In particular, if bfqq is being
+ * weight-raised, it is important to idle only if there are
+ * other, non-weight-raised queues that may steal throughput
+ * to bfqq. Actually, we should be even more precise, and
+ * differentiate between interactive weight raising and
+ * soft real-time weight raising.
*
* As a side note, it is worth considering that the above
* device-idling countermeasures may however fail in the
@@ -3326,7 +3331,8 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq)
* to let requests be served in the desired order until all
* the requests already queued in the device have been served.
*/
- asymmetric_scenario = bfqq->wr_coeff > 1 ||
+ asymmetric_scenario = (bfqq->wr_coeff > 1 &&
+ bfqd->wr_busy_queues < bfqd->busy_queues) ||
!bfq_symmetric_scenario(bfqd);
/*
@@ -3760,6 +3766,7 @@ static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
unsigned long flags;
spin_lock_irqsave(&bfqd->lock, flags);
+ bfqq->bic = NULL;
bfq_exit_bfqq(bfqd, bfqq);
bic_set_bfqq(bic, NULL, is_sync);
spin_unlock_irqrestore(&bfqd->lock, flags);
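
The reworked idling condition above can be read as a predicate on its own. A minimal sketch, assuming the bfq_data/bfq_queue fields used in the hunk (wr_coeff, wr_busy_queues, busy_queues) and the existing bfq_symmetric_scenario() helper; the function name is hypothetical:

static bool bfq_scenario_is_asymmetric(struct bfq_data *bfqd,
				       struct bfq_queue *bfqq)
{
	/*
	 * Idle for a weight-raised queue only when some busy queues
	 * are not weight-raised and could steal its throughput.
	 */
	if (bfqq->wr_coeff > 1 && bfqd->wr_busy_queues < bfqd->busy_queues)
		return true;

	/* Otherwise fall back to the general symmetry check. */
	return !bfq_symmetric_scenario(bfqd);
}
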
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 5df32907ff3b..7f8010662437 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -313,8 +313,12 @@ bool bio_integrity_prep(struct bio *bio)
ret = bio_integrity_add_page(bio, virt_to_page(buf),
bytes, offset);
- if (ret == 0)
- return false;
+ if (ret == 0) {
+ printk(KERN_ERR "could not attach integrity payload\n");
+ kfree(buf);
+ status = BLK_STS_RESOURCE;
+ goto err_end_io;
+ }
if (ret < bytes)
break;
diff --git a/block/bio.c b/block/bio.c
index d01ab919b313..1384f9790882 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -773,7 +773,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
return 0;
}
- if (bio->bi_vcnt >= bio->bi_max_vecs)
+ if (bio_full(bio))
return 0;
/*
@@ -821,65 +821,97 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
EXPORT_SYMBOL(bio_add_pc_page);
/**
- * bio_add_page - attempt to add page to bio
- * @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
+ * __bio_try_merge_page - try appending data to an existing bvec.
+ * @bio: destination bio
+ * @page: page to add
+ * @len: length of the data to add
+ * @off: offset of the data in @page
*
- * Attempt to add a page to the bio_vec maplist. This will only fail
- * if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
+ * Try to add the data at @page + @off to the last bvec of @bio. This is a
+ * useful optimisation for file systems with a block size smaller than the
+ * page size.
+ *
+ * Return %true on success or %false on failure.
*/
-int bio_add_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int offset)
+bool __bio_try_merge_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int off)
{
- struct bio_vec *bv;
-
- /*
- * cloned bio must not modify vec list
- */
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
- return 0;
+ return false;
- /*
- * For filesystems with a blocksize smaller than the pagesize
- * we will often be called with the same page as last time and
- * a consecutive offset. Optimize this special case.
- */
if (bio->bi_vcnt > 0) {
- bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
- if (page == bv->bv_page &&
- offset == bv->bv_offset + bv->bv_len) {
+ if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
bv->bv_len += len;
- goto done;
+ bio->bi_iter.bi_size += len;
+ return true;
}
}
+ return false;
+}
+EXPORT_SYMBOL_GPL(__bio_try_merge_page);
- if (bio->bi_vcnt >= bio->bi_max_vecs)
- return 0;
+/**
+ * __bio_add_page - add page to a bio in a new segment
+ * @bio: destination bio
+ * @page: page to add
+ * @len: length of the data to add
+ * @off: offset of the data in @page
+ *
+ * Add the data at @page + @off to @bio as a new bvec. The caller must ensure
+ * that @bio has space for another bvec.
+ */
+void __bio_add_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int off)
+{
+ struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
- bv = &bio->bi_io_vec[bio->bi_vcnt];
- bv->bv_page = page;
- bv->bv_len = len;
- bv->bv_offset = offset;
+ WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+ WARN_ON_ONCE(bio_full(bio));
+
+ bv->bv_page = page;
+ bv->bv_offset = off;
+ bv->bv_len = len;
- bio->bi_vcnt++;
-done:
bio->bi_iter.bi_size += len;
+ bio->bi_vcnt++;
+}
+EXPORT_SYMBOL_GPL(__bio_add_page);
+
+/**
+ * bio_add_page - attempt to add page to bio
+ * @bio: destination bio
+ * @page: page to add
+ * @len: vec entry length
+ * @offset: vec entry offset
+ *
+ * Attempt to add a page to the bio_vec maplist. This will only fail
+ * if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
+ */
+int bio_add_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int offset)
+{
+ if (!__bio_try_merge_page(bio, page, len, offset)) {
+ if (bio_full(bio))
+ return 0;
+ __bio_add_page(bio, page, len, offset);
+ }
return len;
}
EXPORT_SYMBOL(bio_add_page);
/**
- * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
* @bio: bio to add pages to
* @iter: iov iterator describing the region to be mapped
*
- * Pins as many pages from *iter and appends them to @bio's bvec array. The
+ * Pins pages from *iter and appends them to @bio's bvec array. The
* pages will have to be released using put_page() when done.
+ * For multi-segment *iter, this function only adds pages from the
+ * next non-empty segment of the iov iterator.
*/
-int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
@@ -916,6 +948,33 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
iov_iter_advance(iter, size);
return 0;
}
+
+/**
+ * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * @bio: bio to add pages to
+ * @iter: iov iterator describing the region to be mapped
+ *
+ * Pins pages from *iter and appends them to @bio's bvec array. The
+ * pages will have to be released using put_page() when done.
+ * The function tries, but does not guarantee, to pin as many pages as
+ * fit into the bio, or are requested in *iter, whichever is smaller.
+ * If MM encounters an error pinning the requested pages, it stops.
+ * An error is returned only if 0 pages could be pinned.
+ */
+int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
+{
+ unsigned short orig_vcnt = bio->bi_vcnt;
+
+ do {
+ int ret = __bio_iov_iter_get_pages(bio, iter);
+
+ if (unlikely(ret))
+ return bio->bi_vcnt > orig_vcnt ? 0 : ret;
+
+ } while (iov_iter_count(iter) && !bio_full(bio));
+
+ return 0;
+}
EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
struct submit_bio_ret {
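
Taken together, the two new helpers split bio_add_page() into a merge fast path and an append slow path. A minimal sketch of how a sub-page-block caller might drive them directly, assuming <linux/bio.h> and the one-argument bio_full() used in this patch; the function name is hypothetical:

#include <linux/bio.h>

/* Add blocks_per_page consecutive sub-page blocks from one page: the
 * small-block case __bio_try_merge_page() is meant to optimise. */
static void add_page_blocks(struct bio *bio, struct page *page,
			    unsigned int block_size,
			    unsigned int blocks_per_page)
{
	unsigned int i, off = 0;

	for (i = 0; i < blocks_per_page; i++, off += block_size) {
		/* Consecutive blocks of the same page merge into one bvec. */
		if (__bio_try_merge_page(bio, page, block_size, off))
			continue;
		/* Otherwise start a new bvec, if there is still room. */
		if (WARN_ON_ONCE(bio_full(bio)))
			return;
		__bio_add_page(bio, page, block_size, off);
	}
}
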
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 8d60a5bbcef9..f61b50a01bc7 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -659,6 +659,31 @@ static void blk_account_io_merge(struct request *req)
part_stat_unlock();
}
}
+/*
+ * Two cases of handling DISCARD merge:
+ * If max_discard_segments > 1, the driver takes every bio
+ * as a range and sends them to the controller together. The
+ * ranges need not be contiguous.
+ * Otherwise, the bios/requests will be handled the same as
+ * others, which must be contiguous.
+ */
+static inline bool blk_discard_mergable(struct request *req)
+{
+ if (req_op(req) == REQ_OP_DISCARD &&
+ queue_max_discard_segments(req->q) > 1)
+ return true;
+ return false;
+}
+
+static enum elv_merge blk_try_req_merge(struct request *req, struct request *next)
+{
+ if (blk_discard_mergable(req))
+ return ELEVATOR_DISCARD_MERGE;
+ else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
+ return ELEVATOR_BACK_MERGE;
+
+ return ELEVATOR_NO_MERGE;
+}
/*
* For non-mq, this has to be called with the request spinlock acquired.
@@ -676,12 +701,6 @@ static struct request *attempt_merge(struct request_queue *q,
if (req_op(req) != req_op(next))
return NULL;
- /*
- * not contiguous
- */
- if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
- return NULL;
-
if (rq_data_dir(req) != rq_data_dir(next)
|| req->rq_disk != next->rq_disk
|| req_no_special_merge(next))
@@ -705,11 +724,19 @@ static struct request *attempt_merge(struct request_queue *q,
* counts here. Handle DISCARDs separately, as they
* have separate settings.
*/
- if (req_op(req) == REQ_OP_DISCARD) {
+
+ switch (blk_try_req_merge(req, next)) {
+ case ELEVATOR_DISCARD_MERGE:
if (!req_attempt_discard_merge(q, req, next))
return NULL;
- } else if (!ll_merge_requests_fn(q, req, next))
+ break;
+ case ELEVATOR_BACK_MERGE:
+ if (!ll_merge_requests_fn(q, req, next))
+ return NULL;
+ break;
+ default:
return NULL;
+ }
/*
* If failfast settings disagree or any of the two is already
@@ -738,7 +765,7 @@ static struct request *attempt_merge(struct request_queue *q,
req->__data_len += blk_rq_bytes(next);
- if (req_op(req) != REQ_OP_DISCARD)
+ if (!blk_discard_mergable(req))
elv_merge_requests(q, req, next);
/*
@@ -834,8 +861,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
{
- if (req_op(rq) == REQ_OP_DISCARD &&
- queue_max_discard_segments(rq->q) > 1)
+ if (blk_discard_mergable(rq))
return ELEVATOR_DISCARD_MERGE;
else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
return ELEVATOR_BACK_MERGE;
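
On the driver side, the point of the ELEVATOR_DISCARD_MERGE case is that each bio of a merged DISCARD request describes one range, and the ranges need not be contiguous. A hedged sketch of how a multi-range-capable driver might walk such a request, assuming __rq_for_each_bio() from <linux/blkdev.h>; the helper name and range arrays are hypothetical:

#include <linux/blkdev.h>

/* Collect one (start, length) pair per bio of a merged discard request. */
static unsigned int collect_discard_ranges(struct request *rq,
					   sector_t *starts, u32 *lens,
					   unsigned int max_ranges)
{
	struct bio *bio;
	unsigned int n = 0;

	__rq_for_each_bio(bio, rq) {
		if (n >= max_ranges)
			break;
		starts[n] = bio->bi_iter.bi_sector;	/* range start */
		lens[n] = bio_sectors(bio);		/* range length */
		n++;
	}
	return n;
}
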
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index a54b4b070f1c..de209d60a662 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -145,20 +145,25 @@ static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,
static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
{
+ const size_t size = PAGE_SIZE - 1;
unsigned int i, first = 1;
- ssize_t ret = 0;
+ int ret = 0, pos = 0;
for_each_cpu(i, hctx->cpumask) {
if (first)
- ret += sprintf(ret + page, "%u", i);
+ ret = snprintf(pos + page, size - pos, "%u", i);
else
- ret += sprintf(ret + page, ", %u", i);
+ ret = snprintf(pos + page, size - pos, ", %u", i);
+
+ if (ret >= size - pos)
+ break;
first = 0;
+ pos += ret;
}
- ret += sprintf(ret + page, "\n");
- return ret;
+ ret = snprintf(pos + page, size + 1 - pos, "\n");
+ return pos + ret;
}
static struct attribute *default_ctx_attrs[] = {
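
The rewritten show routine follows a general bounded-write pattern for sysfs: track the write position, stop before overflowing the PAGE_SIZE buffer, and reserve one byte for the trailing newline. A minimal standalone sketch of the same pattern, with a hypothetical function name:

/* Emit a comma-separated list of IDs into a sysfs page without ever
 * writing past PAGE_SIZE. Same bounded-snprintf pattern as above. */
static ssize_t show_id_list(const unsigned int *ids, unsigned int nr,
			    char *page)
{
	const size_t size = PAGE_SIZE - 1;	/* keep room for '\n' */
	unsigned int i;
	int ret = 0, pos = 0;

	for (i = 0; i < nr; i++) {
		ret = snprintf(page + pos, size - pos,
			       i ? ", %u" : "%u", ids[i]);
		if (ret >= size - pos)
			break;			/* output truncated: stop */
		pos += ret;
	}

	ret = snprintf(page + pos, size + 1 - pos, "\n");
	return pos + ret;
}
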
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f7427d6b6588..3875729f31c5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2295,8 +2295,6 @@ void blk_mq_release(struct request_queue *q)
struct blk_mq_hw_ctx *hctx;
unsigned int i;
- cancel_delayed_work_sync(&q->requeue_work);
-
/* hctx kobj stays in hctx */
queue_for_each_hw_ctx(q, hctx, i) {
if (!hctx)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index d0ec0d55d3d0..dfc391df0ffe 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -811,6 +811,9 @@ static void __blk_release_queue(struct work_struct *work)
blk_free_queue_stats(q->stats);
+ if (q->mq_ops)
+ cancel_delayed_work_sync(&q->requeue_work);
+
blk_exit_rl(q, &q->root_rl);
if (q->queue_tags)