summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig10
-rw-r--r--block/Makefile1
-rw-r--r--block/blk-core.c8
-rw-r--r--block/blk-flush.c25
-rw-r--r--block/blk-softirq.c8
-rw-r--r--block/blk-throttle.c4
-rw-r--r--block/blk.h2
-rw-r--r--block/bsg-lib.c298
-rw-r--r--block/cfq-iosched.c21
-rw-r--r--block/genhd.c8
10 files changed, 371 insertions, 14 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 60be1e0455da..e97934eececa 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -65,6 +65,16 @@ config BLK_DEV_BSG
If unsure, say Y.
+config BLK_DEV_BSGLIB
+ bool "Block layer SG support v4 helper lib"
+ default n
+ select BLK_DEV_BSG
+ help
+ Subsystems will normally enable this if needed. Users will not
+ normally need to manually enable this.
+
+ If unsure, say N.
+
config BLK_DEV_INTEGRITY
bool "Block layer data integrity support"
---help---
diff --git a/block/Makefile b/block/Makefile
index 0fec4b3fab51..514c6e4f427a 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
+obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/blk-core.c b/block/blk-core.c
index b627558c461f..90e1ffdeb415 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1702,6 +1702,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits);
int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
unsigned long flags;
+ int where = ELEVATOR_INSERT_BACK;
if (blk_rq_check_limits(q, rq))
return -EIO;
@@ -1718,7 +1719,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
*/
BUG_ON(blk_queued_rq(rq));
- add_acct_request(q, rq, ELEVATOR_INSERT_BACK);
+ if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
+ where = ELEVATOR_INSERT_FLUSH;
+
+ add_acct_request(q, rq, where);
spin_unlock_irqrestore(q->queue_lock, flags);
return 0;
@@ -2275,7 +2279,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
* %false - we are done with this request
* %true - still buffers pending for this request
**/
-static bool __blk_end_bidi_request(struct request *rq, int error,
+bool __blk_end_bidi_request(struct request *rq, int error,
unsigned int nr_bytes, unsigned int bidi_bytes)
{
if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
diff --git a/block/blk-flush.c b/block/blk-flush.c
index bb21e4c36f70..491eb30a242d 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
{
unsigned int policy = 0;
+ if (blk_rq_sectors(rq))
+ policy |= REQ_FSEQ_DATA;
+
if (fflags & REQ_FLUSH) {
if (rq->cmd_flags & REQ_FLUSH)
policy |= REQ_FSEQ_PREFLUSH;
- if (blk_rq_sectors(rq))
- policy |= REQ_FSEQ_DATA;
if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
policy |= REQ_FSEQ_POSTFLUSH;
}
@@ -122,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq)
/* make @rq a normal request */
rq->cmd_flags &= ~REQ_FLUSH_SEQ;
- rq->end_io = NULL;
+ rq->end_io = rq->flush.saved_end_io;
}
/**
@@ -300,9 +301,6 @@ void blk_insert_flush(struct request *rq)
unsigned int fflags = q->flush_flags; /* may change, cache */
unsigned int policy = blk_flush_policy(fflags, rq);
- BUG_ON(rq->end_io);
- BUG_ON(!rq->bio || rq->bio != rq->biotail);
-
/*
* @policy now records what operations need to be done. Adjust
* REQ_FLUSH and FUA for the driver.
@@ -312,6 +310,19 @@ void blk_insert_flush(struct request *rq)
rq->cmd_flags &= ~REQ_FUA;
/*
+ * An empty flush handed down from a stacking driver may
+ * translate into nothing if the underlying device does not
+ * advertise a write-back cache. In this case, simply
+ * complete the request.
+ */
+ if (!policy) {
+ __blk_end_bidi_request(rq, 0, 0, 0);
+ return;
+ }
+
+ BUG_ON(!rq->bio || rq->bio != rq->biotail);
+
+ /*
* If there's data but flush is not necessary, the request can be
* processed directly without going through flush machinery. Queue
* for normal execution.
@@ -319,6 +330,7 @@ void blk_insert_flush(struct request *rq)
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
list_add_tail(&rq->queuelist, &q->queue_head);
+ blk_run_queue_async(q);
return;
}
@@ -329,6 +341,7 @@ void blk_insert_flush(struct request *rq)
memset(&rq->flush, 0, sizeof(rq->flush));
INIT_LIST_HEAD(&rq->flush.list);
rq->cmd_flags |= REQ_FLUSH_SEQ;
+ rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
rq->end_io = flush_data_end_io;
blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 475fab809a80..58340d0cb23a 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -124,6 +124,14 @@ void __blk_complete_request(struct request *req)
} else
ccpu = cpu;
+ /*
+ * If current CPU and requested CPU are in the same group, running
+ * softirq in current CPU. One might concern this is just like
+ * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
+ * running in interrupt handler, and currently I/O controller doesn't
+ * support multiple interrupts, so current CPU is unique actually. This
+ * avoids IPI sending from current CPU to the first CPU of a group.
+ */
if (ccpu == cpu || ccpu == group_cpu) {
struct list_head *list;
do_local:
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index f6a794120505..a19f58c6fc3a 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg,
static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
{
bool rw = bio_data_dir(bio);
- bool sync = bio->bi_rw & REQ_SYNC;
+ bool sync = rw_is_sync(bio->bi_rw);
/* Charge the bio to the group */
tg->bytes_disp[rw] += bio->bi_size;
@@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
if (tg_no_rule_group(tg, rw)) {
blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
- rw, bio->bi_rw & REQ_SYNC);
+ rw, rw_is_sync(bio->bi_rw));
rcu_read_unlock();
return 0;
}
diff --git a/block/blk.h b/block/blk.h
index d6586287adc9..20b900a377c9 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
struct bio *bio);
void blk_dequeue_request(struct request *rq);
void __blk_queue_free_tags(struct request_queue *q);
+bool __blk_end_bidi_request(struct request *rq, int error,
+ unsigned int nr_bytes, unsigned int bidi_bytes);
void blk_rq_timed_out_timer(unsigned long data);
void blk_delete_timer(struct request *);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
new file mode 100644
index 000000000000..6690e6e41037
--- /dev/null
+++ b/block/bsg-lib.c
@@ -0,0 +1,298 @@
+/*
+ * BSG helper library
+ *
+ * Copyright (C) 2008 James Smart, Emulex Corporation
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2011 Mike Christie
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/bsg-lib.h>
+#include <linux/module.h>
+#include <scsi/scsi_cmnd.h>
+
+/**
+ * bsg_destroy_job - routine to teardown/delete a bsg job
+ * @job: bsg_job that is to be torn down
+ */
+static void bsg_destroy_job(struct bsg_job *job)
+{
+ put_device(job->dev); /* release reference for the request */
+
+ kfree(job->request_payload.sg_list);
+ kfree(job->reply_payload.sg_list);
+ kfree(job);
+}
+
+/**
+ * bsg_job_done - completion routine for bsg requests
+ * @job: bsg_job that is complete
+ * @result: job reply result
+ * @reply_payload_rcv_len: length of payload recvd
+ *
+ * The LLD should call this when the bsg job has completed.
+ */
+void bsg_job_done(struct bsg_job *job, int result,
+ unsigned int reply_payload_rcv_len)
+{
+ struct request *req = job->req;
+ struct request *rsp = req->next_rq;
+ int err;
+
+ err = job->req->errors = result;
+ if (err < 0)
+ /* we're only returning the result field in the reply */
+ job->req->sense_len = sizeof(u32);
+ else
+ job->req->sense_len = job->reply_len;
+ /* we assume all request payload was transferred, residual == 0 */
+ req->resid_len = 0;
+
+ if (rsp) {
+ WARN_ON(reply_payload_rcv_len > rsp->resid_len);
+
+ /* set reply (bidi) residual */
+ rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len);
+ }
+ blk_complete_request(req);
+}
+EXPORT_SYMBOL_GPL(bsg_job_done);
+
+/**
+ * bsg_softirq_done - softirq done routine for destroying the bsg requests
+ * @rq: BSG request that holds the job to be destroyed
+ */
+static void bsg_softirq_done(struct request *rq)
+{
+ struct bsg_job *job = rq->special;
+
+ blk_end_request_all(rq, rq->errors);
+ bsg_destroy_job(job);
+}
+
+static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
+{
+ size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
+
+ BUG_ON(!req->nr_phys_segments);
+
+ buf->sg_list = kzalloc(sz, GFP_KERNEL);
+ if (!buf->sg_list)
+ return -ENOMEM;
+ sg_init_table(buf->sg_list, req->nr_phys_segments);
+ buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
+ buf->payload_len = blk_rq_bytes(req);
+ return 0;
+}
+
+/**
+ * bsg_create_job - create the bsg_job structure for the bsg request
+ * @dev: device that is being sent the bsg request
+ * @req: BSG request that needs a job structure
+ */
+static int bsg_create_job(struct device *dev, struct request *req)
+{
+ struct request *rsp = req->next_rq;
+ struct request_queue *q = req->q;
+ struct bsg_job *job;
+ int ret;
+
+ BUG_ON(req->special);
+
+ job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
+ if (!job)
+ return -ENOMEM;
+
+ req->special = job;
+ job->req = req;
+ if (q->bsg_job_size)
+ job->dd_data = (void *)&job[1];
+ job->request = req->cmd;
+ job->request_len = req->cmd_len;
+ job->reply = req->sense;
+ job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
+ * allocated */
+ if (req->bio) {
+ ret = bsg_map_buffer(&job->request_payload, req);
+ if (ret)
+ goto failjob_rls_job;
+ }
+ if (rsp && rsp->bio) {
+ ret = bsg_map_buffer(&job->reply_payload, rsp);
+ if (ret)
+ goto failjob_rls_rqst_payload;
+ }
+ job->dev = dev;
+ /* take a reference for the request */
+ get_device(job->dev);
+ return 0;
+
+failjob_rls_rqst_payload:
+ kfree(job->request_payload.sg_list);
+failjob_rls_job:
+ kfree(job);
+ return -ENOMEM;
+}
+
+/*
+ * bsg_goose_queue - restart queue in case it was stopped
+ * @q: request q to be restarted
+ */
+void bsg_goose_queue(struct request_queue *q)
+{
+ if (!q)
+ return;
+
+ blk_run_queue_async(q);
+}
+EXPORT_SYMBOL_GPL(bsg_goose_queue);
+
+/**
+ * bsg_request_fn - generic handler for bsg requests
+ * @q: request queue to manage
+ *
+ * On error the create_bsg_job function should return a -Exyz error value
+ * that will be set to the req->errors.
+ *
+ * Drivers/subsys should pass this to the queue init function.
+ */
+void bsg_request_fn(struct request_queue *q)
+{
+ struct device *dev = q->queuedata;
+ struct request *req;
+ struct bsg_job *job;
+ int ret;
+
+ if (!get_device(dev))
+ return;
+
+ while (1) {
+ req = blk_fetch_request(q);
+ if (!req)
+ break;
+ spin_unlock_irq(q->queue_lock);
+
+ ret = bsg_create_job(dev, req);
+ if (ret) {
+ req->errors = ret;
+ blk_end_request_all(req, ret);
+ spin_lock_irq(q->queue_lock);
+ continue;
+ }
+
+ job = req->special;
+ ret = q->bsg_job_fn(job);
+ spin_lock_irq(q->queue_lock);
+ if (ret)
+ break;
+ }
+
+ spin_unlock_irq(q->queue_lock);
+ put_device(dev);
+ spin_lock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL_GPL(bsg_request_fn);
+
+/**
+ * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
+ * @dev: device to attach bsg device to
+ * @q: request queue setup by caller
+ * @name: device to give bsg device
+ * @job_fn: bsg job handler
+ * @dd_job_size: size of LLD data needed for each job
+ *
+ * The caller should have setup the reuqest queue with bsg_request_fn
+ * as the request_fn.
+ */
+int bsg_setup_queue(struct device *dev, struct request_queue *q,
+ char *name, bsg_job_fn *job_fn, int dd_job_size)
+{
+ int ret;
+
+ q->queuedata = dev;
+ q->bsg_job_size = dd_job_size;
+ q->bsg_job_fn = job_fn;
+ queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
+ blk_queue_softirq_done(q, bsg_softirq_done);
+ blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
+
+ ret = bsg_register_queue(q, dev, name, NULL);
+ if (ret) {
+ printk(KERN_ERR "%s: bsg interface failed to "
+ "initialize - register queue\n", dev->kobj.name);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bsg_setup_queue);
+
+/**
+ * bsg_remove_queue - Deletes the bsg dev from the q
+ * @q: the request_queue that is to be torn down.
+ *
+ * Notes:
+ * Before unregistering the queue empty any requests that are blocked
+ */
+void bsg_remove_queue(struct request_queue *q)
+{
+ struct request *req; /* block request */
+ int counts; /* totals for request_list count and starved */
+
+ if (!q)
+ return;
+
+ /* Stop taking in new requests */
+ spin_lock_irq(q->queue_lock);
+ blk_stop_queue(q);
+
+ /* drain all requests in the queue */
+ while (1) {
+ /* need the lock to fetch a request
+ * this may fetch the same reqeust as the previous pass
+ */
+ req = blk_fetch_request(q);
+ /* save requests in use and starved */
+ counts = q->rq.count[0] + q->rq.count[1] +
+ q->rq.starved[0] + q->rq.starved[1];
+ spin_unlock_irq(q->queue_lock);
+ /* any requests still outstanding? */
+ if (counts == 0)
+ break;
+
+ /* This may be the same req as the previous iteration,
+ * always send the blk_end_request_all after a prefetch.
+ * It is not okay to not end the request because the
+ * prefetch started the request.
+ */
+ if (req) {
+ /* return -ENXIO to indicate that this queue is
+ * going away
+ */
+ req->errors = -ENXIO;
+ blk_end_request_all(req, -ENXIO);
+ }
+
+ msleep(200); /* allow bsg to possibly finish */
+ spin_lock_irq(q->queue_lock);
+ }
+ bsg_unregister_queue(q);
+}
+EXPORT_SYMBOL_GPL(bsg_remove_queue);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1f96ad6254f1..a33bd4377c61 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -130,6 +130,8 @@ struct cfq_queue {
unsigned long slice_end;
long slice_resid;
+ /* pending metadata requests */
+ int meta_pending;
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
@@ -682,6 +684,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
if (rq_is_sync(rq1) != rq_is_sync(rq2))
return rq_is_sync(rq1) ? rq1 : rq2;
+ if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
+ return rq1->cmd_flags & REQ_META ? rq1 : rq2;
+
s1 = blk_rq_pos(rq1);
s2 = blk_rq_pos(rq2);
@@ -1209,6 +1214,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
hlist_del_init(&cfqg->cfqd_node);
+ BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
+ cfqd->nr_blkcg_linked_grps--;
+
/*
* Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed.
@@ -1604,6 +1612,10 @@ static void cfq_remove_request(struct request *rq)
cfqq->cfqd->rq_queued--;
cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
rq_data_dir(rq), rq_is_sync(rq));
+ if (rq->cmd_flags & REQ_META) {
+ WARN_ON(!cfqq->meta_pending);
+ cfqq->meta_pending--;
+ }
}
static int cfq_merge(struct request_queue *q, struct request **req,
@@ -3357,6 +3369,13 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
return true;
/*
+ * So both queues are sync. Let the new request get disk time if
+ * it's a metadata request and the current queue is doing regular IO.
+ */
+ if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
+ return true;
+
+ /*
* Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
*/
if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
@@ -3420,6 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct cfq_io_context *cic = RQ_CIC(rq);
cfqd->rq_queued++;
+ if (rq->cmd_flags & REQ_META)
+ cfqq->meta_pending++;
cfq_update_io_thinktime(cfqd, cfqq, cic);
cfq_update_io_seektime(cfqd, cfqq, rq);
diff --git a/block/genhd.c b/block/genhd.c
index 5cb51c55f6d8..e2f67902dd02 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1146,17 +1146,17 @@ static int diskstats_show(struct seq_file *seqf, void *v)
cpu = part_stat_lock();
part_round_stats(cpu, hd);
part_stat_unlock();
- seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
- "%u %lu %lu %llu %u %u %u %u\n",
+ seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
+ "%u %lu %lu %lu %u %u %u %u\n",
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
disk_name(gp, hd->partno, buf),
part_stat_read(hd, ios[READ]),
part_stat_read(hd, merges[READ]),
- (unsigned long long)part_stat_read(hd, sectors[READ]),
+ part_stat_read(hd, sectors[READ]),
jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
part_stat_read(hd, ios[WRITE]),
part_stat_read(hd, merges[WRITE]),
- (unsigned long long)part_stat_read(hd, sectors[WRITE]),
+ part_stat_read(hd, sectors[WRITE]),
jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
part_in_flight(hd),
jiffies_to_msecs(part_stat_read(hd, io_ticks)),