summaryrefslogtreecommitdiff
path: root/drivers/nvme/host/multipath.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/multipath.c')
-rw-r--r--drivers/nvme/host/multipath.c98
1 files changed, 65 insertions, 33 deletions
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 56caddeabb5e..3968f89f7855 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -3,6 +3,7 @@
* Copyright (c) 2017-2018 Christoph Hellwig.
*/
+#include <linux/backing-dev.h>
#include <linux/moduleparam.h>
#include <trace/events/block.h>
#include "nvme.h"
@@ -64,17 +65,12 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
}
}
-void nvme_failover_req(struct request *req)
+bool nvme_failover_req(struct request *req)
{
struct nvme_ns *ns = req->q->queuedata;
u16 status = nvme_req(req)->status;
unsigned long flags;
- spin_lock_irqsave(&ns->head->requeue_lock, flags);
- blk_steal_bios(&ns->head->requeue_list, req);
- spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
- blk_mq_end_request(req, 0);
-
switch (status & 0x7ff) {
case NVME_SC_ANA_TRANSITION:
case NVME_SC_ANA_INACCESSIBLE:
@@ -103,15 +99,17 @@ void nvme_failover_req(struct request *req)
nvme_mpath_clear_current_path(ns);
break;
default:
- /*
- * Reset the controller for any non-ANA error as we don't know
- * what caused the error.
- */
- nvme_reset_ctrl(ns->ctrl);
- break;
+ /* This was a non-ANA error so follow the normal error path. */
+ return false;
}
+ spin_lock_irqsave(&ns->head->requeue_lock, flags);
+ blk_steal_bios(&ns->head->requeue_list, req);
+ spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
+ blk_mq_end_request(req, 0);
+
kblockd_schedule_work(&ns->head->requeue_work);
+ return true;
}
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
@@ -248,6 +246,17 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
fallback = ns;
}
+ /*
+ * The loop above skips the current path for round-robin semantics.
+ * Fall back to the current path if either:
+ * - no other optimized path found and current is optimized,
+ * - no other usable path found and current is usable.
+ */
+ if (!nvme_path_is_disabled(old) &&
+ (old->ana_state == NVME_ANA_OPTIMIZED ||
+ (!fallback && old->ana_state == NVME_ANA_NONOPTIMIZED)))
+ return old;
+
if (!fallback)
return NULL;
found = fallback;
@@ -268,10 +277,13 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
struct nvme_ns *ns;
ns = srcu_dereference(head->current_path[node], &head->srcu);
- if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_RR && ns)
- ns = nvme_round_robin_path(head, node, ns);
- if (unlikely(!ns || !nvme_path_is_optimized(ns)))
- ns = __nvme_find_path(head, node);
+ if (unlikely(!ns))
+ return __nvme_find_path(head, node);
+
+ if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_RR)
+ return nvme_round_robin_path(head, node, ns);
+ if (unlikely(!nvme_path_is_optimized(ns)))
+ return __nvme_find_path(head, node);
return ns;
}
@@ -413,15 +425,14 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
{
struct nvme_ns_head *head = ns->head;
- lockdep_assert_held(&ns->head->lock);
-
if (!head->disk)
return;
- if (!(head->disk->flags & GENHD_FL_UP))
+ if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
device_add_disk(&head->subsys->dev, head->disk,
nvme_ns_id_attr_groups);
+ mutex_lock(&head->lock);
if (nvme_path_is_optimized(ns)) {
int node, srcu_idx;
@@ -430,9 +441,10 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
__nvme_find_path(head, node);
srcu_read_unlock(&head->srcu, srcu_idx);
}
+ mutex_unlock(&head->lock);
- synchronize_srcu(&ns->head->srcu);
- kblockd_schedule_work(&ns->head->requeue_work);
+ synchronize_srcu(&head->srcu);
+ kblockd_schedule_work(&head->requeue_work);
}
static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
@@ -483,14 +495,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state)
static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
struct nvme_ns *ns)
{
- mutex_lock(&ns->head->lock);
ns->ana_grpid = le32_to_cpu(desc->grpid);
ns->ana_state = desc->state;
clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
if (nvme_state_is_live(ns->ana_state))
nvme_mpath_set_live(ns);
- mutex_unlock(&ns->head->lock);
}
static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
@@ -640,31 +650,45 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
}
DEVICE_ATTR_RO(ana_state);
-static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl,
+static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
struct nvme_ana_group_desc *desc, void *data)
{
- struct nvme_ns *ns = data;
+ struct nvme_ana_group_desc *dst = data;
- if (ns->ana_grpid == le32_to_cpu(desc->grpid)) {
- nvme_update_ns_ana_state(desc, ns);
- return -ENXIO; /* just break out of the loop */
- }
+ if (desc->grpid != dst->grpid)
+ return 0;
- return 0;
+ *dst = *desc;
+ return -ENXIO; /* just break out of the loop */
}
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
{
if (nvme_ctrl_use_ana(ns->ctrl)) {
+ struct nvme_ana_group_desc desc = {
+ .grpid = id->anagrpid,
+ .state = 0,
+ };
+
mutex_lock(&ns->ctrl->ana_lock);
ns->ana_grpid = le32_to_cpu(id->anagrpid);
- nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state);
+ nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc);
mutex_unlock(&ns->ctrl->ana_lock);
+ if (desc.state) {
+ /* found the group desc: update */
+ nvme_update_ns_ana_state(&desc, ns);
+ }
} else {
- mutex_lock(&ns->head->lock);
ns->ana_state = NVME_ANA_OPTIMIZED;
nvme_mpath_set_live(ns);
- mutex_unlock(&ns->head->lock);
+ }
+
+ if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
+ struct gendisk *disk = ns->head->disk;
+
+ if (disk)
+ disk->queue->backing_dev_info->capabilities |=
+ BDI_CAP_STABLE_WRITES;
}
}
@@ -679,6 +703,14 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
kblockd_schedule_work(&head->requeue_work);
flush_work(&head->requeue_work);
blk_cleanup_queue(head->disk->queue);
+ if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
+ /*
+ * if device_add_disk wasn't called, prevent
+ * disk release to put a bogus reference on the
+ * request queue
+ */
+ head->disk->queue = NULL;
+ }
put_disk(head->disk);
}