diff options
Diffstat (limited to 'drivers/nvme/host/multipath.c')
-rw-r--r-- | drivers/nvme/host/multipath.c | 98 |
1 files changed, 65 insertions, 33 deletions
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 56caddeabb5e..3968f89f7855 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -3,6 +3,7 @@ * Copyright (c) 2017-2018 Christoph Hellwig. */ +#include <linux/backing-dev.h> #include <linux/moduleparam.h> #include <trace/events/block.h> #include "nvme.h" @@ -64,17 +65,12 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, } } -void nvme_failover_req(struct request *req) +bool nvme_failover_req(struct request *req) { struct nvme_ns *ns = req->q->queuedata; u16 status = nvme_req(req)->status; unsigned long flags; - spin_lock_irqsave(&ns->head->requeue_lock, flags); - blk_steal_bios(&ns->head->requeue_list, req); - spin_unlock_irqrestore(&ns->head->requeue_lock, flags); - blk_mq_end_request(req, 0); - switch (status & 0x7ff) { case NVME_SC_ANA_TRANSITION: case NVME_SC_ANA_INACCESSIBLE: @@ -103,15 +99,17 @@ void nvme_failover_req(struct request *req) nvme_mpath_clear_current_path(ns); break; default: - /* - * Reset the controller for any non-ANA error as we don't know - * what caused the error. - */ - nvme_reset_ctrl(ns->ctrl); - break; + /* This was a non-ANA error so follow the normal error path. */ + return false; } + spin_lock_irqsave(&ns->head->requeue_lock, flags); + blk_steal_bios(&ns->head->requeue_list, req); + spin_unlock_irqrestore(&ns->head->requeue_lock, flags); + blk_mq_end_request(req, 0); + kblockd_schedule_work(&ns->head->requeue_work); + return true; } void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) @@ -248,6 +246,17 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head, fallback = ns; } + /* + * The loop above skips the current path for round-robin semantics. + * Fall back to the current path if either: + * - no other optimized path found and current is optimized, + * - no other usable path found and current is usable. + */ + if (!nvme_path_is_disabled(old) && + (old->ana_state == NVME_ANA_OPTIMIZED || + (!fallback && old->ana_state == NVME_ANA_NONOPTIMIZED))) + return old; + if (!fallback) return NULL; found = fallback; @@ -268,10 +277,13 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) struct nvme_ns *ns; ns = srcu_dereference(head->current_path[node], &head->srcu); - if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_RR && ns) - ns = nvme_round_robin_path(head, node, ns); - if (unlikely(!ns || !nvme_path_is_optimized(ns))) - ns = __nvme_find_path(head, node); + if (unlikely(!ns)) + return __nvme_find_path(head, node); + + if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_RR) + return nvme_round_robin_path(head, node, ns); + if (unlikely(!nvme_path_is_optimized(ns))) + return __nvme_find_path(head, node); return ns; } @@ -413,15 +425,14 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) { struct nvme_ns_head *head = ns->head; - lockdep_assert_held(&ns->head->lock); - if (!head->disk) return; - if (!(head->disk->flags & GENHD_FL_UP)) + if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) device_add_disk(&head->subsys->dev, head->disk, nvme_ns_id_attr_groups); + mutex_lock(&head->lock); if (nvme_path_is_optimized(ns)) { int node, srcu_idx; @@ -430,9 +441,10 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) __nvme_find_path(head, node); srcu_read_unlock(&head->srcu, srcu_idx); } + mutex_unlock(&head->lock); - synchronize_srcu(&ns->head->srcu); - kblockd_schedule_work(&ns->head->requeue_work); + synchronize_srcu(&head->srcu); + kblockd_schedule_work(&head->requeue_work); } static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data, @@ -483,14 +495,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state) static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, struct nvme_ns *ns) { - mutex_lock(&ns->head->lock); ns->ana_grpid = le32_to_cpu(desc->grpid); ns->ana_state = desc->state; clear_bit(NVME_NS_ANA_PENDING, &ns->flags); if (nvme_state_is_live(ns->ana_state)) nvme_mpath_set_live(ns); - mutex_unlock(&ns->head->lock); } static int nvme_update_ana_state(struct nvme_ctrl *ctrl, @@ -640,31 +650,45 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, } DEVICE_ATTR_RO(ana_state); -static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl, +static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *desc, void *data) { - struct nvme_ns *ns = data; + struct nvme_ana_group_desc *dst = data; - if (ns->ana_grpid == le32_to_cpu(desc->grpid)) { - nvme_update_ns_ana_state(desc, ns); - return -ENXIO; /* just break out of the loop */ - } + if (desc->grpid != dst->grpid) + return 0; - return 0; + *dst = *desc; + return -ENXIO; /* just break out of the loop */ } void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) { if (nvme_ctrl_use_ana(ns->ctrl)) { + struct nvme_ana_group_desc desc = { + .grpid = id->anagrpid, + .state = 0, + }; + mutex_lock(&ns->ctrl->ana_lock); ns->ana_grpid = le32_to_cpu(id->anagrpid); - nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state); + nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc); mutex_unlock(&ns->ctrl->ana_lock); + if (desc.state) { + /* found the group desc: update */ + nvme_update_ns_ana_state(&desc, ns); + } } else { - mutex_lock(&ns->head->lock); ns->ana_state = NVME_ANA_OPTIMIZED; nvme_mpath_set_live(ns); - mutex_unlock(&ns->head->lock); + } + + if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { + struct gendisk *disk = ns->head->disk; + + if (disk) + disk->queue->backing_dev_info->capabilities |= + BDI_CAP_STABLE_WRITES; } } @@ -679,6 +703,14 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) kblockd_schedule_work(&head->requeue_work); flush_work(&head->requeue_work); blk_cleanup_queue(head->disk->queue); + if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { + /* + * if device_add_disk wasn't called, prevent + * disk release to put a bogus reference on the + * request queue + */ + head->disk->queue = NULL; + } put_disk(head->disk); } |