Skip to content

Commit

Permalink
nvme: ensure disabling pairs with unquiesce
Browse files Browse the repository at this point in the history
If any error handling that disables the controller fails to queue the
reset work, like if the state changed to disconnected inbetween, then
the failed teardown needs to unquiesce the queues since it's no longer
paired with reset_work. Just make sure that the controller can be put
into a resetting state prior to starting the disable so that no other
handling can change the queue states while recovery is happening.

Reported-by: Ming Lei <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
  • Loading branch information
keithbusch committed Jul 12, 2023
1 parent ee6fdc5 commit 71a5bb1
Showing 1 changed file with 17 additions and 8 deletions.
25 changes: 17 additions & 8 deletions drivers/nvme/host/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -1298,9 +1298,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
*/
if (nvme_should_reset(dev, csts)) {
nvme_warn_reset(dev, csts);
nvme_dev_disable(dev, false);
nvme_reset_ctrl(&dev->ctrl);
return BLK_EH_DONE;
goto disable;
}

/*
Expand Down Expand Up @@ -1351,10 +1349,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
nvme_dev_disable(dev, false);
nvme_reset_ctrl(&dev->ctrl);

return BLK_EH_DONE;
goto disable;
}

if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
Expand Down Expand Up @@ -1391,6 +1386,15 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
* as the device then is in a faulty state.
*/
return BLK_EH_RESET_TIMER;

disable:
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
return BLK_EH_DONE;

nvme_dev_disable(dev, false);
if (nvme_try_sched_reset(&dev->ctrl))
nvme_unquiesce_io_queues(&dev->ctrl);
return BLK_EH_DONE;
}

static void nvme_free_queue(struct nvme_queue *nvmeq)
Expand Down Expand Up @@ -3278,6 +3282,10 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
case pci_channel_io_frozen:
dev_warn(dev->ctrl.device,
"frozen state error detected, reset controller\n");
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) {
nvme_dev_disable(dev, true);
return PCI_ERS_RESULT_DISCONNECT;
}
nvme_dev_disable(dev, false);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
Expand All @@ -3294,7 +3302,8 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)

dev_info(dev->ctrl.device, "restart after slot reset\n");
pci_restore_state(pdev);
nvme_reset_ctrl(&dev->ctrl);
if (!nvme_try_sched_reset(&dev->ctrl))
nvme_unquiesce_io_queues(&dev->ctrl);
return PCI_ERS_RESULT_RECOVERED;
}

Expand Down

0 comments on commit 71a5bb1

Please sign in to comment.