summaryrefslogtreecommitdiff
path: root/drivers/scsi
diff options
context:
space:
mode:
authorSubhash Jadavani <subhashj@codeaurora.org>2016-07-07 19:21:48 -0700
committerGerrit - the friendly Code Review server <code-review@localhost>2016-08-21 22:36:22 -0700
commiteffcb63b04b618eb0cb8648a325646867225051c (patch)
tree9c2fcf00dce766631cde302fc8183bcded86aa58 /drivers/scsi
parentc605e110ab18604981481a7b502da54640b620bc (diff)
scsi: ufs: fix race between hibern8 failure recovery and error handler
If Hibern8 enter/exit fails and we also see some UIC errors at the same time, we would see following 2 recovery paths running in parallel to restore the host and device communication. Context-1: ufshcd_uic_hibern8_exit() -> ufshcd_link_recovery() -> ufshcd_reset_and_restore() Context-2: ufshcd_err_handler()->ufshcd_reset_and_restore() This change fixes this race by making the ufshcd_link_recovery() to wait for the already scheduled ufshcd_err_handler() to finish running and then schedule the error handler again to make sure that host-device link is reestablished. While we are fixing the above race, similar race could happen between ufshcd_eh_host_reset_handler() and ufshcd_err_handler() hence fix the error handling in ufshcd_eh_host_reset_handler() same way as fixed in ufshcd_link_recovery(). CRs-fixed: 1037647 Change-Id: Ic7a17a907e70122968c324e3cbe6e0421c28a2c9 Signed-off-by: Asutosh Das <asutoshd@codeaurora.org> Signed-off-by: Subhash Jadavani <subhashj@codeaurora.org>
Diffstat (limited to 'drivers/scsi')
-rw-r--r--drivers/scsi/ufs/ufs-debugfs.c10
-rw-r--r--drivers/scsi/ufs/ufshcd.c135
-rw-r--r--drivers/scsi/ufs/ufshcd.h1
3 files changed, 91 insertions, 55 deletions
diff --git a/drivers/scsi/ufs/ufs-debugfs.c b/drivers/scsi/ufs/ufs-debugfs.c
index f3b4b6c08571..d6e372fc7922 100644
--- a/drivers/scsi/ufs/ufs-debugfs.c
+++ b/drivers/scsi/ufs/ufs-debugfs.c
@@ -1412,8 +1412,10 @@ static ssize_t ufsdbg_reset_controller_write(struct file *filp,
struct ufs_hba *hba = filp->f_mapping->host->i_private;
unsigned long flags;
- spin_lock_irqsave(hba->host->host_lock, flags);
+ pm_runtime_get_sync(hba->dev);
+ ufshcd_hold(hba, false);
+ spin_lock_irqsave(hba->host->host_lock, flags);
/*
* simulating a dummy error in order to "convince"
* eh_work to actually reset the controller
@@ -1421,9 +1423,13 @@ static ssize_t ufsdbg_reset_controller_write(struct file *filp,
hba->saved_err |= INT_FATAL_ERRORS;
hba->silence_err_logs = true;
schedule_work(&hba->eh_work);
-
spin_unlock_irqrestore(hba->host->host_lock, flags);
+ flush_work(&hba->eh_work);
+
+ ufshcd_release(hba, false);
+ pm_runtime_put_sync(hba->dev);
+
return cnt;
}
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 1fdfadf5e1b9..ce779d760c69 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2630,6 +2630,13 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
return SCSI_MLQUEUE_HOST_BUSY;
spin_lock_irqsave(hba->host->host_lock, flags);
+
+ /* if error handling is in progress, return host busy */
+ if (ufshcd_eh_in_progress(hba)) {
+ err = SCSI_MLQUEUE_HOST_BUSY;
+ goto out_unlock;
+ }
+
switch (hba->ufshcd_state) {
case UFSHCD_STATE_OPERATIONAL:
break;
@@ -2647,13 +2654,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
cmd->scsi_done(cmd);
goto out_unlock;
}
-
- /* if error handling is in progress, don't issue commands */
- if (ufshcd_eh_in_progress(hba)) {
- set_host_byte(cmd, DID_ERROR);
- cmd->scsi_done(cmd);
- goto out_unlock;
- }
spin_unlock_irqrestore(hba->host->host_lock, flags);
hba->req_abort_count = 0;
@@ -4039,31 +4039,49 @@ out:
static int ufshcd_link_recovery(struct ufs_hba *hba)
{
- int ret;
+ int ret = 0;
unsigned long flags;
- spin_lock_irqsave(hba->host->host_lock, flags);
- hba->ufshcd_state = UFSHCD_STATE_RESET;
- ufshcd_set_eh_in_progress(hba);
- spin_unlock_irqrestore(hba->host->host_lock, flags);
+ /*
+ * Check if there is any race with fatal error handling.
+ * If so, wait for it to complete. Even though fatal error
+ * handling does reset and restore in some cases, don't assume
+ * anything out of it. We are just avoiding race here.
+ */
+ do {
+ spin_lock_irqsave(hba->host->host_lock, flags);
+ if (!(work_pending(&hba->eh_work) ||
+ hba->ufshcd_state == UFSHCD_STATE_RESET))
+ break;
+ spin_unlock_irqrestore(hba->host->host_lock, flags);
+ dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
+ flush_work(&hba->eh_work);
+ } while (1);
- ret = ufshcd_vops_full_reset(hba);
- if (ret)
- dev_warn(hba->dev,
- "full reset returned %d, trying to recover the link\n",
- ret);
- ret = ufshcd_host_reset_and_restore(hba);
+ /*
+ * we don't know if previous reset had really reset the host controller
+ * or not. So let's force reset here to be sure.
+ */
+ hba->ufshcd_state = UFSHCD_STATE_ERROR;
+ hba->force_host_reset = true;
+ schedule_work(&hba->eh_work);
- spin_lock_irqsave(hba->host->host_lock, flags);
- if (ret)
- hba->ufshcd_state = UFSHCD_STATE_ERROR;
- ufshcd_clear_eh_in_progress(hba);
- spin_unlock_irqrestore(hba->host->host_lock, flags);
+ /* wait for the reset work to finish */
+ do {
+ if (!(work_pending(&hba->eh_work) ||
+ hba->ufshcd_state == UFSHCD_STATE_RESET))
+ break;
+ spin_unlock_irqrestore(hba->host->host_lock, flags);
+ dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
+ flush_work(&hba->eh_work);
+ spin_lock_irqsave(hba->host->host_lock, flags);
+ } while (1);
- if (ret)
- dev_err(hba->dev, "%s: link recovery failed, err %d",
- __func__, ret);
+ if (!((hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) &&
+ ufshcd_is_link_active(hba)))
+ ret = -ENOLINK;
+ spin_unlock_irqrestore(hba->host->host_lock, flags);
return ret;
}
@@ -4087,8 +4105,7 @@ static int __ufshcd_uic_hibern8_enter(struct ufs_hba *hba)
* If link recovery fails then return error so that caller
* don't retry the hibern8 enter again.
*/
- if (ufshcd_link_recovery(hba))
- ret = -ENOLINK;
+ ret = ufshcd_link_recovery(hba);
} else {
dev_dbg(hba->dev, "%s: Hibern8 Enter at %lld us", __func__,
ktime_to_us(ktime_get()));
@@ -5604,11 +5621,9 @@ static void ufshcd_err_handler(struct work_struct *work)
hba = container_of(work, struct ufs_hba, eh_work);
+ spin_lock_irqsave(hba->host->host_lock, flags);
ufsdbg_set_err_state(hba);
- pm_runtime_get_sync(hba->dev);
- ufshcd_hold_all(hba);
- spin_lock_irqsave(hba->host->host_lock, flags);
if (hba->ufshcd_state == UFSHCD_STATE_RESET)
goto out;
@@ -5644,7 +5659,8 @@ static void ufshcd_err_handler(struct work_struct *work)
}
}
- if ((hba->saved_err & INT_FATAL_ERRORS) || hba->saved_ce_err ||
+ if ((hba->saved_err & INT_FATAL_ERRORS)
+ || hba->saved_ce_err || hba->force_host_reset ||
((hba->saved_err & UIC_ERROR) &&
(hba->saved_uic_err & (UFSHCD_UIC_DL_PA_INIT_ERROR |
UFSHCD_UIC_DL_NAC_RECEIVED_ERROR |
@@ -5732,6 +5748,7 @@ skip_pending_xfer_clear:
hba->saved_err = 0;
hba->saved_uic_err = 0;
hba->saved_ce_err = 0;
+ hba->force_host_reset = false;
}
skip_err_handling:
@@ -5743,12 +5760,9 @@ skip_err_handling:
}
hba->silence_err_logs = false;
- ufshcd_clear_eh_in_progress(hba);
out:
+ ufshcd_clear_eh_in_progress(hba);
spin_unlock_irqrestore(hba->host->host_lock, flags);
- ufshcd_scsi_unblock_requests(hba);
- ufshcd_release_all(hba);
- pm_runtime_put_sync(hba->dev);
}
static void ufshcd_update_uic_reg_hist(struct ufs_uic_err_reg_hist *reg_hist,
@@ -5849,8 +5863,11 @@ static void ufshcd_check_errors(struct ufs_hba *hba)
/* handle fatal errors only when link is functional */
if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
- /* block commands from scsi mid-layer */
- __ufshcd_scsi_block_requests(hba);
+ /*
+ * Set error handling in progress flag early so that we
+ * don't issue new requests any more.
+ */
+ ufshcd_set_eh_in_progress(hba);
hba->ufshcd_state = UFSHCD_STATE_ERROR;
schedule_work(&hba->eh_work);
@@ -6354,6 +6371,11 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba)
int retries = MAX_HOST_RESET_RETRIES;
do {
+ err = ufshcd_vops_full_reset(hba);
+ if (err)
+ dev_warn(hba->dev, "%s: full reset returned %d\n",
+ __func__, err);
+
err = ufshcd_host_reset_and_restore(hba);
} while (err && --retries);
@@ -6383,13 +6405,12 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba)
*/
static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
{
- int err;
+ int err = SUCCESS;
unsigned long flags;
struct ufs_hba *hba;
hba = shost_priv(cmd->device->host);
- ufshcd_hold_all(hba);
/*
* Check if there is any race with fatal error handling.
* If so, wait for it to complete. Even though fatal error
@@ -6402,29 +6423,37 @@ static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
hba->ufshcd_state == UFSHCD_STATE_RESET))
break;
spin_unlock_irqrestore(hba->host->host_lock, flags);
- dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
+ dev_err(hba->dev, "%s: reset in progress - 1\n", __func__);
flush_work(&hba->eh_work);
} while (1);
- hba->ufshcd_state = UFSHCD_STATE_RESET;
- ufshcd_set_eh_in_progress(hba);
- spin_unlock_irqrestore(hba->host->host_lock, flags);
+ /*
+ * we don't know if previous reset had really reset the host controller
+ * or not. So let's force reset here to be sure.
+ */
+ hba->ufshcd_state = UFSHCD_STATE_ERROR;
+ hba->force_host_reset = true;
+ schedule_work(&hba->eh_work);
- ufshcd_update_error_stats(hba, UFS_ERR_EH);
- err = ufshcd_reset_and_restore(hba);
+ /* wait for the reset work to finish */
+ do {
+ if (!(work_pending(&hba->eh_work) ||
+ hba->ufshcd_state == UFSHCD_STATE_RESET))
+ break;
+ spin_unlock_irqrestore(hba->host->host_lock, flags);
+ dev_err(hba->dev, "%s: reset in progress - 2\n", __func__);
+ flush_work(&hba->eh_work);
+ spin_lock_irqsave(hba->host->host_lock, flags);
+ } while (1);
- spin_lock_irqsave(hba->host->host_lock, flags);
- if (!err) {
- err = SUCCESS;
- hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
- } else {
+ if (!((hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) &&
+ ufshcd_is_link_active(hba))) {
err = FAILED;
hba->ufshcd_state = UFSHCD_STATE_ERROR;
}
- ufshcd_clear_eh_in_progress(hba);
+
spin_unlock_irqrestore(hba->host->host_lock, flags);
- ufshcd_release_all(hba);
return err;
}
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index a4ee3726edb0..552d50081e3f 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -815,6 +815,7 @@ struct ufs_hba {
u32 saved_uic_err;
u32 saved_ce_err;
bool silence_err_logs;
+ bool force_host_reset;
/* Device management request data */
struct ufs_dev_cmd dev_cmd;