libata-eh.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448
  1. /*
  2. * libata-eh.c - libata error handling
  3. *
  4. * Maintained by: Jeff Garzik <jgarzik@pobox.com>
  5. * Please ALWAYS copy linux-ide@vger.kernel.org
  6. * on emails.
  7. *
  8. * Copyright 2006 Tejun Heo <htejun@gmail.com>
  9. *
  10. *
  11. * This program is free software; you can redistribute it and/or
  12. * modify it under the terms of the GNU General Public License as
  13. * published by the Free Software Foundation; either version 2, or
  14. * (at your option) any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  19. * General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License
  22. * along with this program; see the file COPYING. If not, write to
  23. * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
  24. * USA.
  25. *
  26. *
  27. * libata documentation is available via 'make {ps|pdf}docs',
  28. * as Documentation/DocBook/libata.*
  29. *
  30. * Hardware documentation available from http://www.t13.org/ and
  31. * http://www.sata-io.org/
  32. *
  33. */
  34. #include <linux/config.h>
  35. #include <linux/kernel.h>
  36. #include <scsi/scsi.h>
  37. #include <scsi/scsi_host.h>
  38. #include <scsi/scsi_eh.h>
  39. #include <scsi/scsi_device.h>
  40. #include <scsi/scsi_cmnd.h>
  41. #include <linux/libata.h>
  42. #include "libata.h"
  43. /**
  44. * ata_scsi_timed_out - SCSI layer time out callback
  45. * @cmd: timed out SCSI command
  46. *
  47. * Handles SCSI layer timeout. We race with normal completion of
  48. * the qc for @cmd. If the qc is already gone, we lose and let
  49. * the scsi command finish (EH_HANDLED). Otherwise, the qc has
  50. * timed out and EH should be invoked. Prevent ata_qc_complete()
  51. * from finishing it by setting EH_SCHEDULED and return
  52. * EH_NOT_HANDLED.
  53. *
  54. * LOCKING:
  55. * Called from timer context
  56. *
  57. * RETURNS:
  58. * EH_HANDLED or EH_NOT_HANDLED
  59. */
  60. enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
  61. {
  62. struct Scsi_Host *host = cmd->device->host;
  63. struct ata_port *ap = ata_shost_to_port(host);
  64. unsigned long flags;
  65. struct ata_queued_cmd *qc;
  66. enum scsi_eh_timer_return ret = EH_HANDLED;
  67. DPRINTK("ENTER\n");
  68. spin_lock_irqsave(&ap->host_set->lock, flags);
  69. qc = ata_qc_from_tag(ap, ap->active_tag);
  70. if (qc) {
  71. WARN_ON(qc->scsicmd != cmd);
  72. qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
  73. qc->err_mask |= AC_ERR_TIMEOUT;
  74. ret = EH_NOT_HANDLED;
  75. }
  76. spin_unlock_irqrestore(&ap->host_set->lock, flags);
  77. DPRINTK("EXIT, ret=%d\n", ret);
  78. return ret;
  79. }
  80. /**
  81. * ata_scsi_error - SCSI layer error handler callback
  82. * @host: SCSI host on which error occurred
  83. *
  84. * Handles SCSI-layer-thrown error events.
  85. *
  86. * LOCKING:
  87. * Inherited from SCSI layer (none, can sleep)
  88. *
  89. * RETURNS:
  90. * Zero.
  91. */
  92. void ata_scsi_error(struct Scsi_Host *host)
  93. {
  94. struct ata_port *ap = ata_shost_to_port(host);
  95. DPRINTK("ENTER\n");
  96. /* synchronize with IRQ handler and port task */
  97. spin_unlock_wait(&ap->host_set->lock);
  98. ata_port_flush_task(ap);
  99. WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
  100. ap->ops->eng_timeout(ap);
  101. WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));
  102. scsi_eh_flush_done_q(&ap->eh_done_q);
  103. DPRINTK("EXIT\n");
  104. }
  105. /**
  106. * ata_qc_timeout - Handle timeout of queued command
  107. * @qc: Command that timed out
  108. *
  109. * Some part of the kernel (currently, only the SCSI layer)
  110. * has noticed that the active command on port @ap has not
  111. * completed after a specified length of time. Handle this
  112. * condition by disabling DMA (if necessary) and completing
  113. * transactions, with error if necessary.
  114. *
  115. * This also handles the case of the "lost interrupt", where
  116. * for some reason (possibly hardware bug, possibly driver bug)
  117. * an interrupt was not delivered to the driver, even though the
  118. * transaction completed successfully.
  119. *
  120. * LOCKING:
  121. * Inherited from SCSI layer (none, can sleep)
  122. */
  123. static void ata_qc_timeout(struct ata_queued_cmd *qc)
  124. {
  125. struct ata_port *ap = qc->ap;
  126. struct ata_host_set *host_set = ap->host_set;
  127. u8 host_stat = 0, drv_stat;
  128. unsigned long flags;
  129. DPRINTK("ENTER\n");
  130. ap->hsm_task_state = HSM_ST_IDLE;
  131. spin_lock_irqsave(&host_set->lock, flags);
  132. switch (qc->tf.protocol) {
  133. case ATA_PROT_DMA:
  134. case ATA_PROT_ATAPI_DMA:
  135. host_stat = ap->ops->bmdma_status(ap);
  136. /* before we do anything else, clear DMA-Start bit */
  137. ap->ops->bmdma_stop(qc);
  138. /* fall through */
  139. default:
  140. ata_altstatus(ap);
  141. drv_stat = ata_chk_status(ap);
  142. /* ack bmdma irq events */
  143. ap->ops->irq_clear(ap);
  144. ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, "
  145. "stat 0x%x host_stat 0x%x\n",
  146. qc->tf.command, drv_stat, host_stat);
  147. /* complete taskfile transaction */
  148. qc->err_mask |= ac_err_mask(drv_stat);
  149. break;
  150. }
  151. spin_unlock_irqrestore(&host_set->lock, flags);
  152. ata_eh_qc_complete(qc);
  153. DPRINTK("EXIT\n");
  154. }
  155. /**
  156. * ata_eng_timeout - Handle timeout of queued command
  157. * @ap: Port on which timed-out command is active
  158. *
  159. * Some part of the kernel (currently, only the SCSI layer)
  160. * has noticed that the active command on port @ap has not
  161. * completed after a specified length of time. Handle this
  162. * condition by disabling DMA (if necessary) and completing
  163. * transactions, with error if necessary.
  164. *
  165. * This also handles the case of the "lost interrupt", where
  166. * for some reason (possibly hardware bug, possibly driver bug)
  167. * an interrupt was not delivered to the driver, even though the
  168. * transaction completed successfully.
  169. *
  170. * LOCKING:
  171. * Inherited from SCSI layer (none, can sleep)
  172. */
  173. void ata_eng_timeout(struct ata_port *ap)
  174. {
  175. DPRINTK("ENTER\n");
  176. ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));
  177. DPRINTK("EXIT\n");
  178. }
  179. /**
  180. * ata_qc_schedule_eh - schedule qc for error handling
  181. * @qc: command to schedule error handling for
  182. *
  183. * Schedule error handling for @qc. EH will kick in as soon as
  184. * other commands are drained.
  185. *
  186. * LOCKING:
  187. * spin_lock_irqsave(host_set lock)
  188. */
  189. void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
  190. {
  191. struct ata_port *ap = qc->ap;
  192. WARN_ON(!ap->ops->error_handler);
  193. qc->flags |= ATA_QCFLAG_FAILED;
  194. qc->ap->flags |= ATA_FLAG_EH_PENDING;
  195. /* The following will fail if timeout has already expired.
  196. * ata_scsi_error() takes care of such scmds on EH entry.
  197. * Note that ATA_QCFLAG_FAILED is unconditionally set after
  198. * this function completes.
  199. */
  200. scsi_req_abort_cmd(qc->scsicmd);
  201. }
  202. /**
  203. * ata_port_schedule_eh - schedule error handling without a qc
  204. * @ap: ATA port to schedule EH for
  205. *
  206. * Schedule error handling for @ap. EH will kick in as soon as
  207. * all commands are drained.
  208. *
  209. * LOCKING:
  210. * spin_lock_irqsave(host_set lock)
  211. */
  212. void ata_port_schedule_eh(struct ata_port *ap)
  213. {
  214. WARN_ON(!ap->ops->error_handler);
  215. ap->flags |= ATA_FLAG_EH_PENDING;
  216. ata_schedule_scsi_eh(ap->host);
  217. DPRINTK("port EH scheduled\n");
  218. }
  219. /**
  220. * ata_port_abort - abort all qc's on the port
  221. * @ap: ATA port to abort qc's for
  222. *
  223. * Abort all active qc's of @ap and schedule EH.
  224. *
  225. * LOCKING:
  226. * spin_lock_irqsave(host_set lock)
  227. *
  228. * RETURNS:
  229. * Number of aborted qc's.
  230. */
  231. int ata_port_abort(struct ata_port *ap)
  232. {
  233. int tag, nr_aborted = 0;
  234. WARN_ON(!ap->ops->error_handler);
  235. for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
  236. struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
  237. if (qc) {
  238. qc->flags |= ATA_QCFLAG_FAILED;
  239. ata_qc_complete(qc);
  240. nr_aborted++;
  241. }
  242. }
  243. if (!nr_aborted)
  244. ata_port_schedule_eh(ap);
  245. return nr_aborted;
  246. }
  247. /**
  248. * __ata_port_freeze - freeze port
  249. * @ap: ATA port to freeze
  250. *
  251. * This function is called when HSM violation or some other
  252. * condition disrupts normal operation of the port. Frozen port
  253. * is not allowed to perform any operation until the port is
  254. * thawed, which usually follows a successful reset.
  255. *
  256. * ap->ops->freeze() callback can be used for freezing the port
  257. * hardware-wise (e.g. mask interrupt and stop DMA engine). If a
  258. * port cannot be frozen hardware-wise, the interrupt handler
  259. * must ack and clear interrupts unconditionally while the port
  260. * is frozen.
  261. *
  262. * LOCKING:
  263. * spin_lock_irqsave(host_set lock)
  264. */
  265. static void __ata_port_freeze(struct ata_port *ap)
  266. {
  267. WARN_ON(!ap->ops->error_handler);
  268. if (ap->ops->freeze)
  269. ap->ops->freeze(ap);
  270. ap->flags |= ATA_FLAG_FROZEN;
  271. DPRINTK("ata%u port frozen\n", ap->id);
  272. }
  273. /**
  274. * ata_port_freeze - abort & freeze port
  275. * @ap: ATA port to freeze
  276. *
  277. * Abort and freeze @ap.
  278. *
  279. * LOCKING:
  280. * spin_lock_irqsave(host_set lock)
  281. *
  282. * RETURNS:
  283. * Number of aborted commands.
  284. */
  285. int ata_port_freeze(struct ata_port *ap)
  286. {
  287. int nr_aborted;
  288. WARN_ON(!ap->ops->error_handler);
  289. nr_aborted = ata_port_abort(ap);
  290. __ata_port_freeze(ap);
  291. return nr_aborted;
  292. }
  293. /**
  294. * ata_eh_freeze_port - EH helper to freeze port
  295. * @ap: ATA port to freeze
  296. *
  297. * Freeze @ap.
  298. *
  299. * LOCKING:
  300. * None.
  301. */
  302. void ata_eh_freeze_port(struct ata_port *ap)
  303. {
  304. unsigned long flags;
  305. if (!ap->ops->error_handler)
  306. return;
  307. spin_lock_irqsave(&ap->host_set->lock, flags);
  308. __ata_port_freeze(ap);
  309. spin_unlock_irqrestore(&ap->host_set->lock, flags);
  310. }
  311. /**
  312. * ata_port_thaw_port - EH helper to thaw port
  313. * @ap: ATA port to thaw
  314. *
  315. * Thaw frozen port @ap.
  316. *
  317. * LOCKING:
  318. * None.
  319. */
  320. void ata_eh_thaw_port(struct ata_port *ap)
  321. {
  322. unsigned long flags;
  323. if (!ap->ops->error_handler)
  324. return;
  325. spin_lock_irqsave(&ap->host_set->lock, flags);
  326. ap->flags &= ~ATA_FLAG_FROZEN;
  327. if (ap->ops->thaw)
  328. ap->ops->thaw(ap);
  329. spin_unlock_irqrestore(&ap->host_set->lock, flags);
  330. DPRINTK("ata%u port thawed\n", ap->id);
  331. }
  332. static void ata_eh_scsidone(struct scsi_cmnd *scmd)
  333. {
  334. /* nada */
  335. }
  336. static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
  337. {
  338. struct ata_port *ap = qc->ap;
  339. struct scsi_cmnd *scmd = qc->scsicmd;
  340. unsigned long flags;
  341. spin_lock_irqsave(&ap->host_set->lock, flags);
  342. qc->scsidone = ata_eh_scsidone;
  343. __ata_qc_complete(qc);
  344. WARN_ON(ata_tag_valid(qc->tag));
  345. spin_unlock_irqrestore(&ap->host_set->lock, flags);
  346. scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
  347. }
  348. /**
  349. * ata_eh_qc_complete - Complete an active ATA command from EH
  350. * @qc: Command to complete
  351. *
  352. * Indicate to the mid and upper layers that an ATA command has
  353. * completed. To be used from EH.
  354. */
  355. void ata_eh_qc_complete(struct ata_queued_cmd *qc)
  356. {
  357. struct scsi_cmnd *scmd = qc->scsicmd;
  358. scmd->retries = scmd->allowed;
  359. __ata_eh_qc_complete(qc);
  360. }
  361. /**
  362. * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
  363. * @qc: Command to retry
  364. *
  365. * Indicate to the mid and upper layers that an ATA command
  366. * should be retried. To be used from EH.
  367. *
  368. * SCSI midlayer limits the number of retries to scmd->allowed.
  369. * scmd->retries is decremented for commands which get retried
  370. * due to unrelated failures (qc->err_mask is zero).
  371. */
  372. void ata_eh_qc_retry(struct ata_queued_cmd *qc)
  373. {
  374. struct scsi_cmnd *scmd = qc->scsicmd;
  375. if (!qc->err_mask && scmd->retries)
  376. scmd->retries--;
  377. __ata_eh_qc_complete(qc);
  378. }