libata-eh.c 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558
  1. /*
  2. * libata-eh.c - libata error handling
  3. *
  4. * Maintained by: Jeff Garzik <jgarzik@pobox.com>
  5. * Please ALWAYS copy linux-ide@vger.kernel.org
  6. * on emails.
  7. *
  8. * Copyright 2006 Tejun Heo <htejun@gmail.com>
  9. *
  10. *
  11. * This program is free software; you can redistribute it and/or
  12. * modify it under the terms of the GNU General Public License as
  13. * published by the Free Software Foundation; either version 2, or
  14. * (at your option) any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  19. * General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License
  22. * along with this program; see the file COPYING. If not, write to
  23. * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
  24. * USA.
  25. *
  26. *
  27. * libata documentation is available via 'make {ps|pdf}docs',
  28. * as Documentation/DocBook/libata.*
  29. *
  30. * Hardware documentation available from http://www.t13.org/ and
  31. * http://www.sata-io.org/
  32. *
  33. */
  34. #include <linux/config.h>
  35. #include <linux/kernel.h>
  36. #include <scsi/scsi.h>
  37. #include <scsi/scsi_host.h>
  38. #include <scsi/scsi_eh.h>
  39. #include <scsi/scsi_device.h>
  40. #include <scsi/scsi_cmnd.h>
  41. #include <linux/libata.h>
  42. #include "libata.h"
  43. static void __ata_port_freeze(struct ata_port *ap);
  44. static void ata_ering_record(struct ata_ering *ering, int is_io,
  45. unsigned int err_mask)
  46. {
  47. struct ata_ering_entry *ent;
  48. WARN_ON(!err_mask);
  49. ering->cursor++;
  50. ering->cursor %= ATA_ERING_SIZE;
  51. ent = &ering->ring[ering->cursor];
  52. ent->is_io = is_io;
  53. ent->err_mask = err_mask;
  54. ent->timestamp = get_jiffies_64();
  55. }
  56. static struct ata_ering_entry * ata_ering_top(struct ata_ering *ering)
  57. {
  58. struct ata_ering_entry *ent = &ering->ring[ering->cursor];
  59. if (!ent->err_mask)
  60. return NULL;
  61. return ent;
  62. }
  63. static int ata_ering_map(struct ata_ering *ering,
  64. int (*map_fn)(struct ata_ering_entry *, void *),
  65. void *arg)
  66. {
  67. int idx, rc = 0;
  68. struct ata_ering_entry *ent;
  69. idx = ering->cursor;
  70. do {
  71. ent = &ering->ring[idx];
  72. if (!ent->err_mask)
  73. break;
  74. rc = map_fn(ent, arg);
  75. if (rc)
  76. break;
  77. idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
  78. } while (idx != ering->cursor);
  79. return rc;
  80. }
  81. /**
  82. * ata_scsi_timed_out - SCSI layer time out callback
  83. * @cmd: timed out SCSI command
  84. *
  85. * Handles SCSI layer timeout. We race with normal completion of
  86. * the qc for @cmd. If the qc is already gone, we lose and let
  87. * the scsi command finish (EH_HANDLED). Otherwise, the qc has
  88. * timed out and EH should be invoked. Prevent ata_qc_complete()
  89. * from finishing it by setting EH_SCHEDULED and return
  90. * EH_NOT_HANDLED.
  91. *
  92. * TODO: kill this function once old EH is gone.
  93. *
  94. * LOCKING:
  95. * Called from timer context
  96. *
  97. * RETURNS:
  98. * EH_HANDLED or EH_NOT_HANDLED
  99. */
  100. enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
  101. {
  102. struct Scsi_Host *host = cmd->device->host;
  103. struct ata_port *ap = ata_shost_to_port(host);
  104. unsigned long flags;
  105. struct ata_queued_cmd *qc;
  106. enum scsi_eh_timer_return ret;
  107. DPRINTK("ENTER\n");
  108. if (ap->ops->error_handler) {
  109. ret = EH_NOT_HANDLED;
  110. goto out;
  111. }
  112. ret = EH_HANDLED;
  113. spin_lock_irqsave(&ap->host_set->lock, flags);
  114. qc = ata_qc_from_tag(ap, ap->active_tag);
  115. if (qc) {
  116. WARN_ON(qc->scsicmd != cmd);
  117. qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
  118. qc->err_mask |= AC_ERR_TIMEOUT;
  119. ret = EH_NOT_HANDLED;
  120. }
  121. spin_unlock_irqrestore(&ap->host_set->lock, flags);
  122. out:
  123. DPRINTK("EXIT, ret=%d\n", ret);
  124. return ret;
  125. }
/**
 * ata_scsi_error - SCSI layer error handler callback
 * @host: SCSI host on which error occurred
 *
 * Handles SCSI-layer-thrown error events.  For new-EH ports this
 * sorts timed-out scmds against their qcs, freezes the port if
 * needed, and invokes ->error_handler (repeating up to
 * ATA_EH_MAX_REPEAT times if new exceptions arrive during EH).
 * For old-EH ports it simply calls ->eng_timeout.
 *
 * LOCKING:
 * Inherited from SCSI layer (none, can sleep)
 *
 * RETURNS:
 * Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	spinlock_t *hs_lock = &ap->host_set->lock;
	int i, repeat_cnt = ATA_EH_MAX_REPEAT;
	unsigned long flags;

	DPRINTK("ENTER\n");

	/* synchronize with port task */
	ata_port_flush_task(ap);

	/* synchronize with host_set lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		spin_lock_irqsave(hs_lock, flags);

		/* match every scmd queued for EH against an active qc */
		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
			struct ata_queued_cmd *qc;

			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs.  They belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		spin_unlock_irqrestore(hs_lock, flags);
	} else
		/* old EH: just make sure no completion races past us */
		spin_unlock_wait(hs_lock);

 repeat:
	/* invoke error handler */
	if (ap->ops->error_handler) {
		/* fetch & clear EH info */
		spin_lock_irqsave(hs_lock, flags);

		memset(&ap->eh_context, 0, sizeof(ap->eh_context));
		ap->eh_context.i = ap->eh_info;
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		ap->flags &= ~ATA_FLAG_EH_PENDING;

		spin_unlock_irqrestore(hs_lock, flags);

		/* invoke EH */
		ap->ops->error_handler(ap);

		/* Exception might have happend after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(hs_lock, flags);

		if (ap->flags & ATA_FLAG_EH_PENDING) {
			if (--repeat_cnt) {
				ata_port_printk(ap, KERN_INFO,
					"EH pending after completion, "
					"repeating EH (cnt=%d)\n", repeat_cnt);
				spin_unlock_irqrestore(hs_lock, flags);
				goto repeat;
			}
			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
					"tries, giving up\n", ATA_EH_MAX_REPEAT);
		}

		/* this run is complete, make sure EH info is clear */
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		/* Clear host_eh_scheduled while holding hs_lock such
		 * that if exception occurs after this point but
		 * before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		host->host_eh_scheduled = 0;

		spin_unlock_irqrestore(hs_lock, flags);
	} else {
		/* old EH: exactly one qc (active_tag) must be failing */
		WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	/* finish or retry handled scmd's and clean up */
	WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(hs_lock, flags);

	if (ap->flags & ATA_FLAG_RECOVERED)
		ata_port_printk(ap, KERN_INFO, "EH complete\n");
	ap->flags &= ~ATA_FLAG_RECOVERED;

	spin_unlock_irqrestore(hs_lock, flags);

	DPRINTK("EXIT\n");
}
/**
 * ata_qc_timeout - Handle timeout of queued command
 * @qc: Command that timed out
 *
 * Some part of the kernel (currently, only the SCSI layer)
 * has noticed that the active command on port @ap has not
 * completed after a specified length of time.  Handle this
 * condition by disabling DMA (if necessary) and completing
 * transactions, with error if necessary.
 *
 * This also handles the case of the "lost interrupt", where
 * for some reason (possibly hardware bug, possibly driver bug)
 * an interrupt was not delivered to the driver, even though the
 * transaction completed successfully.
 *
 * TODO: kill this function once old EH is gone.
 *
 * LOCKING:
 * Inherited from SCSI layer (none, can sleep)
 */
static void ata_qc_timeout(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct ata_host_set *host_set = ap->host_set;
	u8 host_stat = 0, drv_stat;
	unsigned long flags;

	DPRINTK("ENTER\n");

	/* abort any in-progress host state machine activity */
	ap->hsm_task_state = HSM_ST_IDLE;

	spin_lock_irqsave(&host_set->lock, flags);

	switch (qc->tf.protocol) {

	case ATA_PROT_DMA:
	case ATA_PROT_ATAPI_DMA:
		/* snapshot BMDMA status before touching the engine */
		host_stat = ap->ops->bmdma_status(ap);

		/* before we do anything else, clear DMA-Start bit */
		ap->ops->bmdma_stop(qc);

		/* fall through */

	default:
		ata_altstatus(ap);
		drv_stat = ata_chk_status(ap);

		/* ack bmdma irq events */
		ap->ops->irq_clear(ap);

		ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, "
			       "stat 0x%x host_stat 0x%x\n",
			       qc->tf.command, drv_stat, host_stat);

		/* complete taskfile transaction */
		qc->err_mask |= AC_ERR_TIMEOUT;
		break;
	}

	spin_unlock_irqrestore(&host_set->lock, flags);

	ata_eh_qc_complete(qc);

	DPRINTK("EXIT\n");
}
  304. /**
  305. * ata_eng_timeout - Handle timeout of queued command
  306. * @ap: Port on which timed-out command is active
  307. *
  308. * Some part of the kernel (currently, only the SCSI layer)
  309. * has noticed that the active command on port @ap has not
  310. * completed after a specified length of time. Handle this
  311. * condition by disabling DMA (if necessary) and completing
  312. * transactions, with error if necessary.
  313. *
  314. * This also handles the case of the "lost interrupt", where
  315. * for some reason (possibly hardware bug, possibly driver bug)
  316. * an interrupt was not delivered to the driver, even though the
  317. * transaction completed successfully.
  318. *
  319. * TODO: kill this function once old EH is gone.
  320. *
  321. * LOCKING:
  322. * Inherited from SCSI layer (none, can sleep)
  323. */
  324. void ata_eng_timeout(struct ata_port *ap)
  325. {
  326. DPRINTK("ENTER\n");
  327. ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));
  328. DPRINTK("EXIT\n");
  329. }
  330. /**
  331. * ata_qc_schedule_eh - schedule qc for error handling
  332. * @qc: command to schedule error handling for
  333. *
  334. * Schedule error handling for @qc. EH will kick in as soon as
  335. * other commands are drained.
  336. *
  337. * LOCKING:
  338. * spin_lock_irqsave(host_set lock)
  339. */
  340. void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
  341. {
  342. struct ata_port *ap = qc->ap;
  343. WARN_ON(!ap->ops->error_handler);
  344. qc->flags |= ATA_QCFLAG_FAILED;
  345. qc->ap->flags |= ATA_FLAG_EH_PENDING;
  346. /* The following will fail if timeout has already expired.
  347. * ata_scsi_error() takes care of such scmds on EH entry.
  348. * Note that ATA_QCFLAG_FAILED is unconditionally set after
  349. * this function completes.
  350. */
  351. scsi_req_abort_cmd(qc->scsicmd);
  352. }
/**
 * ata_port_schedule_eh - schedule error handling without a qc
 * @ap: ATA port to schedule EH for
 *
 * Schedule error handling for @ap.  EH will kick in as soon as
 * all commands are drained.  Only valid for new-EH ports.
 *
 * LOCKING:
 * spin_lock_irqsave(host_set lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	ap->flags |= ATA_FLAG_EH_PENDING;
	/* wake the SCSI midlayer EH thread for this host */
	ata_schedule_scsi_eh(ap->host);

	DPRINTK("port EH scheduled\n");
}
  370. /**
  371. * ata_port_abort - abort all qc's on the port
  372. * @ap: ATA port to abort qc's for
  373. *
  374. * Abort all active qc's of @ap and schedule EH.
  375. *
  376. * LOCKING:
  377. * spin_lock_irqsave(host_set lock)
  378. *
  379. * RETURNS:
  380. * Number of aborted qc's.
  381. */
  382. int ata_port_abort(struct ata_port *ap)
  383. {
  384. int tag, nr_aborted = 0;
  385. WARN_ON(!ap->ops->error_handler);
  386. for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
  387. struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
  388. if (qc) {
  389. qc->flags |= ATA_QCFLAG_FAILED;
  390. ata_qc_complete(qc);
  391. nr_aborted++;
  392. }
  393. }
  394. if (!nr_aborted)
  395. ata_port_schedule_eh(ap);
  396. return nr_aborted;
  397. }
/**
 * __ata_port_freeze - freeze port
 * @ap: ATA port to freeze
 *
 * This function is called when HSM violation or some other
 * condition disrupts normal operation of the port.  Frozen port
 * is not allowed to perform any operation until the port is
 * thawed, which usually follows a successful reset.
 *
 * ap->ops->freeze() callback can be used for freezing the port
 * hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 * port cannot be frozen hardware-wise, the interrupt handler
 * must ack and clear interrupts unconditionally while the port
 * is frozen.
 *
 * LOCKING:
 * spin_lock_irqsave(host_set lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	/* hardware freeze is optional; flag is set regardless */
	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->flags |= ATA_FLAG_FROZEN;

	DPRINTK("ata%u port frozen\n", ap->id);
}
/**
 * ata_port_freeze - abort & freeze port
 * @ap: ATA port to freeze
 *
 * Abort and freeze @ap.
 *
 * LOCKING:
 * spin_lock_irqsave(host_set lock)
 *
 * RETURNS:
 * Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
	int nr_aborted;

	WARN_ON(!ap->ops->error_handler);

	/* abort first so completions happen before the port is frozen */
	nr_aborted = ata_port_abort(ap);
	__ata_port_freeze(ap);

	return nr_aborted;
}
/**
 * ata_eh_freeze_port - EH helper to freeze port
 * @ap: ATA port to freeze
 *
 * Freeze @ap.  No-op on old-EH ports.
 *
 * LOCKING:
 * None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
	unsigned long flags;

	/* freezing only exists in the new EH framework */
	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(&ap->host_set->lock, flags);
	__ata_port_freeze(ap);
	spin_unlock_irqrestore(&ap->host_set->lock, flags);
}
/**
 * ata_eh_thaw_port - EH helper to thaw port
 * @ap: ATA port to thaw
 *
 * Thaw frozen port @ap.  No-op on old-EH ports.
 *
 * LOCKING:
 * None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	/* thawing only exists in the new EH framework */
	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(&ap->host_set->lock, flags);

	/* clear the flag before the optional hardware thaw callback */
	ap->flags &= ~ATA_FLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(&ap->host_set->lock, flags);

	DPRINTK("ata%u port thawed\n", ap->id);
}
/* Intentionally-empty scsidone callback.  __ata_eh_qc_complete()
 * installs this so that completing a qc from EH does not re-enter
 * the SCSI completion path; the scmd is handed back via
 * scsi_eh_finish_cmd() instead.
 */
static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}
/* Common tail for EH qc completion: complete @qc under the host_set
 * lock with the no-op scsidone callback, then hand the scmd to the
 * SCSI EH done queue.  Callers adjust scmd->retries beforehand to
 * select finish vs. retry semantics.
 */
static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(&ap->host_set->lock, flags);
	/* suppress normal SCSI completion; EH finishes the scmd below */
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	/* qc's tag must have been released by __ata_qc_complete() */
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(&ap->host_set->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}
/**
 * ata_eh_qc_complete - Complete an active ATA command from EH
 * @qc: Command to complete
 *
 * Indicate to the mid and upper layers that an ATA command has
 * completed.  To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;

	/* exhaust retries so the midlayer finishes rather than retries */
	scmd->retries = scmd->allowed;
	__ata_eh_qc_complete(qc);
}
/**
 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 * @qc: Command to retry
 *
 * Indicate to the mid and upper layers that an ATA command
 * should be retried.  To be used from EH.
 *
 * SCSI midlayer limits the number of retries to scmd->allowed.
 * scmd->retries is decremented for commands which get retried
 * due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;

	/* don't charge a retry to a command that didn't itself fail */
	if (!qc->err_mask && scmd->retries)
		scmd->retries--;
	__ata_eh_qc_complete(qc);
}
/**
 * ata_eh_about_to_do - about to perform eh_action
 * @ap: target ATA port
 * @action: action about to be performed
 *
 * Called just before performing EH actions to clear related bits
 * in @ap->eh_info such that eh actions are not unnecessarily
 * repeated.  Also marks the port as having undergone recovery
 * (ATA_FLAG_RECOVERED) so ata_scsi_error() logs completion.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_about_to_do(struct ata_port *ap, unsigned int action)
{
	unsigned long flags;

	spin_lock_irqsave(&ap->host_set->lock, flags);
	ap->eh_info.action &= ~action;
	ap->flags |= ATA_FLAG_RECOVERED;
	spin_unlock_irqrestore(&ap->host_set->lock, flags);
}
  550. /**
  551. * ata_err_string - convert err_mask to descriptive string
  552. * @err_mask: error mask to convert to string
  553. *
  554. * Convert @err_mask to descriptive string. Errors are
  555. * prioritized according to severity and only the most severe
  556. * error is reported.
  557. *
  558. * LOCKING:
  559. * None.
  560. *
  561. * RETURNS:
  562. * Descriptive string for @err_mask
  563. */
  564. static const char * ata_err_string(unsigned int err_mask)
  565. {
  566. if (err_mask & AC_ERR_HOST_BUS)
  567. return "host bus error";
  568. if (err_mask & AC_ERR_ATA_BUS)
  569. return "ATA bus error";
  570. if (err_mask & AC_ERR_TIMEOUT)
  571. return "timeout";
  572. if (err_mask & AC_ERR_HSM)
  573. return "HSM violation";
  574. if (err_mask & AC_ERR_SYSTEM)
  575. return "internal error";
  576. if (err_mask & AC_ERR_MEDIA)
  577. return "media error";
  578. if (err_mask & AC_ERR_INVALID)
  579. return "invalid argument";
  580. if (err_mask & AC_ERR_DEV)
  581. return "device error";
  582. return "unknown error";
  583. }
  584. /**
  585. * ata_read_log_page - read a specific log page
  586. * @dev: target device
  587. * @page: page to read
  588. * @buf: buffer to store read page
  589. * @sectors: number of sectors to read
  590. *
  591. * Read log page using READ_LOG_EXT command.
  592. *
  593. * LOCKING:
  594. * Kernel thread context (may sleep).
  595. *
  596. * RETURNS:
  597. * 0 on success, AC_ERR_* mask otherwise.
  598. */
  599. static unsigned int ata_read_log_page(struct ata_device *dev,
  600. u8 page, void *buf, unsigned int sectors)
  601. {
  602. struct ata_taskfile tf;
  603. unsigned int err_mask;
  604. DPRINTK("read log page - page %d\n", page);
  605. ata_tf_init(dev, &tf);
  606. tf.command = ATA_CMD_READ_LOG_EXT;
  607. tf.lbal = page;
  608. tf.nsect = sectors;
  609. tf.hob_nsect = sectors >> 8;
  610. tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
  611. tf.protocol = ATA_PROT_PIO;
  612. err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
  613. buf, sectors * ATA_SECT_SIZE);
  614. DPRINTK("EXIT, err_mask=%x\n", err_mask);
  615. return err_mask;
  616. }
  617. /**
  618. * ata_eh_read_log_10h - Read log page 10h for NCQ error details
  619. * @dev: Device to read log page 10h from
  620. * @tag: Resulting tag of the failed command
  621. * @tf: Resulting taskfile registers of the failed command
  622. *
  623. * Read log page 10h to obtain NCQ error details and clear error
  624. * condition.
  625. *
  626. * LOCKING:
  627. * Kernel thread context (may sleep).
  628. *
  629. * RETURNS:
  630. * 0 on success, -errno otherwise.
  631. */
  632. static int ata_eh_read_log_10h(struct ata_device *dev,
  633. int *tag, struct ata_taskfile *tf)
  634. {
  635. u8 *buf = dev->ap->sector_buf;
  636. unsigned int err_mask;
  637. u8 csum;
  638. int i;
  639. err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
  640. if (err_mask)
  641. return -EIO;
  642. csum = 0;
  643. for (i = 0; i < ATA_SECT_SIZE; i++)
  644. csum += buf[i];
  645. if (csum)
  646. ata_dev_printk(dev, KERN_WARNING,
  647. "invalid checksum 0x%x on log page 10h\n", csum);
  648. if (buf[0] & 0x80)
  649. return -ENOENT;
  650. *tag = buf[0] & 0x1f;
  651. tf->command = buf[2];
  652. tf->feature = buf[3];
  653. tf->lbal = buf[4];
  654. tf->lbam = buf[5];
  655. tf->lbah = buf[6];
  656. tf->device = buf[7];
  657. tf->hob_lbal = buf[8];
  658. tf->hob_lbam = buf[9];
  659. tf->hob_lbah = buf[10];
  660. tf->nsect = buf[12];
  661. tf->hob_nsect = buf[13];
  662. return 0;
  663. }
/**
 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 * @dev: device to perform REQUEST_SENSE to
 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
 *
 * Perform ATAPI REQUEST_SENSE after the device reported CHECK
 * SENSE.  This function is EH helper.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, AC_ERR_* mask on failure
 */
static unsigned int atapi_eh_request_sense(struct ata_device *dev,
					   unsigned char *sense_buf)
{
	struct ata_port *ap = dev->ap;
	struct ata_taskfile tf;
	u8 cdb[ATAPI_CDB_LEN];

	DPRINTK("ATAPI request sense\n");

	ata_tf_init(dev, &tf);

	/* FIXME: is this needed? */
	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* XXX: why tf_read here? */
	ap->ops->tf_read(ap, &tf);

	/* fill these in, for the case where they are -not- overwritten */
	sense_buf[0] = 0x70;
	sense_buf[2] = tf.feature >> 4;

	/* build REQUEST SENSE CDB asking for SCSI_SENSE_BUFFERSIZE bytes */
	memset(cdb, 0, ATAPI_CDB_LEN);
	cdb[0] = REQUEST_SENSE;
	cdb[4] = SCSI_SENSE_BUFFERSIZE;

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATA_PROT_ATAPI_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		/* PIO: byte-count limit goes in lbam/lbah */
		tf.protocol = ATA_PROT_ATAPI;
		tf.lbam = (8 * 1024) & 0xff;
		tf.lbah = (8 * 1024) >> 8;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE);
}
  710. /**
  711. * ata_eh_analyze_serror - analyze SError for a failed port
  712. * @ap: ATA port to analyze SError for
  713. *
  714. * Analyze SError if available and further determine cause of
  715. * failure.
  716. *
  717. * LOCKING:
  718. * None.
  719. */
  720. static void ata_eh_analyze_serror(struct ata_port *ap)
  721. {
  722. struct ata_eh_context *ehc = &ap->eh_context;
  723. u32 serror = ehc->i.serror;
  724. unsigned int err_mask = 0, action = 0;
  725. if (serror & SERR_PERSISTENT) {
  726. err_mask |= AC_ERR_ATA_BUS;
  727. action |= ATA_EH_HARDRESET;
  728. }
  729. if (serror &
  730. (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {
  731. err_mask |= AC_ERR_ATA_BUS;
  732. action |= ATA_EH_SOFTRESET;
  733. }
  734. if (serror & SERR_PROTOCOL) {
  735. err_mask |= AC_ERR_HSM;
  736. action |= ATA_EH_SOFTRESET;
  737. }
  738. if (serror & SERR_INTERNAL) {
  739. err_mask |= AC_ERR_SYSTEM;
  740. action |= ATA_EH_SOFTRESET;
  741. }
  742. if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG)) {
  743. err_mask |= AC_ERR_ATA_BUS;
  744. action |= ATA_EH_HARDRESET;
  745. }
  746. ehc->i.err_mask |= err_mask;
  747. ehc->i.action |= action;
  748. }
/**
 * ata_eh_analyze_ncq_error - analyze NCQ error
 * @ap: ATA port to analyze NCQ error for
 *
 * Read log page 10h, determine the offending qc and acquire
 * error status TF.  For NCQ device errors, all LLDDs have to do
 * is setting AC_ERR_DEV in ehi->err_mask.  This function takes
 * care of the rest.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_analyze_ncq_error(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev = ap->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->flags & ATA_FLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!ap->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already? */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		/* a failed qc already has err_mask: LLDD did the work */
		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_port_printk(ap, KERN_ERR, "failed to read log page 10h "
				"(errno=%d)\n", rc);
		return;
	}

	/* the reported tag must correspond to an in-flight NCQ command */
	if (!(ap->sactive & (1 << tag))) {
		ata_port_printk(ap, KERN_ERR, "log page 10h reported "
				"inactive tag %d\n", tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->err_mask |= AC_ERR_DEV;
	/* blame moved to the specific qc; clear port-level device error */
	ehc->i.err_mask &= ~AC_ERR_DEV;
}
  800. /**
  801. * ata_eh_analyze_tf - analyze taskfile of a failed qc
  802. * @qc: qc to analyze
  803. * @tf: Taskfile registers to analyze
  804. *
  805. * Analyze taskfile of @qc and further determine cause of
  806. * failure. This function also requests ATAPI sense data if
  807. * avaliable.
  808. *
  809. * LOCKING:
  810. * Kernel thread context (may sleep).
  811. *
  812. * RETURNS:
  813. * Determined recovery action
  814. */
  815. static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
  816. const struct ata_taskfile *tf)
  817. {
  818. unsigned int tmp, action = 0;
  819. u8 stat = tf->command, err = tf->feature;
  820. if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
  821. qc->err_mask |= AC_ERR_HSM;
  822. return ATA_EH_SOFTRESET;
  823. }
  824. if (!(qc->err_mask & AC_ERR_DEV))
  825. return 0;
  826. switch (qc->dev->class) {
  827. case ATA_DEV_ATA:
  828. if (err & ATA_ICRC)
  829. qc->err_mask |= AC_ERR_ATA_BUS;
  830. if (err & ATA_UNC)
  831. qc->err_mask |= AC_ERR_MEDIA;
  832. if (err & ATA_IDNF)
  833. qc->err_mask |= AC_ERR_INVALID;
  834. break;
  835. case ATA_DEV_ATAPI:
  836. tmp = atapi_eh_request_sense(qc->dev,
  837. qc->scsicmd->sense_buffer);
  838. if (!tmp) {
  839. /* ATA_QCFLAG_SENSE_VALID is used to tell
  840. * atapi_qc_complete() that sense data is
  841. * already valid.
  842. *
  843. * TODO: interpret sense data and set
  844. * appropriate err_mask.
  845. */
  846. qc->flags |= ATA_QCFLAG_SENSE_VALID;
  847. } else
  848. qc->err_mask |= tmp;
  849. }
  850. if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
  851. action |= ATA_EH_SOFTRESET;
  852. return action;
  853. }
  854. static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent)
  855. {
  856. if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT))
  857. return 1;
  858. if (ent->is_io) {
  859. if (ent->err_mask & AC_ERR_HSM)
  860. return 1;
  861. if ((ent->err_mask &
  862. (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
  863. return 2;
  864. }
  865. return 0;
  866. }
/* Accumulator passed to speed_down_needed_cb() while mapping over a
 * device's error ring.
 */
struct speed_down_needed_arg {
	u64 since;		/* jiffies_64 cutoff; older entries stop the walk */
	int nr_errors[3];	/* error counts indexed by category (0, 1, 2) */
};
  871. static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg)
  872. {
  873. struct speed_down_needed_arg *arg = void_arg;
  874. if (ent->timestamp < arg->since)
  875. return -1;
  876. arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++;
  877. return 0;
  878. }
/**
 *	ata_eh_speed_down_needed - Determine whether speed down is necessary
 *	@dev: Device of interest
 *
 *	This function examines error ring of @dev and determines
 *	whether speed down is necessary.  Speed down is necessary if
 *	there have been more than 3 of Cat-1 errors or 10 of Cat-2
 *	errors during last 15 minutes.
 *
 *	Cat-1 errors are ATA_BUS, TIMEOUT for any command and HSM
 *	violation for known supported commands.
 *
 *	Cat-2 errors are unclassified DEV error for known supported
 *	command.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	1 if speed down is necessary, 0 otherwise
 */
static int ata_eh_speed_down_needed(struct ata_device *dev)
{
	const u64 interval = 15LLU * 60 * HZ;		/* 15 minutes in jiffies */
	static const int err_limits[3] = { -1, 3, 10 };	/* per-category thresholds */
	struct speed_down_needed_arg arg;
	struct ata_ering_entry *ent;
	int err_cat;
	u64 j64;

	/* empty ring -- nothing to evaluate */
	ent = ata_ering_top(&dev->ering);
	if (!ent)
		return 0;

	/* the most recent entry decides which category's count we check */
	err_cat = ata_eh_categorize_ering_entry(ent);
	if (err_cat == 0)
		return 0;

	memset(&arg, 0, sizeof(arg));

	/* compute the cutoff, guarding against u64 underflow early
	 * after boot when jiffies_64 < interval
	 */
	j64 = get_jiffies_64();
	if (j64 >= interval)
		arg.since = j64 - interval;
	else
		arg.since = 0;

	/* count recent errors per category */
	ata_ering_map(&dev->ering, speed_down_needed_cb, &arg);

	return arg.nr_errors[err_cat] > err_limits[err_cat];
}
/**
 *	ata_eh_speed_down - record error and speed down if necessary
 *	@dev: Failed device
 *	@is_io: Did the device fail during normal IO?
 *	@err_mask: err_mask of the error
 *
 *	Record error and examine error history to determine whether
 *	adjusting transmission speed is necessary.  It also sets
 *	transmission limits appropriately if such adjustment is
 *	necessary.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Additional EH action needed to apply the new limit
 *	(ATA_EH_HARDRESET or ATA_EH_SOFTRESET), or 0 if no speed down
 *	was performed.
 */
static int ata_eh_speed_down(struct ata_device *dev, int is_io,
			     unsigned int err_mask)
{
	/* nothing to record */
	if (!err_mask)
		return 0;

	/* record error and determine whether speed down is necessary */
	ata_ering_record(&dev->ering, is_io, err_mask);

	if (!ata_eh_speed_down_needed(dev))
		return 0;

	/* speed down SATA link speed if possible; the new limit takes
	 * effect only after a hardreset
	 */
	if (sata_down_spd_limit(dev->ap) == 0)
		return ATA_EH_HARDRESET;

	/* lower transfer mode */
	if (ata_down_xfermask_limit(dev, 0) == 0)
		return ATA_EH_SOFTRESET;

	ata_dev_printk(dev, KERN_ERR,
		       "speed down requested but no transfer mode left\n");
	return 0;
}
/**
 *	ata_eh_autopsy - analyze error and determine recovery action
 *	@ap: ATA port to perform autopsy on
 *
 *	Analyze why @ap failed and determine which recovery action is
 *	needed.  This function also sets more detailed AC_ERR_* values
 *	and fills sense data for ATAPI CHECK SENSE.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_autopsy(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int action = ehc->i.action;
	struct ata_device *failed_dev = NULL;
	unsigned int all_err_mask = 0;
	int tag, is_io = 0;
	u32 serror;
	int rc;

	DPRINTK("ENTER\n");

	/* obtain and analyze SError; if the read fails for any reason
	 * other than the controller not supporting SCR access, the
	 * port state is suspect -- schedule a hardreset
	 */
	rc = sata_scr_read(ap, SCR_ERROR, &serror);
	if (rc == 0) {
		ehc->i.serror |= serror;
		ata_eh_analyze_serror(ap);
	} else if (rc != -EOPNOTSUPP)
		action |= ATA_EH_HARDRESET;

	/* analyze NCQ failure */
	ata_eh_analyze_ncq_error(ap);

	/* any real error trumps AC_ERR_OTHER */
	if (ehc->i.err_mask & ~AC_ERR_OTHER)
		ehc->i.err_mask &= ~AC_ERR_OTHER;

	all_err_mask |= ehc->i.err_mask;

	/* walk every failed qc, refining its err_mask and accumulating
	 * the combined error picture
	 */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		/* inherit upper level err_mask */
		qc->err_mask |= ehc->i.err_mask;

		if (qc->err_mask & AC_ERR_TIMEOUT)
			action |= ATA_EH_SOFTRESET;

		/* analyze TF */
		action |= ata_eh_analyze_tf(qc, &qc->result_tf);

		/* DEV errors are probably spurious in case of ATA_BUS error */
		if (qc->err_mask & AC_ERR_ATA_BUS)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
					  AC_ERR_INVALID);

		/* any real error trumps unknown error */
		if (qc->err_mask & ~AC_ERR_OTHER)
			qc->err_mask &= ~AC_ERR_OTHER;

		/* SENSE_VALID trumps dev/unknown error and revalidation */
		if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
			action &= ~ATA_EH_REVALIDATE;
		}

		/* accumulate error info; the last failed qc's device is
		 * recorded as the failed device
		 */
		failed_dev = qc->dev;
		all_err_mask |= qc->err_mask;
		if (qc->flags & ATA_QCFLAG_IO)
			is_io = 1;
	}

	/* speed down iff command was in progress */
	if (failed_dev)
		action |= ata_eh_speed_down(failed_dev, is_io, all_err_mask);

	if (all_err_mask)
		action |= ATA_EH_REVALIDATE;

	ehc->i.dev = failed_dev;
	ehc->i.action = action;

	DPRINTK("EXIT\n");
}
/**
 *	ata_eh_report - report error handling to user
 *	@ap: ATA port EH is going on
 *
 *	Report EH to user.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_report(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	const char *frozen, *desc;
	int tag, nr_failed = 0;

	desc = NULL;
	if (ehc->i.desc[0] != '\0')
		desc = ehc->i.desc;

	/* count qcs worth reporting; those with valid sense data and no
	 * err_mask were handled cleanly and are skipped
	 */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;
		if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
			continue;

		nr_failed++;
	}

	/* nothing to report */
	if (!nr_failed && !ehc->i.err_mask)
		return;

	frozen = "";
	if (ap->flags & ATA_FLAG_FROZEN)
		frozen = " frozen";

	/* print the port/device-level summary; attribute it to the
	 * failed device when one was identified
	 */
	if (ehc->i.dev) {
		ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x "
			       "SAct 0x%x SErr 0x%x action 0x%x%s\n",
			       ehc->i.err_mask, ap->sactive, ehc->i.serror,
			       ehc->i.action, frozen);
		if (desc)
			ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc);
	} else {
		ata_port_printk(ap, KERN_ERR, "exception Emask 0x%x "
				"SAct 0x%x SErr 0x%x action 0x%x%s\n",
				ehc->i.err_mask, ap->sactive, ehc->i.serror,
				ehc->i.action, frozen);
		if (desc)
			ata_port_printk(ap, KERN_ERR, "(%s)\n", desc);
	}

	/* print one line per failed qc */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask)
			continue;

		ata_dev_printk(qc->dev, KERN_ERR, "tag %d cmd 0x%x "
			       "Emask 0x%x stat 0x%x err 0x%x (%s)\n",
			       qc->tag, qc->tf.command, qc->err_mask,
			       qc->result_tf.command, qc->result_tf.feature,
			       ata_err_string(qc->err_mask));
	}
}
/* Perform reset with retry.  Softreset is preferred unless it's
 * unavailable, an SPD change is pending, or a hardreset was explicitly
 * requested.  On success, reset actions are cleared and revalidation is
 * scheduled.  Returns 0 on success, -errno after exhausting retries.
 */
static int ata_eh_reset(struct ata_port *ap, ata_reset_fn_t softreset,
			ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int classes[ATA_MAX_DEVICES];
	int tries = ATA_EH_RESET_TRIES;
	ata_reset_fn_t reset;
	int rc;

	/* choose initial reset method */
	if (softreset && (!hardreset || (!sata_set_spd_needed(ap) &&
					 !(ehc->i.action & ATA_EH_HARDRESET))))
		reset = softreset;
	else
		reset = hardreset;

 retry:
	ata_port_printk(ap, KERN_INFO, "%s resetting port\n",
			reset == softreset ? "soft" : "hard");

	/* reset */
	ata_eh_about_to_do(ap, ATA_EH_RESET_MASK);
	ehc->i.flags |= ATA_EHI_DID_RESET;

	rc = ata_do_reset(ap, reset, classes);

	if (rc && --tries) {
		ata_port_printk(ap, KERN_WARNING,
				"%sreset failed, retrying in 5 secs\n",
				reset == softreset ? "soft" : "hard");
		ssleep(5);

		/* if the hard reset itself failed, lower the SATA link
		 * speed before trying again
		 */
		if (reset == hardreset)
			sata_down_spd_limit(ap);
		/* escalate to hardreset for the retry when available */
		if (hardreset)
			reset = hardreset;
		goto retry;
	}

	if (rc == 0) {
		if (postreset)
			postreset(ap, classes);

		/* reset successful, schedule revalidation */
		ehc->i.dev = NULL;
		ehc->i.action &= ~ATA_EH_RESET_MASK;
		ehc->i.action |= ATA_EH_REVALIDATE;
	}

	return rc;
}
/* Revalidate enabled devices for which revalidation was requested.
 * Returns 0 on success; on failure, returns -errno and stores the
 * offending device in @r_failed_dev.
 */
static int ata_eh_revalidate(struct ata_port *ap,
			     struct ata_device **r_failed_dev)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev;
	int i, rc = 0;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		dev = &ap->device[i];

		/* revalidate only if requested and this device is either
		 * the recorded failed device or no specific device failed
		 */
		if (ehc->i.action & ATA_EH_REVALIDATE && ata_dev_enabled(dev) &&
		    (!ehc->i.dev || ehc->i.dev == dev)) {
			/* link down -- revalidation can't succeed */
			if (ata_port_offline(ap)) {
				rc = -EIO;
				break;
			}

			ata_eh_about_to_do(ap, ATA_EH_REVALIDATE);
			rc = ata_dev_revalidate(dev,
					ehc->i.flags & ATA_EHI_DID_RESET);
			if (rc)
				break;

			/* action cleared only after a successful pass */
			ehc->i.action &= ~ATA_EH_REVALIDATE;
		}
	}

	if (rc)
		*r_failed_dev = dev;

	DPRINTK("EXIT\n");
	return rc;
}
  1155. static int ata_port_nr_enabled(struct ata_port *ap)
  1156. {
  1157. int i, cnt = 0;
  1158. for (i = 0; i < ATA_MAX_DEVICES; i++)
  1159. if (ata_dev_enabled(&ap->device[i]))
  1160. cnt++;
  1161. return cnt;
  1162. }
/**
 *	ata_eh_recover - recover host port after error
 *	@ap: host port to recover
 *	@softreset: softreset method (can be NULL)
 *	@hardreset: hardreset method (can be NULL)
 *	@postreset: postreset method (can be NULL)
 *
 *	This is the alpha and omega, eum and yang, heart and soul of
 *	libata exception handling.  On entry, actions required to
 *	recover each device are recorded in eh_context.  This
 *	function executes all the operations with appropriate retries
 *	and fallbacks to resurrect failed devices.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno on failure.
 */
static int ata_eh_recover(struct ata_port *ap, ata_reset_fn_t softreset,
			  ata_reset_fn_t hardreset,
			  ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev;
	int down_xfermask, i, rc;

	DPRINTK("ENTER\n");

	/* prep for recovery */
	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		dev = &ap->device[i];

		ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
	}

 retry:
	down_xfermask = 0;
	rc = 0;

	/* skip EH if possible. */
	if (!ata_port_nr_enabled(ap) && !(ap->flags & ATA_FLAG_FROZEN))
		ehc->i.action = 0;

	/* reset; the port is frozen for the duration so that stray
	 * interrupts don't interfere
	 */
	if (ehc->i.action & ATA_EH_RESET_MASK) {
		ata_eh_freeze_port(ap);

		rc = ata_eh_reset(ap, softreset, hardreset, postreset);
		if (rc) {
			ata_port_printk(ap, KERN_ERR,
					"reset failed, giving up\n");
			goto out;
		}

		ata_eh_thaw_port(ap);
	}

	/* revalidate existing devices */
	rc = ata_eh_revalidate(ap, &dev);
	if (rc)
		goto dev_fail;

	/* configure transfer mode if the port has been reset */
	if (ehc->i.flags & ATA_EHI_DID_RESET) {
		rc = ata_set_mode(ap, &dev);
		if (rc) {
			down_xfermask = 1;
			goto dev_fail;
		}
	}

	goto out;

 dev_fail:
	switch (rc) {
	case -ENODEV:
	case -EINVAL:
		/* device is gone or fundamentally broken -- don't retry */
		ehc->tries[dev->devno] = 0;
		break;
	case -EIO:
		sata_down_spd_limit(ap);
		/* fallthrough */
	default:
		ehc->tries[dev->devno]--;
		/* on the last try, force down to the lowest transfer mode */
		if (down_xfermask &&
		    ata_down_xfermask_limit(dev, ehc->tries[dev->devno] == 1))
			ehc->tries[dev->devno] = 0;
	}

	/* disable device if it has used up all its chances */
	if (ata_dev_enabled(dev) && !ehc->tries[dev->devno])
		ata_dev_disable(dev);

	/* soft didn't work?  be haaaaard */
	if (ehc->i.flags & ATA_EHI_DID_RESET)
		ehc->i.action |= ATA_EH_HARDRESET;
	else
		ehc->i.action |= ATA_EH_SOFTRESET;

	if (ata_port_nr_enabled(ap)) {
		ata_port_printk(ap, KERN_WARNING, "failed to recover some "
				"devices, retrying in 5 secs\n");
		ssleep(5);
	} else {
		/* no device left, repeat fast */
		msleep(500);
	}

	goto retry;

 out:
	if (rc) {
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			ata_dev_disable(&ap->device[i]);
	}

	DPRINTK("EXIT, rc=%d\n", rc);
	return rc;
}
/**
 *	ata_eh_finish - finish up EH
 *	@ap: host port to finish EH for
 *
 *	Recovery is complete.  Clean up EH states and retry or finish
 *	failed qcs.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_finish(struct ata_port *ap)
{
	int tag;

	/* retry or finish qcs */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask) {
			/* FIXME: Once EH migration is complete,
			 * generate sense data in this function,
			 * considering both err_mask and tf.
			 */
			if (qc->err_mask & AC_ERR_INVALID)
				/* invalid commands won't succeed on retry --
				 * complete with the error
				 */
				ata_eh_qc_complete(qc);
			else
				ata_eh_qc_retry(qc);
		} else {
			if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
				ata_eh_qc_complete(qc);
			} else {
				/* feed zero TF to sense generation */
				memset(&qc->result_tf, 0, sizeof(qc->result_tf));
				ata_eh_qc_retry(qc);
			}
		}
	}
}
/**
 *	ata_do_eh - do standard error handling
 *	@ap: host port to handle error for
 *	@softreset: softreset method (can be NULL)
 *	@hardreset: hardreset method (can be NULL)
 *	@postreset: postreset method (can be NULL)
 *
 *	Perform standard error handling sequence.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_do_eh(struct ata_port *ap, ata_reset_fn_t softreset,
	       ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
	/* analyze -> report -> recover -> finish */
	ata_eh_autopsy(ap);
	ata_eh_report(ap);
	ata_eh_recover(ap, softreset, hardreset, postreset);
	ata_eh_finish(ap);
}