libata-eh.c 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561
  1. /*
  2. * libata-eh.c - libata error handling
  3. *
  4. * Maintained by: Jeff Garzik <jgarzik@pobox.com>
  5. * Please ALWAYS copy linux-ide@vger.kernel.org
  6. * on emails.
  7. *
  8. * Copyright 2006 Tejun Heo <htejun@gmail.com>
  9. *
  10. *
  11. * This program is free software; you can redistribute it and/or
  12. * modify it under the terms of the GNU General Public License as
  13. * published by the Free Software Foundation; either version 2, or
  14. * (at your option) any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  19. * General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License
  22. * along with this program; see the file COPYING. If not, write to
  23. * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
  24. * USA.
  25. *
  26. *
  27. * libata documentation is available via 'make {ps|pdf}docs',
  28. * as Documentation/DocBook/libata.*
  29. *
  30. * Hardware documentation available from http://www.t13.org/ and
  31. * http://www.sata-io.org/
  32. *
  33. */
  34. #include <linux/config.h>
  35. #include <linux/kernel.h>
  36. #include <scsi/scsi.h>
  37. #include <scsi/scsi_host.h>
  38. #include <scsi/scsi_eh.h>
  39. #include <scsi/scsi_device.h>
  40. #include <scsi/scsi_cmnd.h>
  41. #include "scsi_transport_api.h"
  42. #include <linux/libata.h>
  43. #include "libata.h"
  44. static void __ata_port_freeze(struct ata_port *ap);
  45. static void ata_ering_record(struct ata_ering *ering, int is_io,
  46. unsigned int err_mask)
  47. {
  48. struct ata_ering_entry *ent;
  49. WARN_ON(!err_mask);
  50. ering->cursor++;
  51. ering->cursor %= ATA_ERING_SIZE;
  52. ent = &ering->ring[ering->cursor];
  53. ent->is_io = is_io;
  54. ent->err_mask = err_mask;
  55. ent->timestamp = get_jiffies_64();
  56. }
  57. static struct ata_ering_entry * ata_ering_top(struct ata_ering *ering)
  58. {
  59. struct ata_ering_entry *ent = &ering->ring[ering->cursor];
  60. if (!ent->err_mask)
  61. return NULL;
  62. return ent;
  63. }
  64. static int ata_ering_map(struct ata_ering *ering,
  65. int (*map_fn)(struct ata_ering_entry *, void *),
  66. void *arg)
  67. {
  68. int idx, rc = 0;
  69. struct ata_ering_entry *ent;
  70. idx = ering->cursor;
  71. do {
  72. ent = &ering->ring[idx];
  73. if (!ent->err_mask)
  74. break;
  75. rc = map_fn(ent, arg);
  76. if (rc)
  77. break;
  78. idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
  79. } while (idx != ering->cursor);
  80. return rc;
  81. }
/**
 *	ata_scsi_timed_out - SCSI layer time out callback
 *	@cmd: timed out SCSI command
 *
 *	Handles SCSI layer timeout.  We race with normal completion of
 *	the qc for @cmd.  If the qc is already gone, we lose and let
 *	the scsi command finish (EH_HANDLED).  Otherwise, the qc has
 *	timed out and EH should be invoked.  Prevent ata_qc_complete()
 *	from finishing it by setting EH_SCHEDULED and return
 *	EH_NOT_HANDLED.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Called from timer context
 *
 *	RETURNS:
 *	EH_HANDLED or EH_NOT_HANDLED
 */
enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	struct ata_queued_cmd *qc;
	enum scsi_eh_timer_return ret;

	DPRINTK("ENTER\n");

	/* New EH ports always defer to ata_scsi_error(); timeouts are
	 * sorted out there, so just report not-handled.
	 */
	if (ap->ops->error_handler) {
		ret = EH_NOT_HANDLED;
		goto out;
	}

	/* Old EH: the active qc is the only candidate.  Take the
	 * host_set lock so we don't race its normal completion.
	 */
	ret = EH_HANDLED;
	spin_lock_irqsave(&ap->host_set->lock, flags);
	qc = ata_qc_from_tag(ap, ap->active_tag);
	if (qc) {
		WARN_ON(qc->scsicmd != cmd);
		/* keep ata_qc_complete() from finishing this qc */
		qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
		qc->err_mask |= AC_ERR_TIMEOUT;
		ret = EH_NOT_HANDLED;
	}
	spin_unlock_irqrestore(&ap->host_set->lock, flags);

 out:
	DPRINTK("EXIT, ret=%d\n", ret);
	return ret;
}
/**
 *	ata_scsi_error - SCSI layer error handler callback
 *	@host: SCSI host on which error occurred
 *
 *	Handles SCSI-layer-thrown error events.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 *
 *	RETURNS:
 *	Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	spinlock_t *hs_lock = &ap->host_set->lock;
	int i, repeat_cnt = ATA_EH_MAX_REPEAT;
	unsigned long flags;

	DPRINTK("ENTER\n");

	/* synchronize with port task */
	ata_port_flush_task(ap);

	/* synchronize with host_set lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both cmpletions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		spin_lock_irqsave(hs_lock, flags);

		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
			struct ata_queued_cmd *qc;

			/* find the active qc, if any, owning this scmd */
			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs.  They belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		spin_unlock_irqrestore(hs_lock, flags);
	} else
		spin_unlock_wait(hs_lock);

 repeat:
	/* invoke error handler */
	if (ap->ops->error_handler) {
		/* fetch & clear EH info */
		spin_lock_irqsave(hs_lock, flags);

		memset(&ap->eh_context, 0, sizeof(ap->eh_context));
		ap->eh_context.i = ap->eh_info;
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		ap->flags &= ~ATA_FLAG_EH_PENDING;

		spin_unlock_irqrestore(hs_lock, flags);

		/* invoke EH */
		ap->ops->error_handler(ap);

		/* Exception might have happend after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(hs_lock, flags);

		if (ap->flags & ATA_FLAG_EH_PENDING) {
			if (--repeat_cnt) {
				ata_port_printk(ap, KERN_INFO,
					"EH pending after completion, "
					"repeating EH (cnt=%d)\n", repeat_cnt);
				spin_unlock_irqrestore(hs_lock, flags);
				goto repeat;
			}
			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
					"tries, giving up\n", ATA_EH_MAX_REPEAT);
		}

		/* this run is complete, make sure EH info is clear */
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		/* Clear host_eh_scheduled while holding hs_lock such
		 * that if exception occurs after this point but
		 * before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		host->host_eh_scheduled = 0;

		spin_unlock_irqrestore(hs_lock, flags);
	} else {
		/* old EH: a single active command is handled via eng_timeout */
		WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	/* finish or retry handled scmd's and clean up */
	WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(hs_lock, flags);

	if (ap->flags & ATA_FLAG_RECOVERED)
		ata_port_printk(ap, KERN_INFO, "EH complete\n");
	ap->flags &= ~ATA_FLAG_RECOVERED;

	spin_unlock_irqrestore(hs_lock, flags);

	DPRINTK("EXIT\n");
}
/**
 *	ata_qc_timeout - Handle timeout of queued command
 *	@qc: Command that timed out
 *
 *	Some part of the kernel (currently, only the SCSI layer)
 *	has noticed that the active command on port @ap has not
 *	completed after a specified length of time.  Handle this
 *	condition by disabling DMA (if necessary) and completing
 *	transactions, with error if necessary.
 *
 *	This also handles the case of the "lost interrupt", where
 *	for some reason (possibly hardware bug, possibly driver bug)
 *	an interrupt was not delivered to the driver, even though the
 *	transaction completed successfully.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 */
static void ata_qc_timeout(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct ata_host_set *host_set = ap->host_set;
	u8 host_stat = 0, drv_stat;
	unsigned long flags;

	DPRINTK("ENTER\n");

	ap->hsm_task_state = HSM_ST_IDLE;

	spin_lock_irqsave(&host_set->lock, flags);

	switch (qc->tf.protocol) {

	case ATA_PROT_DMA:
	case ATA_PROT_ATAPI_DMA:
		host_stat = ap->ops->bmdma_status(ap);

		/* before we do anything else, clear DMA-Start bit */
		ap->ops->bmdma_stop(qc);

		/* fall through */

	default:
		/* read alt-status first, then status proper */
		ata_altstatus(ap);
		drv_stat = ata_chk_status(ap);

		/* ack bmdma irq events */
		ap->ops->irq_clear(ap);

		ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, "
			       "stat 0x%x host_stat 0x%x\n",
			       qc->tf.command, drv_stat, host_stat);

		/* complete taskfile transaction */
		qc->err_mask |= AC_ERR_TIMEOUT;
		break;
	}

	spin_unlock_irqrestore(&host_set->lock, flags);

	ata_eh_qc_complete(qc);

	DPRINTK("EXIT\n");
}
  305. /**
  306. * ata_eng_timeout - Handle timeout of queued command
  307. * @ap: Port on which timed-out command is active
  308. *
  309. * Some part of the kernel (currently, only the SCSI layer)
  310. * has noticed that the active command on port @ap has not
  311. * completed after a specified length of time. Handle this
  312. * condition by disabling DMA (if necessary) and completing
  313. * transactions, with error if necessary.
  314. *
  315. * This also handles the case of the "lost interrupt", where
  316. * for some reason (possibly hardware bug, possibly driver bug)
  317. * an interrupt was not delivered to the driver, even though the
  318. * transaction completed successfully.
  319. *
  320. * TODO: kill this function once old EH is gone.
  321. *
  322. * LOCKING:
  323. * Inherited from SCSI layer (none, can sleep)
  324. */
  325. void ata_eng_timeout(struct ata_port *ap)
  326. {
  327. DPRINTK("ENTER\n");
  328. ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));
  329. DPRINTK("EXIT\n");
  330. }
  331. /**
  332. * ata_qc_schedule_eh - schedule qc for error handling
  333. * @qc: command to schedule error handling for
  334. *
  335. * Schedule error handling for @qc. EH will kick in as soon as
  336. * other commands are drained.
  337. *
  338. * LOCKING:
  339. * spin_lock_irqsave(host_set lock)
  340. */
  341. void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
  342. {
  343. struct ata_port *ap = qc->ap;
  344. WARN_ON(!ap->ops->error_handler);
  345. qc->flags |= ATA_QCFLAG_FAILED;
  346. qc->ap->flags |= ATA_FLAG_EH_PENDING;
  347. /* The following will fail if timeout has already expired.
  348. * ata_scsi_error() takes care of such scmds on EH entry.
  349. * Note that ATA_QCFLAG_FAILED is unconditionally set after
  350. * this function completes.
  351. */
  352. scsi_req_abort_cmd(qc->scsicmd);
  353. }
  354. /**
  355. * ata_port_schedule_eh - schedule error handling without a qc
  356. * @ap: ATA port to schedule EH for
  357. *
  358. * Schedule error handling for @ap. EH will kick in as soon as
  359. * all commands are drained.
  360. *
  361. * LOCKING:
  362. * spin_lock_irqsave(host_set lock)
  363. */
  364. void ata_port_schedule_eh(struct ata_port *ap)
  365. {
  366. WARN_ON(!ap->ops->error_handler);
  367. ap->flags |= ATA_FLAG_EH_PENDING;
  368. scsi_schedule_eh(ap->host);
  369. DPRINTK("port EH scheduled\n");
  370. }
  371. /**
  372. * ata_port_abort - abort all qc's on the port
  373. * @ap: ATA port to abort qc's for
  374. *
  375. * Abort all active qc's of @ap and schedule EH.
  376. *
  377. * LOCKING:
  378. * spin_lock_irqsave(host_set lock)
  379. *
  380. * RETURNS:
  381. * Number of aborted qc's.
  382. */
  383. int ata_port_abort(struct ata_port *ap)
  384. {
  385. int tag, nr_aborted = 0;
  386. WARN_ON(!ap->ops->error_handler);
  387. for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
  388. struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
  389. if (qc) {
  390. qc->flags |= ATA_QCFLAG_FAILED;
  391. ata_qc_complete(qc);
  392. nr_aborted++;
  393. }
  394. }
  395. if (!nr_aborted)
  396. ata_port_schedule_eh(ap);
  397. return nr_aborted;
  398. }
  399. /**
  400. * __ata_port_freeze - freeze port
  401. * @ap: ATA port to freeze
  402. *
  403. * This function is called when HSM violation or some other
  404. * condition disrupts normal operation of the port. Frozen port
  405. * is not allowed to perform any operation until the port is
  406. * thawed, which usually follows a successful reset.
  407. *
  408. * ap->ops->freeze() callback can be used for freezing the port
  409. * hardware-wise (e.g. mask interrupt and stop DMA engine). If a
  410. * port cannot be frozen hardware-wise, the interrupt handler
  411. * must ack and clear interrupts unconditionally while the port
  412. * is frozen.
  413. *
  414. * LOCKING:
  415. * spin_lock_irqsave(host_set lock)
  416. */
  417. static void __ata_port_freeze(struct ata_port *ap)
  418. {
  419. WARN_ON(!ap->ops->error_handler);
  420. if (ap->ops->freeze)
  421. ap->ops->freeze(ap);
  422. ap->flags |= ATA_FLAG_FROZEN;
  423. DPRINTK("ata%u port frozen\n", ap->id);
  424. }
  425. /**
  426. * ata_port_freeze - abort & freeze port
  427. * @ap: ATA port to freeze
  428. *
  429. * Abort and freeze @ap.
  430. *
  431. * LOCKING:
  432. * spin_lock_irqsave(host_set lock)
  433. *
  434. * RETURNS:
  435. * Number of aborted commands.
  436. */
  437. int ata_port_freeze(struct ata_port *ap)
  438. {
  439. int nr_aborted;
  440. WARN_ON(!ap->ops->error_handler);
  441. nr_aborted = ata_port_abort(ap);
  442. __ata_port_freeze(ap);
  443. return nr_aborted;
  444. }
  445. /**
  446. * ata_eh_freeze_port - EH helper to freeze port
  447. * @ap: ATA port to freeze
  448. *
  449. * Freeze @ap.
  450. *
  451. * LOCKING:
  452. * None.
  453. */
  454. void ata_eh_freeze_port(struct ata_port *ap)
  455. {
  456. unsigned long flags;
  457. if (!ap->ops->error_handler)
  458. return;
  459. spin_lock_irqsave(&ap->host_set->lock, flags);
  460. __ata_port_freeze(ap);
  461. spin_unlock_irqrestore(&ap->host_set->lock, flags);
  462. }
  463. /**
  464. * ata_port_thaw_port - EH helper to thaw port
  465. * @ap: ATA port to thaw
  466. *
  467. * Thaw frozen port @ap.
  468. *
  469. * LOCKING:
  470. * None.
  471. */
  472. void ata_eh_thaw_port(struct ata_port *ap)
  473. {
  474. unsigned long flags;
  475. if (!ap->ops->error_handler)
  476. return;
  477. spin_lock_irqsave(&ap->host_set->lock, flags);
  478. ap->flags &= ~ATA_FLAG_FROZEN;
  479. if (ap->ops->thaw)
  480. ap->ops->thaw(ap);
  481. spin_unlock_irqrestore(&ap->host_set->lock, flags);
  482. DPRINTK("ata%u port thawed\n", ap->id);
  483. }
  484. static void ata_eh_scsidone(struct scsi_cmnd *scmd)
  485. {
  486. /* nada */
  487. }
  488. static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
  489. {
  490. struct ata_port *ap = qc->ap;
  491. struct scsi_cmnd *scmd = qc->scsicmd;
  492. unsigned long flags;
  493. spin_lock_irqsave(&ap->host_set->lock, flags);
  494. qc->scsidone = ata_eh_scsidone;
  495. __ata_qc_complete(qc);
  496. WARN_ON(ata_tag_valid(qc->tag));
  497. spin_unlock_irqrestore(&ap->host_set->lock, flags);
  498. scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
  499. }
  500. /**
  501. * ata_eh_qc_complete - Complete an active ATA command from EH
  502. * @qc: Command to complete
  503. *
  504. * Indicate to the mid and upper layers that an ATA command has
  505. * completed. To be used from EH.
  506. */
  507. void ata_eh_qc_complete(struct ata_queued_cmd *qc)
  508. {
  509. struct scsi_cmnd *scmd = qc->scsicmd;
  510. scmd->retries = scmd->allowed;
  511. __ata_eh_qc_complete(qc);
  512. }
  513. /**
  514. * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
  515. * @qc: Command to retry
  516. *
  517. * Indicate to the mid and upper layers that an ATA command
  518. * should be retried. To be used from EH.
  519. *
  520. * SCSI midlayer limits the number of retries to scmd->allowed.
  521. * scmd->retries is decremented for commands which get retried
  522. * due to unrelated failures (qc->err_mask is zero).
  523. */
  524. void ata_eh_qc_retry(struct ata_queued_cmd *qc)
  525. {
  526. struct scsi_cmnd *scmd = qc->scsicmd;
  527. if (!qc->err_mask && scmd->retries)
  528. scmd->retries--;
  529. __ata_eh_qc_complete(qc);
  530. }
  531. /**
  532. * ata_eh_about_to_do - about to perform eh_action
  533. * @ap: target ATA port
  534. * @action: action about to be performed
  535. *
  536. * Called just before performing EH actions to clear related bits
  537. * in @ap->eh_info such that eh actions are not unnecessarily
  538. * repeated.
  539. *
  540. * LOCKING:
  541. * None.
  542. */
  543. static void ata_eh_about_to_do(struct ata_port *ap, unsigned int action)
  544. {
  545. unsigned long flags;
  546. spin_lock_irqsave(&ap->host_set->lock, flags);
  547. ap->eh_info.action &= ~action;
  548. ap->flags |= ATA_FLAG_RECOVERED;
  549. spin_unlock_irqrestore(&ap->host_set->lock, flags);
  550. }
  551. /**
  552. * ata_err_string - convert err_mask to descriptive string
  553. * @err_mask: error mask to convert to string
  554. *
  555. * Convert @err_mask to descriptive string. Errors are
  556. * prioritized according to severity and only the most severe
  557. * error is reported.
  558. *
  559. * LOCKING:
  560. * None.
  561. *
  562. * RETURNS:
  563. * Descriptive string for @err_mask
  564. */
  565. static const char * ata_err_string(unsigned int err_mask)
  566. {
  567. if (err_mask & AC_ERR_HOST_BUS)
  568. return "host bus error";
  569. if (err_mask & AC_ERR_ATA_BUS)
  570. return "ATA bus error";
  571. if (err_mask & AC_ERR_TIMEOUT)
  572. return "timeout";
  573. if (err_mask & AC_ERR_HSM)
  574. return "HSM violation";
  575. if (err_mask & AC_ERR_SYSTEM)
  576. return "internal error";
  577. if (err_mask & AC_ERR_MEDIA)
  578. return "media error";
  579. if (err_mask & AC_ERR_INVALID)
  580. return "invalid argument";
  581. if (err_mask & AC_ERR_DEV)
  582. return "device error";
  583. return "unknown error";
  584. }
  585. /**
  586. * ata_read_log_page - read a specific log page
  587. * @dev: target device
  588. * @page: page to read
  589. * @buf: buffer to store read page
  590. * @sectors: number of sectors to read
  591. *
  592. * Read log page using READ_LOG_EXT command.
  593. *
  594. * LOCKING:
  595. * Kernel thread context (may sleep).
  596. *
  597. * RETURNS:
  598. * 0 on success, AC_ERR_* mask otherwise.
  599. */
  600. static unsigned int ata_read_log_page(struct ata_device *dev,
  601. u8 page, void *buf, unsigned int sectors)
  602. {
  603. struct ata_taskfile tf;
  604. unsigned int err_mask;
  605. DPRINTK("read log page - page %d\n", page);
  606. ata_tf_init(dev, &tf);
  607. tf.command = ATA_CMD_READ_LOG_EXT;
  608. tf.lbal = page;
  609. tf.nsect = sectors;
  610. tf.hob_nsect = sectors >> 8;
  611. tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
  612. tf.protocol = ATA_PROT_PIO;
  613. err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
  614. buf, sectors * ATA_SECT_SIZE);
  615. DPRINTK("EXIT, err_mask=%x\n", err_mask);
  616. return err_mask;
  617. }
  618. /**
  619. * ata_eh_read_log_10h - Read log page 10h for NCQ error details
  620. * @dev: Device to read log page 10h from
  621. * @tag: Resulting tag of the failed command
  622. * @tf: Resulting taskfile registers of the failed command
  623. *
  624. * Read log page 10h to obtain NCQ error details and clear error
  625. * condition.
  626. *
  627. * LOCKING:
  628. * Kernel thread context (may sleep).
  629. *
  630. * RETURNS:
  631. * 0 on success, -errno otherwise.
  632. */
  633. static int ata_eh_read_log_10h(struct ata_device *dev,
  634. int *tag, struct ata_taskfile *tf)
  635. {
  636. u8 *buf = dev->ap->sector_buf;
  637. unsigned int err_mask;
  638. u8 csum;
  639. int i;
  640. err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
  641. if (err_mask)
  642. return -EIO;
  643. csum = 0;
  644. for (i = 0; i < ATA_SECT_SIZE; i++)
  645. csum += buf[i];
  646. if (csum)
  647. ata_dev_printk(dev, KERN_WARNING,
  648. "invalid checksum 0x%x on log page 10h\n", csum);
  649. if (buf[0] & 0x80)
  650. return -ENOENT;
  651. *tag = buf[0] & 0x1f;
  652. tf->command = buf[2];
  653. tf->feature = buf[3];
  654. tf->lbal = buf[4];
  655. tf->lbam = buf[5];
  656. tf->lbah = buf[6];
  657. tf->device = buf[7];
  658. tf->hob_lbal = buf[8];
  659. tf->hob_lbam = buf[9];
  660. tf->hob_lbah = buf[10];
  661. tf->nsect = buf[12];
  662. tf->hob_nsect = buf[13];
  663. return 0;
  664. }
/**
 *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 *	@dev: device to perform REQUEST_SENSE to
 *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
 *
 *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
 *	SENSE.  This function is EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure
 */
static unsigned int atapi_eh_request_sense(struct ata_device *dev,
					   unsigned char *sense_buf)
{
	struct ata_port *ap = dev->ap;
	struct ata_taskfile tf;
	u8 cdb[ATAPI_CDB_LEN];

	DPRINTK("ATAPI request sense\n");

	ata_tf_init(dev, &tf);

	/* FIXME: is this needed? */
	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* XXX: why tf_read here? */
	ap->ops->tf_read(ap, &tf);

	/* fill these in, for the case where they are -not- overwritten */
	sense_buf[0] = 0x70;
	sense_buf[2] = tf.feature >> 4;

	/* build a REQUEST SENSE CDB */
	memset(cdb, 0, ATAPI_CDB_LEN);
	cdb[0] = REQUEST_SENSE;
	cdb[4] = SCSI_SENSE_BUFFERSIZE;

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATA_PROT_ATAPI_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATA_PROT_ATAPI;
		/* PIO byte count limit in lbam/lbah */
		tf.lbam = (8 * 1024) & 0xff;
		tf.lbah = (8 * 1024) >> 8;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE);
}
  711. /**
  712. * ata_eh_analyze_serror - analyze SError for a failed port
  713. * @ap: ATA port to analyze SError for
  714. *
  715. * Analyze SError if available and further determine cause of
  716. * failure.
  717. *
  718. * LOCKING:
  719. * None.
  720. */
  721. static void ata_eh_analyze_serror(struct ata_port *ap)
  722. {
  723. struct ata_eh_context *ehc = &ap->eh_context;
  724. u32 serror = ehc->i.serror;
  725. unsigned int err_mask = 0, action = 0;
  726. if (serror & SERR_PERSISTENT) {
  727. err_mask |= AC_ERR_ATA_BUS;
  728. action |= ATA_EH_HARDRESET;
  729. }
  730. if (serror &
  731. (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {
  732. err_mask |= AC_ERR_ATA_BUS;
  733. action |= ATA_EH_SOFTRESET;
  734. }
  735. if (serror & SERR_PROTOCOL) {
  736. err_mask |= AC_ERR_HSM;
  737. action |= ATA_EH_SOFTRESET;
  738. }
  739. if (serror & SERR_INTERNAL) {
  740. err_mask |= AC_ERR_SYSTEM;
  741. action |= ATA_EH_SOFTRESET;
  742. }
  743. if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG)) {
  744. err_mask |= AC_ERR_ATA_BUS;
  745. action |= ATA_EH_HARDRESET;
  746. }
  747. ehc->i.err_mask |= err_mask;
  748. ehc->i.action |= action;
  749. }
/**
 *	ata_eh_analyze_ncq_error - analyze NCQ error
 *	@ap: ATA port to analyze NCQ error for
 *
 *	Read log page 10h, determine the offending qc and acquire
 *	error status TF.  For NCQ device errors, all LLDDs have to do
 *	is setting AC_ERR_DEV in ehi->err_mask.  This function takes
 *	care of the rest.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_analyze_ncq_error(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev = ap->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->flags & ATA_FLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!ap->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already?  Any failed qc that already
	 * carries an err_mask means the LLDD did its own analysis.
	 */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_port_printk(ap, KERN_ERR, "failed to read log page 10h "
				"(errno=%d)\n", rc);
		return;
	}

	/* the reported tag must refer to an in-flight NCQ command */
	if (!(ap->sactive & (1 << tag))) {
		ata_port_printk(ap, KERN_ERR, "log page 10h reported "
				"inactive tag %d\n", tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->err_mask |= AC_ERR_DEV;
	/* the device error now belongs to the qc, not the port */
	ehc->i.err_mask &= ~AC_ERR_DEV;
}
  801. /**
  802. * ata_eh_analyze_tf - analyze taskfile of a failed qc
  803. * @qc: qc to analyze
  804. * @tf: Taskfile registers to analyze
  805. *
  806. * Analyze taskfile of @qc and further determine cause of
  807. * failure. This function also requests ATAPI sense data if
  808. * avaliable.
  809. *
  810. * LOCKING:
  811. * Kernel thread context (may sleep).
  812. *
  813. * RETURNS:
  814. * Determined recovery action
  815. */
  816. static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
  817. const struct ata_taskfile *tf)
  818. {
  819. unsigned int tmp, action = 0;
  820. u8 stat = tf->command, err = tf->feature;
  821. if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
  822. qc->err_mask |= AC_ERR_HSM;
  823. return ATA_EH_SOFTRESET;
  824. }
  825. if (!(qc->err_mask & AC_ERR_DEV))
  826. return 0;
  827. switch (qc->dev->class) {
  828. case ATA_DEV_ATA:
  829. if (err & ATA_ICRC)
  830. qc->err_mask |= AC_ERR_ATA_BUS;
  831. if (err & ATA_UNC)
  832. qc->err_mask |= AC_ERR_MEDIA;
  833. if (err & ATA_IDNF)
  834. qc->err_mask |= AC_ERR_INVALID;
  835. break;
  836. case ATA_DEV_ATAPI:
  837. tmp = atapi_eh_request_sense(qc->dev,
  838. qc->scsicmd->sense_buffer);
  839. if (!tmp) {
  840. /* ATA_QCFLAG_SENSE_VALID is used to tell
  841. * atapi_qc_complete() that sense data is
  842. * already valid.
  843. *
  844. * TODO: interpret sense data and set
  845. * appropriate err_mask.
  846. */
  847. qc->flags |= ATA_QCFLAG_SENSE_VALID;
  848. } else
  849. qc->err_mask |= tmp;
  850. }
  851. if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
  852. action |= ATA_EH_SOFTRESET;
  853. return action;
  854. }
  855. static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent)
  856. {
  857. if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT))
  858. return 1;
  859. if (ent->is_io) {
  860. if (ent->err_mask & AC_ERR_HSM)
  861. return 1;
  862. if ((ent->err_mask &
  863. (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
  864. return 2;
  865. }
  866. return 0;
  867. }
/* Accumulator passed to speed_down_needed_cb() through ata_ering_map():
 * counts entries per error category that are newer than @since
 * (64-bit jiffies).  Index matches ata_eh_categorize_ering_entry().
 */
struct speed_down_needed_arg {
	u64 since;
	int nr_errors[3];
};
  872. static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg)
  873. {
  874. struct speed_down_needed_arg *arg = void_arg;
  875. if (ent->timestamp < arg->since)
  876. return -1;
  877. arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++;
  878. return 0;
  879. }
  880. /**
  881. * ata_eh_speed_down_needed - Determine wheter speed down is necessary
  882. * @dev: Device of interest
  883. *
  884. * This function examines error ring of @dev and determines
  885. * whether speed down is necessary. Speed down is necessary if
  886. * there have been more than 3 of Cat-1 errors or 10 of Cat-2
  887. * errors during last 15 minutes.
  888. *
  889. * Cat-1 errors are ATA_BUS, TIMEOUT for any command and HSM
  890. * violation for known supported commands.
  891. *
  892. * Cat-2 errors are unclassified DEV error for known supported
  893. * command.
  894. *
  895. * LOCKING:
  896. * Inherited from caller.
  897. *
  898. * RETURNS:
  899. * 1 if speed down is necessary, 0 otherwise
  900. */
  901. static int ata_eh_speed_down_needed(struct ata_device *dev)
  902. {
  903. const u64 interval = 15LLU * 60 * HZ;
  904. static const int err_limits[3] = { -1, 3, 10 };
  905. struct speed_down_needed_arg arg;
  906. struct ata_ering_entry *ent;
  907. int err_cat;
  908. u64 j64;
  909. ent = ata_ering_top(&dev->ering);
  910. if (!ent)
  911. return 0;
  912. err_cat = ata_eh_categorize_ering_entry(ent);
  913. if (err_cat == 0)
  914. return 0;
  915. memset(&arg, 0, sizeof(arg));
  916. j64 = get_jiffies_64();
  917. if (j64 >= interval)
  918. arg.since = j64 - interval;
  919. else
  920. arg.since = 0;
  921. ata_ering_map(&dev->ering, speed_down_needed_cb, &arg);
  922. return arg.nr_errors[err_cat] > err_limits[err_cat];
  923. }
  924. /**
  925. * ata_eh_speed_down - record error and speed down if necessary
  926. * @dev: Failed device
  927. * @is_io: Did the device fail during normal IO?
  928. * @err_mask: err_mask of the error
  929. *
  930. * Record error and examine error history to determine whether
  931. * adjusting transmission speed is necessary. It also sets
  932. * transmission limits appropriately if such adjustment is
  933. * necessary.
  934. *
  935. * LOCKING:
  936. * Kernel thread context (may sleep).
  937. *
  938. * RETURNS:
  939. * 0 on success, -errno otherwise
  940. */
  941. static int ata_eh_speed_down(struct ata_device *dev, int is_io,
  942. unsigned int err_mask)
  943. {
  944. if (!err_mask)
  945. return 0;
  946. /* record error and determine whether speed down is necessary */
  947. ata_ering_record(&dev->ering, is_io, err_mask);
  948. if (!ata_eh_speed_down_needed(dev))
  949. return 0;
  950. /* speed down SATA link speed if possible */
  951. if (sata_down_spd_limit(dev->ap) == 0)
  952. return ATA_EH_HARDRESET;
  953. /* lower transfer mode */
  954. if (ata_down_xfermask_limit(dev, 0) == 0)
  955. return ATA_EH_SOFTRESET;
  956. ata_dev_printk(dev, KERN_ERR,
  957. "speed down requested but no transfer mode left\n");
  958. return 0;
  959. }
/**
 *	ata_eh_autopsy - analyze error and determine recovery action
 *	@ap: ATA port to perform autopsy on
 *
 *	Analyze why @ap failed and determine which recovery action is
 *	needed.  This function also sets more detailed AC_ERR_* values
 *	and fills sense data for ATAPI CHECK SENSE.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_autopsy(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int action = ehc->i.action;
	struct ata_device *failed_dev = NULL;
	unsigned int all_err_mask = 0;
	int tag, is_io = 0;
	u32 serror;
	int rc;

	DPRINTK("ENTER\n");

	/* obtain and analyze SError */
	rc = sata_scr_read(ap, SCR_ERROR, &serror);
	if (rc == 0) {
		ehc->i.serror |= serror;
		ata_eh_analyze_serror(ap);
	} else if (rc != -EOPNOTSUPP)
		/* SCR read failed on a port that supports SCR access;
		 * link state is unknown, force a hardreset
		 */
		action |= ATA_EH_HARDRESET;

	/* analyze NCQ failure */
	ata_eh_analyze_ncq_error(ap);

	/* any real error trumps AC_ERR_OTHER */
	if (ehc->i.err_mask & ~AC_ERR_OTHER)
		ehc->i.err_mask &= ~AC_ERR_OTHER;

	all_err_mask |= ehc->i.err_mask;

	/* walk every failed qc, refining its err_mask and accumulating
	 * the combined picture into action/all_err_mask
	 */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		/* inherit upper level err_mask */
		qc->err_mask |= ehc->i.err_mask;

		/* analyze TF */
		action |= ata_eh_analyze_tf(qc, &qc->result_tf);

		/* DEV errors are probably spurious in case of ATA_BUS error */
		if (qc->err_mask & AC_ERR_ATA_BUS)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
					  AC_ERR_INVALID);

		/* any real error trumps unknown error */
		if (qc->err_mask & ~AC_ERR_OTHER)
			qc->err_mask &= ~AC_ERR_OTHER;

		/* SENSE_VALID trumps dev/unknown error and revalidation */
		if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
			action &= ~ATA_EH_REVALIDATE;
		}

		/* accumulate error info */
		failed_dev = qc->dev;
		all_err_mask |= qc->err_mask;
		if (qc->flags & ATA_QCFLAG_IO)
			is_io = 1;
	}

	/* speed down iff command was in progress */
	if (failed_dev)
		action |= ata_eh_speed_down(failed_dev, is_io, all_err_mask);

	/* enforce default EH actions */
	if (ap->flags & ATA_FLAG_FROZEN ||
	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
		action |= ATA_EH_SOFTRESET;
	else if (all_err_mask)
		action |= ATA_EH_REVALIDATE;

	/* record autopsy result */
	ehc->i.dev = failed_dev;
	ehc->i.action = action;

	DPRINTK("EXIT\n");
}
/**
 *	ata_eh_report - report error handling to user
 *	@ap: ATA port EH is going on
 *
 *	Report EH to user.  Prints nothing when no qc actually failed
 *	(sense-valid qcs with no err_mask don't count) and there is no
 *	port-wide error.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_report(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	const char *frozen, *desc;
	int tag, nr_failed = 0;

	desc = NULL;
	if (ehc->i.desc[0] != '\0')
		desc = ehc->i.desc;

	/* count qcs that actually need reporting */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;
		if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
			continue;

		nr_failed++;
	}

	if (!nr_failed && !ehc->i.err_mask)
		return;

	frozen = "";
	if (ap->flags & ATA_FLAG_FROZEN)
		frozen = " frozen";

	/* summary line: attribute to the failed device when known,
	 * otherwise to the port
	 */
	if (ehc->i.dev) {
		ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x "
			       "SAct 0x%x SErr 0x%x action 0x%x%s\n",
			       ehc->i.err_mask, ap->sactive, ehc->i.serror,
			       ehc->i.action, frozen);
		if (desc)
			ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc);
	} else {
		ata_port_printk(ap, KERN_ERR, "exception Emask 0x%x "
				"SAct 0x%x SErr 0x%x action 0x%x%s\n",
				ehc->i.err_mask, ap->sactive, ehc->i.serror,
				ehc->i.action, frozen);
		if (desc)
			ata_port_printk(ap, KERN_ERR, "(%s)\n", desc);
	}

	/* one line per failed qc */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask)
			continue;

		ata_dev_printk(qc->dev, KERN_ERR, "tag %d cmd 0x%x "
			       "Emask 0x%x stat 0x%x err 0x%x (%s)\n",
			       qc->tag, qc->tf.command, qc->err_mask,
			       qc->result_tf.command, qc->result_tf.feature,
			       ata_err_string(qc->err_mask));
	}
}
/* Reset @ap.  Prefers softreset unless a hardreset is required (spd
 * limit change pending or EH explicitly requested one).  Retries up
 * to ATA_EH_RESET_TRIES times, 5 secs apart; after a failed softreset
 * it escalates to hardreset when one is available, and after a failed
 * hardreset it lowers the SATA link speed limit before retrying.
 */
static int ata_eh_reset(struct ata_port *ap, ata_reset_fn_t softreset,
			ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int classes[ATA_MAX_DEVICES];
	int tries = ATA_EH_RESET_TRIES;
	ata_reset_fn_t reset;
	int rc;

	/* choose initial method: softreset only when allowed and no
	 * hardreset is demanded
	 */
	if (softreset && (!hardreset || (!sata_set_spd_needed(ap) &&
					 !(ehc->i.action & ATA_EH_HARDRESET))))
		reset = softreset;
	else
		reset = hardreset;

 retry:
	ata_port_printk(ap, KERN_INFO, "%s resetting port\n",
			reset == softreset ? "soft" : "hard");

	/* reset -- DID_RESET is recorded even for a failed attempt */
	ata_eh_about_to_do(ap, ATA_EH_RESET_MASK);
	ehc->i.flags |= ATA_EHI_DID_RESET;

	rc = ata_do_reset(ap, reset, classes);

	if (rc && --tries) {
		ata_port_printk(ap, KERN_WARNING,
				"%sreset failed, retrying in 5 secs\n",
				reset == softreset ? "soft" : "hard");
		ssleep(5);

		/* a failed hardreset suggests the link can't sustain
		 * the current speed; lower the limit before retrying
		 */
		if (reset == hardreset)
			sata_down_spd_limit(ap);
		/* escalate to hardreset for subsequent attempts */
		if (hardreset)
			reset = hardreset;
		goto retry;
	}

	if (rc == 0) {
		if (postreset)
			postreset(ap, classes);

		/* reset successful, schedule revalidation */
		ehc->i.dev = NULL;
		ehc->i.action &= ~ATA_EH_RESET_MASK;
		ehc->i.action |= ATA_EH_REVALIDATE;
	}

	return rc;
}
/* Revalidate devices on @ap if ATA_EH_REVALIDATE is pending.  Only
 * the device recorded in ehc->i.dev is revalidated when one is set;
 * otherwise every enabled device is.  On failure the offending device
 * is returned through @r_failed_dev.  Returns 0 or -errno.
 */
static int ata_eh_revalidate(struct ata_port *ap,
			     struct ata_device **r_failed_dev)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev;
	int i, rc = 0;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		dev = &ap->device[i];

		if (ehc->i.action & ATA_EH_REVALIDATE && ata_dev_enabled(dev) &&
		    (!ehc->i.dev || ehc->i.dev == dev)) {
			/* offline link can't answer IDENTIFY */
			if (ata_port_offline(ap)) {
				rc = -EIO;
				break;
			}

			ata_eh_about_to_do(ap, ATA_EH_REVALIDATE);
			rc = ata_dev_revalidate(dev,
					ehc->i.flags & ATA_EHI_DID_RESET);
			if (rc)
				break;

			/* clear the action only after success so a
			 * retry repeats the revalidation
			 */
			ehc->i.action &= ~ATA_EH_REVALIDATE;
		}
	}

	if (rc)
		*r_failed_dev = dev;

	DPRINTK("EXIT\n");
	return rc;
}
  1159. static int ata_port_nr_enabled(struct ata_port *ap)
  1160. {
  1161. int i, cnt = 0;
  1162. for (i = 0; i < ATA_MAX_DEVICES; i++)
  1163. if (ata_dev_enabled(&ap->device[i]))
  1164. cnt++;
  1165. return cnt;
  1166. }
/**
 *	ata_eh_recover - recover host port after error
 *	@ap: host port to recover
 *	@softreset: softreset method (can be NULL)
 *	@hardreset: hardreset method (can be NULL)
 *	@postreset: postreset method (can be NULL)
 *
 *	This is the alpha and omega, eum and yang, heart and soul of
 *	libata exception handling.  On entry, actions required to
 *	recover each device are recorded in eh_context.  This
 *	function executes all the operations with appropriate retrials
 *	and fallbacks to resurrect failed devices.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno on failure.
 */
static int ata_eh_recover(struct ata_port *ap, ata_reset_fn_t softreset,
			  ata_reset_fn_t hardreset,
			  ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev;
	int down_xfermask, i, rc;

	DPRINTK("ENTER\n");

	/* prep for recovery */
	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		dev = &ap->device[i];

		ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
	}

 retry:
	down_xfermask = 0;
	rc = 0;

	/* skip EH if possible.  No devices and not frozen -> nothing
	 * to recover.
	 */
	if (!ata_port_nr_enabled(ap) && !(ap->flags & ATA_FLAG_FROZEN))
		ehc->i.action = 0;

	/* reset */
	if (ehc->i.action & ATA_EH_RESET_MASK) {
		ata_eh_freeze_port(ap);

		rc = ata_eh_reset(ap, softreset, hardreset, postreset);
		if (rc) {
			ata_port_printk(ap, KERN_ERR,
					"reset failed, giving up\n");
			goto out;
		}

		ata_eh_thaw_port(ap);
	}

	/* revalidate existing devices */
	rc = ata_eh_revalidate(ap, &dev);
	if (rc)
		goto dev_fail;

	/* configure transfer mode if the port has been reset */
	if (ehc->i.flags & ATA_EHI_DID_RESET) {
		rc = ata_set_mode(ap, &dev);
		if (rc) {
			down_xfermask = 1;
			goto dev_fail;
		}
	}

	goto out;

 dev_fail:
	switch (rc) {
	case -ENODEV:
	case -EINVAL:
		/* device gone or fundamentally broken -- no point retrying */
		ehc->tries[dev->devno] = 0;
		break;
	case -EIO:
		sata_down_spd_limit(ap);
		/* fallthrough */
	default:
		ehc->tries[dev->devno]--;
		if (down_xfermask &&
		    ata_down_xfermask_limit(dev, ehc->tries[dev->devno] == 1))
			ehc->tries[dev->devno] = 0;
	}

	/* disable device if it has used up all its chances */
	if (ata_dev_enabled(dev) && !ehc->tries[dev->devno])
		ata_dev_disable(dev);

	/* soft didn't work?  be haaaaard */
	if (ehc->i.flags & ATA_EHI_DID_RESET)
		ehc->i.action |= ATA_EH_HARDRESET;
	else
		ehc->i.action |= ATA_EH_SOFTRESET;

	if (ata_port_nr_enabled(ap)) {
		ata_port_printk(ap, KERN_WARNING, "failed to recover some "
				"devices, retrying in 5 secs\n");
		ssleep(5);
	} else {
		/* no device left, repeat fast */
		msleep(500);
	}

	goto retry;

 out:
	if (rc) {
		/* recovery failed entirely -- take all devices down */
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			ata_dev_disable(&ap->device[i]);
	}

	DPRINTK("EXIT, rc=%d\n", rc);
	return rc;
}
/**
 *	ata_eh_finish - finish up EH
 *	@ap: host port to finish EH for
 *
 *	Recovery is complete.  Clean up EH states and retry or finish
 *	failed qcs.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_finish(struct ata_port *ap)
{
	int tag;

	/* retry or finish qcs */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask) {
			/* FIXME: Once EH migration is complete,
			 * generate sense data in this function,
			 * considering both err_mask and tf.
			 */
			if (qc->err_mask & AC_ERR_INVALID)
				/* invalid command can't succeed on retry */
				ata_eh_qc_complete(qc);
			else
				ata_eh_qc_retry(qc);
		} else {
			if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
				ata_eh_qc_complete(qc);
			} else {
				/* feed zero TF to sense generation */
				memset(&qc->result_tf, 0, sizeof(qc->result_tf));
				ata_eh_qc_retry(qc);
			}
		}
	}
}
/**
 *	ata_do_eh - do standard error handling
 *	@ap: host port to handle error for
 *	@softreset: softreset method (can be NULL)
 *	@hardreset: hardreset method (can be NULL)
 *	@postreset: postreset method (can be NULL)
 *
 *	Perform standard error handling sequence: analyze what went
 *	wrong, report it, attempt recovery, then retry or complete the
 *	failed qcs.  The four steps must run in this order.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_do_eh(struct ata_port *ap, ata_reset_fn_t softreset,
	       ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
	ata_eh_autopsy(ap);
	ata_eh_report(ap);
	ata_eh_recover(ap, softreset, hardreset, postreset);
	ata_eh_finish(ap);
}