libata-eh.c 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560
  1. /*
  2. * libata-eh.c - libata error handling
  3. *
  4. * Maintained by: Jeff Garzik <jgarzik@pobox.com>
  5. * Please ALWAYS copy linux-ide@vger.kernel.org
  6. * on emails.
  7. *
  8. * Copyright 2006 Tejun Heo <htejun@gmail.com>
  9. *
  10. *
  11. * This program is free software; you can redistribute it and/or
  12. * modify it under the terms of the GNU General Public License as
  13. * published by the Free Software Foundation; either version 2, or
  14. * (at your option) any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  19. * General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License
  22. * along with this program; see the file COPYING. If not, write to
  23. * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
  24. * USA.
  25. *
  26. *
  27. * libata documentation is available via 'make {ps|pdf}docs',
  28. * as Documentation/DocBook/libata.*
  29. *
  30. * Hardware documentation available from http://www.t13.org/ and
  31. * http://www.sata-io.org/
  32. *
  33. */
  34. #include <linux/config.h>
  35. #include <linux/kernel.h>
  36. #include <scsi/scsi.h>
  37. #include <scsi/scsi_host.h>
  38. #include <scsi/scsi_eh.h>
  39. #include <scsi/scsi_device.h>
  40. #include <scsi/scsi_cmnd.h>
  41. #include <linux/libata.h>
  42. #include "libata.h"
  43. static void __ata_port_freeze(struct ata_port *ap);
  44. static void ata_ering_record(struct ata_ering *ering, int is_io,
  45. unsigned int err_mask)
  46. {
  47. struct ata_ering_entry *ent;
  48. WARN_ON(!err_mask);
  49. ering->cursor++;
  50. ering->cursor %= ATA_ERING_SIZE;
  51. ent = &ering->ring[ering->cursor];
  52. ent->is_io = is_io;
  53. ent->err_mask = err_mask;
  54. ent->timestamp = get_jiffies_64();
  55. }
  56. static struct ata_ering_entry * ata_ering_top(struct ata_ering *ering)
  57. {
  58. struct ata_ering_entry *ent = &ering->ring[ering->cursor];
  59. if (!ent->err_mask)
  60. return NULL;
  61. return ent;
  62. }
  63. static int ata_ering_map(struct ata_ering *ering,
  64. int (*map_fn)(struct ata_ering_entry *, void *),
  65. void *arg)
  66. {
  67. int idx, rc = 0;
  68. struct ata_ering_entry *ent;
  69. idx = ering->cursor;
  70. do {
  71. ent = &ering->ring[idx];
  72. if (!ent->err_mask)
  73. break;
  74. rc = map_fn(ent, arg);
  75. if (rc)
  76. break;
  77. idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
  78. } while (idx != ering->cursor);
  79. return rc;
  80. }
  81. /**
  82. * ata_scsi_timed_out - SCSI layer time out callback
  83. * @cmd: timed out SCSI command
  84. *
  85. * Handles SCSI layer timeout. We race with normal completion of
  86. * the qc for @cmd. If the qc is already gone, we lose and let
  87. * the scsi command finish (EH_HANDLED). Otherwise, the qc has
  88. * timed out and EH should be invoked. Prevent ata_qc_complete()
  89. * from finishing it by setting EH_SCHEDULED and return
  90. * EH_NOT_HANDLED.
  91. *
  92. * TODO: kill this function once old EH is gone.
  93. *
  94. * LOCKING:
  95. * Called from timer context
  96. *
  97. * RETURNS:
  98. * EH_HANDLED or EH_NOT_HANDLED
  99. */
  100. enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
  101. {
  102. struct Scsi_Host *host = cmd->device->host;
  103. struct ata_port *ap = ata_shost_to_port(host);
  104. unsigned long flags;
  105. struct ata_queued_cmd *qc;
  106. enum scsi_eh_timer_return ret;
  107. DPRINTK("ENTER\n");
  108. if (ap->ops->error_handler) {
  109. ret = EH_NOT_HANDLED;
  110. goto out;
  111. }
  112. ret = EH_HANDLED;
  113. spin_lock_irqsave(&ap->host_set->lock, flags);
  114. qc = ata_qc_from_tag(ap, ap->active_tag);
  115. if (qc) {
  116. WARN_ON(qc->scsicmd != cmd);
  117. qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
  118. qc->err_mask |= AC_ERR_TIMEOUT;
  119. ret = EH_NOT_HANDLED;
  120. }
  121. spin_unlock_irqrestore(&ap->host_set->lock, flags);
  122. out:
  123. DPRINTK("EXIT, ret=%d\n", ret);
  124. return ret;
  125. }
/**
 *	ata_scsi_error - SCSI layer error handler callback
 *	@host: SCSI host on which error occurred
 *
 *	Handles SCSI-layer-thrown error events.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 *
 *	RETURNS:
 *	Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	spinlock_t *hs_lock = &ap->host_set->lock;
	int i, repeat_cnt = ATA_EH_MAX_REPEAT;
	unsigned long flags;

	DPRINTK("ENTER\n");

	/* synchronize with port task */
	ata_port_flush_task(ap);

	/* synchronize with host_set lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		spin_lock_irqsave(hs_lock, flags);

		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
			struct ata_queued_cmd *qc;

			/* find the active qc this scmd belongs to */
			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs.  They belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		spin_unlock_irqrestore(hs_lock, flags);
	} else
		spin_unlock_wait(hs_lock);

 repeat:
	/* invoke error handler */
	if (ap->ops->error_handler) {
		/* fetch & clear EH info */
		spin_lock_irqsave(hs_lock, flags);

		memset(&ap->eh_context, 0, sizeof(ap->eh_context));
		ap->eh_context.i = ap->eh_info;
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		ap->flags &= ~ATA_FLAG_EH_PENDING;

		spin_unlock_irqrestore(hs_lock, flags);

		/* invoke EH */
		ap->ops->error_handler(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(hs_lock, flags);

		if (ap->flags & ATA_FLAG_EH_PENDING) {
			if (--repeat_cnt) {
				ata_port_printk(ap, KERN_INFO,
					"EH pending after completion, "
					"repeating EH (cnt=%d)\n", repeat_cnt);
				spin_unlock_irqrestore(hs_lock, flags);
				goto repeat;
			}
			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
					"tries, giving up\n", ATA_EH_MAX_REPEAT);
		}

		/* this run is complete, make sure EH info is clear */
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		/* Clear host_eh_scheduled while holding hs_lock such
		 * that if exception occurs after this point but
		 * before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		host->host_eh_scheduled = 0;

		spin_unlock_irqrestore(hs_lock, flags);
	} else {
		/* old EH: single active command, punt to eng_timeout */
		WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	/* finish or retry handled scmd's and clean up */
	WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(hs_lock, flags);

	if (ap->flags & ATA_FLAG_RECOVERED)
		ata_port_printk(ap, KERN_INFO, "EH complete\n");
	ap->flags &= ~ATA_FLAG_RECOVERED;

	spin_unlock_irqrestore(hs_lock, flags);

	DPRINTK("EXIT\n");
}
/**
 *	ata_qc_timeout - Handle timeout of queued command
 *	@qc: Command that timed out
 *
 *	Some part of the kernel (currently, only the SCSI layer)
 *	has noticed that the active command on port @ap has not
 *	completed after a specified length of time.  Handle this
 *	condition by disabling DMA (if necessary) and completing
 *	transactions, with error if necessary.
 *
 *	This also handles the case of the "lost interrupt", where
 *	for some reason (possibly hardware bug, possibly driver bug)
 *	an interrupt was not delivered to the driver, even though the
 *	transaction completed successfully.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 */
static void ata_qc_timeout(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct ata_host_set *host_set = ap->host_set;
	u8 host_stat = 0, drv_stat;
	unsigned long flags;

	DPRINTK("ENTER\n");

	ap->hsm_task_state = HSM_ST_IDLE;

	spin_lock_irqsave(&host_set->lock, flags);

	switch (qc->tf.protocol) {

	case ATA_PROT_DMA:
	case ATA_PROT_ATAPI_DMA:
		host_stat = ap->ops->bmdma_status(ap);

		/* before we do anything else, clear DMA-Start bit */
		ap->ops->bmdma_stop(qc);

		/* fall through */

	default:
		ata_altstatus(ap);
		drv_stat = ata_chk_status(ap);

		/* ack bmdma irq events */
		ap->ops->irq_clear(ap);

		ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, "
			       "stat 0x%x host_stat 0x%x\n",
			       qc->tf.command, drv_stat, host_stat);

		/* complete taskfile transaction */
		qc->err_mask |= AC_ERR_TIMEOUT;
		break;
	}

	spin_unlock_irqrestore(&host_set->lock, flags);

	ata_eh_qc_complete(qc);

	DPRINTK("EXIT\n");
}
  304. /**
  305. * ata_eng_timeout - Handle timeout of queued command
  306. * @ap: Port on which timed-out command is active
  307. *
  308. * Some part of the kernel (currently, only the SCSI layer)
  309. * has noticed that the active command on port @ap has not
  310. * completed after a specified length of time. Handle this
  311. * condition by disabling DMA (if necessary) and completing
  312. * transactions, with error if necessary.
  313. *
  314. * This also handles the case of the "lost interrupt", where
  315. * for some reason (possibly hardware bug, possibly driver bug)
  316. * an interrupt was not delivered to the driver, even though the
  317. * transaction completed successfully.
  318. *
  319. * TODO: kill this function once old EH is gone.
  320. *
  321. * LOCKING:
  322. * Inherited from SCSI layer (none, can sleep)
  323. */
  324. void ata_eng_timeout(struct ata_port *ap)
  325. {
  326. DPRINTK("ENTER\n");
  327. ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));
  328. DPRINTK("EXIT\n");
  329. }
  330. /**
  331. * ata_qc_schedule_eh - schedule qc for error handling
  332. * @qc: command to schedule error handling for
  333. *
  334. * Schedule error handling for @qc. EH will kick in as soon as
  335. * other commands are drained.
  336. *
  337. * LOCKING:
  338. * spin_lock_irqsave(host_set lock)
  339. */
  340. void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
  341. {
  342. struct ata_port *ap = qc->ap;
  343. WARN_ON(!ap->ops->error_handler);
  344. qc->flags |= ATA_QCFLAG_FAILED;
  345. qc->ap->flags |= ATA_FLAG_EH_PENDING;
  346. /* The following will fail if timeout has already expired.
  347. * ata_scsi_error() takes care of such scmds on EH entry.
  348. * Note that ATA_QCFLAG_FAILED is unconditionally set after
  349. * this function completes.
  350. */
  351. scsi_req_abort_cmd(qc->scsicmd);
  352. }
  353. /**
  354. * ata_port_schedule_eh - schedule error handling without a qc
  355. * @ap: ATA port to schedule EH for
  356. *
  357. * Schedule error handling for @ap. EH will kick in as soon as
  358. * all commands are drained.
  359. *
  360. * LOCKING:
  361. * spin_lock_irqsave(host_set lock)
  362. */
  363. void ata_port_schedule_eh(struct ata_port *ap)
  364. {
  365. WARN_ON(!ap->ops->error_handler);
  366. ap->flags |= ATA_FLAG_EH_PENDING;
  367. ata_schedule_scsi_eh(ap->host);
  368. DPRINTK("port EH scheduled\n");
  369. }
  370. /**
  371. * ata_port_abort - abort all qc's on the port
  372. * @ap: ATA port to abort qc's for
  373. *
  374. * Abort all active qc's of @ap and schedule EH.
  375. *
  376. * LOCKING:
  377. * spin_lock_irqsave(host_set lock)
  378. *
  379. * RETURNS:
  380. * Number of aborted qc's.
  381. */
  382. int ata_port_abort(struct ata_port *ap)
  383. {
  384. int tag, nr_aborted = 0;
  385. WARN_ON(!ap->ops->error_handler);
  386. for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
  387. struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
  388. if (qc) {
  389. qc->flags |= ATA_QCFLAG_FAILED;
  390. ata_qc_complete(qc);
  391. nr_aborted++;
  392. }
  393. }
  394. if (!nr_aborted)
  395. ata_port_schedule_eh(ap);
  396. return nr_aborted;
  397. }
  398. /**
  399. * __ata_port_freeze - freeze port
  400. * @ap: ATA port to freeze
  401. *
  402. * This function is called when HSM violation or some other
  403. * condition disrupts normal operation of the port. Frozen port
  404. * is not allowed to perform any operation until the port is
  405. * thawed, which usually follows a successful reset.
  406. *
  407. * ap->ops->freeze() callback can be used for freezing the port
  408. * hardware-wise (e.g. mask interrupt and stop DMA engine). If a
  409. * port cannot be frozen hardware-wise, the interrupt handler
  410. * must ack and clear interrupts unconditionally while the port
  411. * is frozen.
  412. *
  413. * LOCKING:
  414. * spin_lock_irqsave(host_set lock)
  415. */
  416. static void __ata_port_freeze(struct ata_port *ap)
  417. {
  418. WARN_ON(!ap->ops->error_handler);
  419. if (ap->ops->freeze)
  420. ap->ops->freeze(ap);
  421. ap->flags |= ATA_FLAG_FROZEN;
  422. DPRINTK("ata%u port frozen\n", ap->id);
  423. }
  424. /**
  425. * ata_port_freeze - abort & freeze port
  426. * @ap: ATA port to freeze
  427. *
  428. * Abort and freeze @ap.
  429. *
  430. * LOCKING:
  431. * spin_lock_irqsave(host_set lock)
  432. *
  433. * RETURNS:
  434. * Number of aborted commands.
  435. */
  436. int ata_port_freeze(struct ata_port *ap)
  437. {
  438. int nr_aborted;
  439. WARN_ON(!ap->ops->error_handler);
  440. nr_aborted = ata_port_abort(ap);
  441. __ata_port_freeze(ap);
  442. return nr_aborted;
  443. }
  444. /**
  445. * ata_eh_freeze_port - EH helper to freeze port
  446. * @ap: ATA port to freeze
  447. *
  448. * Freeze @ap.
  449. *
  450. * LOCKING:
  451. * None.
  452. */
  453. void ata_eh_freeze_port(struct ata_port *ap)
  454. {
  455. unsigned long flags;
  456. if (!ap->ops->error_handler)
  457. return;
  458. spin_lock_irqsave(&ap->host_set->lock, flags);
  459. __ata_port_freeze(ap);
  460. spin_unlock_irqrestore(&ap->host_set->lock, flags);
  461. }
  462. /**
  463. * ata_port_thaw_port - EH helper to thaw port
  464. * @ap: ATA port to thaw
  465. *
  466. * Thaw frozen port @ap.
  467. *
  468. * LOCKING:
  469. * None.
  470. */
  471. void ata_eh_thaw_port(struct ata_port *ap)
  472. {
  473. unsigned long flags;
  474. if (!ap->ops->error_handler)
  475. return;
  476. spin_lock_irqsave(&ap->host_set->lock, flags);
  477. ap->flags &= ~ATA_FLAG_FROZEN;
  478. if (ap->ops->thaw)
  479. ap->ops->thaw(ap);
  480. spin_unlock_irqrestore(&ap->host_set->lock, flags);
  481. DPRINTK("ata%u port thawed\n", ap->id);
  482. }
/* Dummy ->scsidone callback installed by __ata_eh_qc_complete(); the
 * scmd is finished through the EH done queue instead, so the normal
 * completion path must do nothing here.
 */
static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}
  487. static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
  488. {
  489. struct ata_port *ap = qc->ap;
  490. struct scsi_cmnd *scmd = qc->scsicmd;
  491. unsigned long flags;
  492. spin_lock_irqsave(&ap->host_set->lock, flags);
  493. qc->scsidone = ata_eh_scsidone;
  494. __ata_qc_complete(qc);
  495. WARN_ON(ata_tag_valid(qc->tag));
  496. spin_unlock_irqrestore(&ap->host_set->lock, flags);
  497. scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
  498. }
  499. /**
  500. * ata_eh_qc_complete - Complete an active ATA command from EH
  501. * @qc: Command to complete
  502. *
  503. * Indicate to the mid and upper layers that an ATA command has
  504. * completed. To be used from EH.
  505. */
  506. void ata_eh_qc_complete(struct ata_queued_cmd *qc)
  507. {
  508. struct scsi_cmnd *scmd = qc->scsicmd;
  509. scmd->retries = scmd->allowed;
  510. __ata_eh_qc_complete(qc);
  511. }
  512. /**
  513. * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
  514. * @qc: Command to retry
  515. *
  516. * Indicate to the mid and upper layers that an ATA command
  517. * should be retried. To be used from EH.
  518. *
  519. * SCSI midlayer limits the number of retries to scmd->allowed.
  520. * scmd->retries is decremented for commands which get retried
  521. * due to unrelated failures (qc->err_mask is zero).
  522. */
  523. void ata_eh_qc_retry(struct ata_queued_cmd *qc)
  524. {
  525. struct scsi_cmnd *scmd = qc->scsicmd;
  526. if (!qc->err_mask && scmd->retries)
  527. scmd->retries--;
  528. __ata_eh_qc_complete(qc);
  529. }
  530. /**
  531. * ata_eh_about_to_do - about to perform eh_action
  532. * @ap: target ATA port
  533. * @action: action about to be performed
  534. *
  535. * Called just before performing EH actions to clear related bits
  536. * in @ap->eh_info such that eh actions are not unnecessarily
  537. * repeated.
  538. *
  539. * LOCKING:
  540. * None.
  541. */
  542. static void ata_eh_about_to_do(struct ata_port *ap, unsigned int action)
  543. {
  544. unsigned long flags;
  545. spin_lock_irqsave(&ap->host_set->lock, flags);
  546. ap->eh_info.action &= ~action;
  547. ap->flags |= ATA_FLAG_RECOVERED;
  548. spin_unlock_irqrestore(&ap->host_set->lock, flags);
  549. }
  550. /**
  551. * ata_err_string - convert err_mask to descriptive string
  552. * @err_mask: error mask to convert to string
  553. *
  554. * Convert @err_mask to descriptive string. Errors are
  555. * prioritized according to severity and only the most severe
  556. * error is reported.
  557. *
  558. * LOCKING:
  559. * None.
  560. *
  561. * RETURNS:
  562. * Descriptive string for @err_mask
  563. */
  564. static const char * ata_err_string(unsigned int err_mask)
  565. {
  566. if (err_mask & AC_ERR_HOST_BUS)
  567. return "host bus error";
  568. if (err_mask & AC_ERR_ATA_BUS)
  569. return "ATA bus error";
  570. if (err_mask & AC_ERR_TIMEOUT)
  571. return "timeout";
  572. if (err_mask & AC_ERR_HSM)
  573. return "HSM violation";
  574. if (err_mask & AC_ERR_SYSTEM)
  575. return "internal error";
  576. if (err_mask & AC_ERR_MEDIA)
  577. return "media error";
  578. if (err_mask & AC_ERR_INVALID)
  579. return "invalid argument";
  580. if (err_mask & AC_ERR_DEV)
  581. return "device error";
  582. return "unknown error";
  583. }
  584. /**
  585. * ata_read_log_page - read a specific log page
  586. * @dev: target device
  587. * @page: page to read
  588. * @buf: buffer to store read page
  589. * @sectors: number of sectors to read
  590. *
  591. * Read log page using READ_LOG_EXT command.
  592. *
  593. * LOCKING:
  594. * Kernel thread context (may sleep).
  595. *
  596. * RETURNS:
  597. * 0 on success, AC_ERR_* mask otherwise.
  598. */
  599. static unsigned int ata_read_log_page(struct ata_device *dev,
  600. u8 page, void *buf, unsigned int sectors)
  601. {
  602. struct ata_taskfile tf;
  603. unsigned int err_mask;
  604. DPRINTK("read log page - page %d\n", page);
  605. ata_tf_init(dev, &tf);
  606. tf.command = ATA_CMD_READ_LOG_EXT;
  607. tf.lbal = page;
  608. tf.nsect = sectors;
  609. tf.hob_nsect = sectors >> 8;
  610. tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
  611. tf.protocol = ATA_PROT_PIO;
  612. err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
  613. buf, sectors * ATA_SECT_SIZE);
  614. DPRINTK("EXIT, err_mask=%x\n", err_mask);
  615. return err_mask;
  616. }
  617. /**
  618. * ata_eh_read_log_10h - Read log page 10h for NCQ error details
  619. * @dev: Device to read log page 10h from
  620. * @tag: Resulting tag of the failed command
  621. * @tf: Resulting taskfile registers of the failed command
  622. *
  623. * Read log page 10h to obtain NCQ error details and clear error
  624. * condition.
  625. *
  626. * LOCKING:
  627. * Kernel thread context (may sleep).
  628. *
  629. * RETURNS:
  630. * 0 on success, -errno otherwise.
  631. */
  632. static int ata_eh_read_log_10h(struct ata_device *dev,
  633. int *tag, struct ata_taskfile *tf)
  634. {
  635. u8 *buf = dev->ap->sector_buf;
  636. unsigned int err_mask;
  637. u8 csum;
  638. int i;
  639. err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
  640. if (err_mask)
  641. return -EIO;
  642. csum = 0;
  643. for (i = 0; i < ATA_SECT_SIZE; i++)
  644. csum += buf[i];
  645. if (csum)
  646. ata_dev_printk(dev, KERN_WARNING,
  647. "invalid checksum 0x%x on log page 10h\n", csum);
  648. if (buf[0] & 0x80)
  649. return -ENOENT;
  650. *tag = buf[0] & 0x1f;
  651. tf->command = buf[2];
  652. tf->feature = buf[3];
  653. tf->lbal = buf[4];
  654. tf->lbam = buf[5];
  655. tf->lbah = buf[6];
  656. tf->device = buf[7];
  657. tf->hob_lbal = buf[8];
  658. tf->hob_lbam = buf[9];
  659. tf->hob_lbah = buf[10];
  660. tf->nsect = buf[12];
  661. tf->hob_nsect = buf[13];
  662. return 0;
  663. }
/**
 *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 *	@dev: device to perform REQUEST_SENSE to
 *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
 *
 *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
 *	SENSE.  This function is EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure
 */
static unsigned int atapi_eh_request_sense(struct ata_device *dev,
					   unsigned char *sense_buf)
{
	struct ata_port *ap = dev->ap;
	struct ata_taskfile tf;
	u8 cdb[ATAPI_CDB_LEN];

	DPRINTK("ATAPI request sense\n");

	ata_tf_init(dev, &tf);

	/* FIXME: is this needed? */
	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* XXX: why tf_read here? */
	ap->ops->tf_read(ap, &tf);

	/* fill these in, for the case where they are -not- overwritten */
	sense_buf[0] = 0x70;		/* fixed-format sense data */
	sense_buf[2] = tf.feature >> 4;	/* sense key from error register */

	memset(cdb, 0, ATAPI_CDB_LEN);
	cdb[0] = REQUEST_SENSE;
	cdb[4] = SCSI_SENSE_BUFFERSIZE;	/* allocation length */

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATA_PROT_ATAPI_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATA_PROT_ATAPI;
		/* PIO byte-count limit registers */
		tf.lbam = (8 * 1024) & 0xff;
		tf.lbah = (8 * 1024) >> 8;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE);
}
  710. /**
  711. * ata_eh_analyze_serror - analyze SError for a failed port
  712. * @ap: ATA port to analyze SError for
  713. *
  714. * Analyze SError if available and further determine cause of
  715. * failure.
  716. *
  717. * LOCKING:
  718. * None.
  719. */
  720. static void ata_eh_analyze_serror(struct ata_port *ap)
  721. {
  722. struct ata_eh_context *ehc = &ap->eh_context;
  723. u32 serror = ehc->i.serror;
  724. unsigned int err_mask = 0, action = 0;
  725. if (serror & SERR_PERSISTENT) {
  726. err_mask |= AC_ERR_ATA_BUS;
  727. action |= ATA_EH_HARDRESET;
  728. }
  729. if (serror &
  730. (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {
  731. err_mask |= AC_ERR_ATA_BUS;
  732. action |= ATA_EH_SOFTRESET;
  733. }
  734. if (serror & SERR_PROTOCOL) {
  735. err_mask |= AC_ERR_HSM;
  736. action |= ATA_EH_SOFTRESET;
  737. }
  738. if (serror & SERR_INTERNAL) {
  739. err_mask |= AC_ERR_SYSTEM;
  740. action |= ATA_EH_SOFTRESET;
  741. }
  742. if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG)) {
  743. err_mask |= AC_ERR_ATA_BUS;
  744. action |= ATA_EH_HARDRESET;
  745. }
  746. ehc->i.err_mask |= err_mask;
  747. ehc->i.action |= action;
  748. }
/**
 *	ata_eh_analyze_ncq_error - analyze NCQ error
 *	@ap: ATA port to analyze NCQ error for
 *
 *	Read log page 10h, determine the offending qc and acquire
 *	error status TF.  For NCQ device errors, all LLDDs have to do
 *	is setting AC_ERR_DEV in ehi->err_mask.  This function takes
 *	care of the rest.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_analyze_ncq_error(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev = ap->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->flags & ATA_FLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!ap->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already? */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		/* a failed qc already carries an err_mask - trust it */
		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_port_printk(ap, KERN_ERR, "failed to read log page 10h "
				"(errno=%d)\n", rc);
		return;
	}

	/* sanity check: the reported tag must still be in flight */
	if (!(ap->sactive & (1 << tag))) {
		ata_port_printk(ap, KERN_ERR, "log page 10h reported "
				"inactive tag %d\n", tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->err_mask |= AC_ERR_DEV;
	ehc->i.err_mask &= ~AC_ERR_DEV;
}
  800. /**
  801. * ata_eh_analyze_tf - analyze taskfile of a failed qc
  802. * @qc: qc to analyze
  803. * @tf: Taskfile registers to analyze
  804. *
  805. * Analyze taskfile of @qc and further determine cause of
  806. * failure. This function also requests ATAPI sense data if
* available.
  808. *
  809. * LOCKING:
  810. * Kernel thread context (may sleep).
  811. *
  812. * RETURNS:
  813. * Determined recovery action
  814. */
  815. static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
  816. const struct ata_taskfile *tf)
  817. {
  818. unsigned int tmp, action = 0;
  819. u8 stat = tf->command, err = tf->feature;
  820. if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
  821. qc->err_mask |= AC_ERR_HSM;
  822. return ATA_EH_SOFTRESET;
  823. }
  824. if (!(qc->err_mask & AC_ERR_DEV))
  825. return 0;
  826. switch (qc->dev->class) {
  827. case ATA_DEV_ATA:
  828. if (err & ATA_ICRC)
  829. qc->err_mask |= AC_ERR_ATA_BUS;
  830. if (err & ATA_UNC)
  831. qc->err_mask |= AC_ERR_MEDIA;
  832. if (err & ATA_IDNF)
  833. qc->err_mask |= AC_ERR_INVALID;
  834. break;
  835. case ATA_DEV_ATAPI:
  836. tmp = atapi_eh_request_sense(qc->dev,
  837. qc->scsicmd->sense_buffer);
  838. if (!tmp) {
  839. /* ATA_QCFLAG_SENSE_VALID is used to tell
  840. * atapi_qc_complete() that sense data is
  841. * already valid.
  842. *
  843. * TODO: interpret sense data and set
  844. * appropriate err_mask.
  845. */
  846. qc->flags |= ATA_QCFLAG_SENSE_VALID;
  847. } else
  848. qc->err_mask |= tmp;
  849. }
  850. if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
  851. action |= ATA_EH_SOFTRESET;
  852. return action;
  853. }
  854. static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent)
  855. {
  856. if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT))
  857. return 1;
  858. if (ent->is_io) {
  859. if (ent->err_mask & AC_ERR_HSM)
  860. return 1;
  861. if ((ent->err_mask &
  862. (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
  863. return 2;
  864. }
  865. return 0;
  866. }
/* accumulator passed to speed_down_needed_cb() via ata_ering_map() */
struct speed_down_needed_arg {
	u64 since;		/* ignore entries with timestamp older than this */
	int nr_errors[3];	/* error counts indexed by the category returned
				 * by ata_eh_categorize_ering_entry() */
};
  871. static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg)
  872. {
  873. struct speed_down_needed_arg *arg = void_arg;
  874. if (ent->timestamp < arg->since)
  875. return -1;
  876. arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++;
  877. return 0;
  878. }
  879. /**
* ata_eh_speed_down_needed - Determine whether speed down is necessary
  881. * @dev: Device of interest
  882. *
  883. * This function examines error ring of @dev and determines
  884. * whether speed down is necessary. Speed down is necessary if
  885. * there have been more than 3 of Cat-1 errors or 10 of Cat-2
  886. * errors during last 15 minutes.
  887. *
  888. * Cat-1 errors are ATA_BUS, TIMEOUT for any command and HSM
  889. * violation for known supported commands.
  890. *
  891. * Cat-2 errors are unclassified DEV error for known supported
  892. * command.
  893. *
  894. * LOCKING:
  895. * Inherited from caller.
  896. *
  897. * RETURNS:
  898. * 1 if speed down is necessary, 0 otherwise
  899. */
  900. static int ata_eh_speed_down_needed(struct ata_device *dev)
  901. {
  902. const u64 interval = 15LLU * 60 * HZ;
  903. static const int err_limits[3] = { -1, 3, 10 };
  904. struct speed_down_needed_arg arg;
  905. struct ata_ering_entry *ent;
  906. int err_cat;
  907. u64 j64;
  908. ent = ata_ering_top(&dev->ering);
  909. if (!ent)
  910. return 0;
  911. err_cat = ata_eh_categorize_ering_entry(ent);
  912. if (err_cat == 0)
  913. return 0;
  914. memset(&arg, 0, sizeof(arg));
  915. j64 = get_jiffies_64();
  916. if (j64 >= interval)
  917. arg.since = j64 - interval;
  918. else
  919. arg.since = 0;
  920. ata_ering_map(&dev->ering, speed_down_needed_cb, &arg);
  921. return arg.nr_errors[err_cat] > err_limits[err_cat];
  922. }
  923. /**
  924. * ata_eh_speed_down - record error and speed down if necessary
  925. * @dev: Failed device
  926. * @is_io: Did the device fail during normal IO?
  927. * @err_mask: err_mask of the error
  928. *
  929. * Record error and examine error history to determine whether
  930. * adjusting transmission speed is necessary. It also sets
  931. * transmission limits appropriately if such adjustment is
  932. * necessary.
  933. *
  934. * LOCKING:
  935. * Kernel thread context (may sleep).
  936. *
  937. * RETURNS:
* Determined recovery action (ATA_EH_HARDRESET/ATA_EH_SOFTRESET) if
* speed down was performed, 0 otherwise.
  939. */
  940. static int ata_eh_speed_down(struct ata_device *dev, int is_io,
  941. unsigned int err_mask)
  942. {
  943. if (!err_mask)
  944. return 0;
  945. /* record error and determine whether speed down is necessary */
  946. ata_ering_record(&dev->ering, is_io, err_mask);
  947. if (!ata_eh_speed_down_needed(dev))
  948. return 0;
  949. /* speed down SATA link speed if possible */
  950. if (sata_down_spd_limit(dev->ap) == 0)
  951. return ATA_EH_HARDRESET;
  952. /* lower transfer mode */
  953. if (ata_down_xfermask_limit(dev, 0) == 0)
  954. return ATA_EH_SOFTRESET;
  955. ata_dev_printk(dev, KERN_ERR,
  956. "speed down requested but no transfer mode left\n");
  957. return 0;
  958. }
  959. /**
  960. * ata_eh_autopsy - analyze error and determine recovery action
  961. * @ap: ATA port to perform autopsy on
  962. *
  963. * Analyze why @ap failed and determine which recovery action is
  964. * needed. This function also sets more detailed AC_ERR_* values
  965. * and fills sense data for ATAPI CHECK SENSE.
  966. *
  967. * LOCKING:
  968. * Kernel thread context (may sleep).
  969. */
static void ata_eh_autopsy(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int action = ehc->i.action;
	struct ata_device *failed_dev = NULL;
	unsigned int all_err_mask = 0;
	int tag, is_io = 0;
	u32 serror;
	int rc;

	DPRINTK("ENTER\n");

	/* obtain and analyze SError */
	rc = sata_scr_read(ap, SCR_ERROR, &serror);
	if (rc == 0) {
		ehc->i.serror |= serror;
		ata_eh_analyze_serror(ap);
	} else if (rc != -EOPNOTSUPP)
		/* SCR read failed on a port that should support it */
		action |= ATA_EH_HARDRESET;

	/* analyze NCQ failure */
	ata_eh_analyze_ncq_error(ap);

	/* any real error trumps AC_ERR_OTHER */
	if (ehc->i.err_mask & ~AC_ERR_OTHER)
		ehc->i.err_mask &= ~AC_ERR_OTHER;

	all_err_mask |= ehc->i.err_mask;

	/* refine err_mask and needed actions for each failed qc */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		/* inherit upper level err_mask */
		qc->err_mask |= ehc->i.err_mask;

		/* analyze TF */
		action |= ata_eh_analyze_tf(qc, &qc->result_tf);

		/* DEV errors are probably spurious in case of ATA_BUS error */
		if (qc->err_mask & AC_ERR_ATA_BUS)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
					  AC_ERR_INVALID);

		/* any real error trumps unknown error */
		if (qc->err_mask & ~AC_ERR_OTHER)
			qc->err_mask &= ~AC_ERR_OTHER;

		/* SENSE_VALID trumps dev/unknown error and revalidation */
		if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
			action &= ~ATA_EH_REVALIDATE;
		}

		/* accumulate error info: last failed qc's device wins */
		failed_dev = qc->dev;
		all_err_mask |= qc->err_mask;
		if (qc->flags & ATA_QCFLAG_IO)
			is_io = 1;
	}

	/* speed down iff command was in progress */
	if (failed_dev)
		action |= ata_eh_speed_down(failed_dev, is_io, all_err_mask);

	/* enforce default EH actions */
	if (ap->flags & ATA_FLAG_FROZEN ||
	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
		action |= ATA_EH_SOFTRESET;
	else if (all_err_mask)
		action |= ATA_EH_REVALIDATE;

	/* record autopsy result */
	ehc->i.dev = failed_dev;
	ehc->i.action = action;

	DPRINTK("EXIT\n");
}
  1033. /**
  1034. * ata_eh_report - report error handling to user
  1035. * @ap: ATA port EH is going on
  1036. *
  1037. * Report EH to user.
  1038. *
  1039. * LOCKING:
  1040. * None.
  1041. */
  1042. static void ata_eh_report(struct ata_port *ap)
  1043. {
  1044. struct ata_eh_context *ehc = &ap->eh_context;
  1045. const char *frozen, *desc;
  1046. int tag, nr_failed = 0;
  1047. desc = NULL;
  1048. if (ehc->i.desc[0] != '\0')
  1049. desc = ehc->i.desc;
  1050. for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
  1051. struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
  1052. if (!(qc->flags & ATA_QCFLAG_FAILED))
  1053. continue;
  1054. if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
  1055. continue;
  1056. nr_failed++;
  1057. }
  1058. if (!nr_failed && !ehc->i.err_mask)
  1059. return;
  1060. frozen = "";
  1061. if (ap->flags & ATA_FLAG_FROZEN)
  1062. frozen = " frozen";
  1063. if (ehc->i.dev) {
  1064. ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x "
  1065. "SAct 0x%x SErr 0x%x action 0x%x%s\n",
  1066. ehc->i.err_mask, ap->sactive, ehc->i.serror,
  1067. ehc->i.action, frozen);
  1068. if (desc)
  1069. ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc);
  1070. } else {
  1071. ata_port_printk(ap, KERN_ERR, "exception Emask 0x%x "
  1072. "SAct 0x%x SErr 0x%x action 0x%x%s\n",
  1073. ehc->i.err_mask, ap->sactive, ehc->i.serror,
  1074. ehc->i.action, frozen);
  1075. if (desc)
  1076. ata_port_printk(ap, KERN_ERR, "(%s)\n", desc);
  1077. }
  1078. for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
  1079. struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
  1080. if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask)
  1081. continue;
  1082. ata_dev_printk(qc->dev, KERN_ERR, "tag %d cmd 0x%x "
  1083. "Emask 0x%x stat 0x%x err 0x%x (%s)\n",
  1084. qc->tag, qc->tf.command, qc->err_mask,
  1085. qc->result_tf.command, qc->result_tf.feature,
  1086. ata_err_string(qc->err_mask));
  1087. }
  1088. }
/* Reset the port with retries.  Softreset is preferred unless it is
 * unavailable, a link speed change is pending, or EH explicitly
 * requested a hardreset.  On success, EH state is updated to schedule
 * revalidation.
 */
static int ata_eh_reset(struct ata_port *ap, ata_reset_fn_t softreset,
			ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int classes[ATA_MAX_DEVICES];
	int tries = ATA_EH_RESET_TRIES;
	ata_reset_fn_t reset;
	int rc;

	/* pick the initial reset method */
	if (softreset && (!hardreset || (!sata_set_spd_needed(ap) &&
					 !(ehc->i.action & ATA_EH_HARDRESET))))
		reset = softreset;
	else
		reset = hardreset;

 retry:
	ata_port_printk(ap, KERN_INFO, "%s resetting port\n",
			reset == softreset ? "soft" : "hard");

	/* reset */
	ata_eh_about_to_do(ap, ATA_EH_RESET_MASK);
	ehc->i.flags |= ATA_EHI_DID_RESET;

	rc = ata_do_reset(ap, reset, classes);

	if (rc && --tries) {
		ata_port_printk(ap, KERN_WARNING,
				"%sreset failed, retrying in 5 secs\n",
				reset == softreset ? "soft" : "hard");
		ssleep(5);

		/* a failed hardreset may mean the link can't sustain the
		 * current speed; step the speed limit down before retrying
		 */
		if (reset == hardreset)
			sata_down_spd_limit(ap);
		/* escalate to hardreset for the retry when available */
		if (hardreset)
			reset = hardreset;
		goto retry;
	}

	if (rc == 0) {
		if (postreset)
			postreset(ap, classes);

		/* reset successful, schedule revalidation */
		ehc->i.dev = NULL;
		ehc->i.action &= ~ATA_EH_RESET_MASK;
		ehc->i.action |= ATA_EH_REVALIDATE;
	}

	return rc;
}
  1130. static int ata_eh_revalidate(struct ata_port *ap,
  1131. struct ata_device **r_failed_dev)
  1132. {
  1133. struct ata_eh_context *ehc = &ap->eh_context;
  1134. struct ata_device *dev;
  1135. int i, rc = 0;
  1136. DPRINTK("ENTER\n");
  1137. for (i = 0; i < ATA_MAX_DEVICES; i++) {
  1138. dev = &ap->device[i];
  1139. if (ehc->i.action & ATA_EH_REVALIDATE && ata_dev_enabled(dev) &&
  1140. (!ehc->i.dev || ehc->i.dev == dev)) {
  1141. if (ata_port_offline(ap)) {
  1142. rc = -EIO;
  1143. break;
  1144. }
  1145. ata_eh_about_to_do(ap, ATA_EH_REVALIDATE);
  1146. rc = ata_dev_revalidate(dev,
  1147. ehc->i.flags & ATA_EHI_DID_RESET);
  1148. if (rc)
  1149. break;
  1150. ehc->i.action &= ~ATA_EH_REVALIDATE;
  1151. }
  1152. }
  1153. if (rc)
  1154. *r_failed_dev = dev;
  1155. DPRINTK("EXIT\n");
  1156. return rc;
  1157. }
  1158. static int ata_port_nr_enabled(struct ata_port *ap)
  1159. {
  1160. int i, cnt = 0;
  1161. for (i = 0; i < ATA_MAX_DEVICES; i++)
  1162. if (ata_dev_enabled(&ap->device[i]))
  1163. cnt++;
  1164. return cnt;
  1165. }
  1166. /**
  1167. * ata_eh_recover - recover host port after error
  1168. * @ap: host port to recover
  1169. * @softreset: softreset method (can be NULL)
  1170. * @hardreset: hardreset method (can be NULL)
  1171. * @postreset: postreset method (can be NULL)
  1172. *
  1173. * This is the alpha and omega, eum and yang, heart and soul of
  1174. * libata exception handling. On entry, actions required to
* recover each device are recorded in eh_context. This
* function executes all the operations with appropriate retries
  1177. * and fallbacks to resurrect failed devices.
  1178. *
  1179. * LOCKING:
  1180. * Kernel thread context (may sleep).
  1181. *
  1182. * RETURNS:
  1183. * 0 on success, -errno on failure.
  1184. */
static int ata_eh_recover(struct ata_port *ap, ata_reset_fn_t softreset,
			  ata_reset_fn_t hardreset,
			  ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev;
	int down_xfermask, i, rc;

	DPRINTK("ENTER\n");

	/* prep for recovery */
	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		dev = &ap->device[i];

		ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
	}

 retry:
	down_xfermask = 0;
	rc = 0;

	/* skip EH if possible. */
	if (!ata_port_nr_enabled(ap) && !(ap->flags & ATA_FLAG_FROZEN))
		ehc->i.action = 0;

	/* reset (port stays frozen across the reset itself) */
	if (ehc->i.action & ATA_EH_RESET_MASK) {
		ata_eh_freeze_port(ap);

		rc = ata_eh_reset(ap, softreset, hardreset, postreset);
		if (rc) {
			ata_port_printk(ap, KERN_ERR,
					"reset failed, giving up\n");
			goto out;
		}

		ata_eh_thaw_port(ap);
	}

	/* revalidate existing devices */
	rc = ata_eh_revalidate(ap, &dev);
	if (rc)
		goto dev_fail;

	/* configure transfer mode if the port has been reset */
	if (ehc->i.flags & ATA_EHI_DID_RESET) {
		rc = ata_set_mode(ap, &dev);
		if (rc) {
			/* a mode failure should lower xfermask on retry */
			down_xfermask = 1;
			goto dev_fail;
		}
	}

	goto out;

 dev_fail:
	switch (rc) {
	case -ENODEV:
	case -EINVAL:
		/* device gone or hopeless -- no more tries for it */
		ehc->tries[dev->devno] = 0;
		break;
	case -EIO:
		sata_down_spd_limit(ap);
		/* fall through -- -EIO also consumes a try */
	default:
		ehc->tries[dev->devno]--;
		if (down_xfermask &&
		    ata_down_xfermask_limit(dev, ehc->tries[dev->devno] == 1))
			ehc->tries[dev->devno] = 0;
	}

	/* disable device if it has used up all its chances */
	if (ata_dev_enabled(dev) && !ehc->tries[dev->devno])
		ata_dev_disable(dev);

	/* soft didn't work? be haaaaard */
	if (ehc->i.flags & ATA_EHI_DID_RESET)
		ehc->i.action |= ATA_EH_HARDRESET;
	else
		ehc->i.action |= ATA_EH_SOFTRESET;

	if (ata_port_nr_enabled(ap)) {
		ata_port_printk(ap, KERN_WARNING, "failed to recover some "
				"devices, retrying in 5 secs\n");
		ssleep(5);
	} else {
		/* no device left, repeat fast */
		msleep(500);
	}

	goto retry;

 out:
	/* total failure: take every device on the port down */
	if (rc) {
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			ata_dev_disable(&ap->device[i]);
	}

	DPRINTK("EXIT, rc=%d\n", rc);
	return rc;
}
  1267. /**
  1268. * ata_eh_finish - finish up EH
  1269. * @ap: host port to finish EH for
  1270. *
  1271. * Recovery is complete. Clean up EH states and retry or finish
  1272. * failed qcs.
  1273. *
  1274. * LOCKING:
  1275. * None.
  1276. */
  1277. static void ata_eh_finish(struct ata_port *ap)
  1278. {
  1279. int tag;
  1280. /* retry or finish qcs */
  1281. for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
  1282. struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
  1283. if (!(qc->flags & ATA_QCFLAG_FAILED))
  1284. continue;
  1285. if (qc->err_mask) {
  1286. /* FIXME: Once EH migration is complete,
  1287. * generate sense data in this function,
  1288. * considering both err_mask and tf.
  1289. */
  1290. if (qc->err_mask & AC_ERR_INVALID)
  1291. ata_eh_qc_complete(qc);
  1292. else
  1293. ata_eh_qc_retry(qc);
  1294. } else {
  1295. if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
  1296. ata_eh_qc_complete(qc);
  1297. } else {
  1298. /* feed zero TF to sense generation */
  1299. memset(&qc->result_tf, 0, sizeof(qc->result_tf));
  1300. ata_eh_qc_retry(qc);
  1301. }
  1302. }
  1303. }
  1304. }
  1305. /**
  1306. * ata_do_eh - do standard error handling
  1307. * @ap: host port to handle error for
  1308. * @softreset: softreset method (can be NULL)
  1309. * @hardreset: hardreset method (can be NULL)
  1310. * @postreset: postreset method (can be NULL)
  1311. *
  1312. * Perform standard error handling sequence.
  1313. *
  1314. * LOCKING:
  1315. * Kernel thread context (may sleep).
  1316. */
void ata_do_eh(struct ata_port *ap, ata_reset_fn_t softreset,
	       ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
	/* determine what went wrong and which recovery actions are needed */
	ata_eh_autopsy(ap);
	/* report the findings to the user */
	ata_eh_report(ap);
	/* perform resets / revalidation as decided by the autopsy */
	ata_eh_recover(ap, softreset, hardreset, postreset);
	/* complete or retry the failed qcs */
	ata_eh_finish(ap);
}