/*
 * drivers/pci/pcie/aer/aerdrv_core.c
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * This file implements the core part of PCI Express AER.  When a PCI Express
 * error is delivered, an error message is collected and printed to the
 * console, then an error recovery procedure is executed following the PCI
 * error recovery rules.
 *
 * Copyright (C) 2006 Intel Corp.
 *      Tom Long Nguyen (tom.l.nguyen@intel.com)
 *      Zhang Yanmin (yanmin.zhang@intel.com)
 *
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/pm.h>
#include <linux/suspend.h>
#include <linux/delay.h>
#include "aerdrv.h"

static int forceload;
static int nosourceid;
module_param(forceload, bool, 0);
module_param(nosourceid, bool, 0);

int pci_enable_pcie_error_reporting(struct pci_dev *dev)
{
        u16 reg16 = 0;
        int pos;

        if (dev->aer_firmware_first)
                return -EIO;

        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
        if (!pos)
                return -EIO;

        pos = pci_pcie_cap(dev);
        if (!pos)
                return -EIO;

        pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
        reg16 = reg16 |
                PCI_EXP_DEVCTL_CERE |
                PCI_EXP_DEVCTL_NFERE |
                PCI_EXP_DEVCTL_FERE |
                PCI_EXP_DEVCTL_URRE;
        pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16);

        return 0;
}
EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);

int pci_disable_pcie_error_reporting(struct pci_dev *dev)
{
        u16 reg16 = 0;
        int pos;

        if (dev->aer_firmware_first)
                return -EIO;

        pos = pci_pcie_cap(dev);
        if (!pos)
                return -EIO;

        pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
        reg16 = reg16 & ~(PCI_EXP_DEVCTL_CERE |
                        PCI_EXP_DEVCTL_NFERE |
                        PCI_EXP_DEVCTL_FERE |
                        PCI_EXP_DEVCTL_URRE);
        pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16);

        return 0;
}
EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
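
/*
 * Usage sketch (editor's illustration, not part of the original file): a
 * PCIe endpoint driver can opt in to AER reporting from its probe routine
 * and opt out again on remove.  The callbacks foo_probe() and foo_remove()
 * below are hypothetical names; only the two exported helpers above are
 * real interfaces.
 *
 *      static int foo_probe(struct pci_dev *pdev,
 *                           const struct pci_device_id *id)
 *      {
 *              int rc;
 *
 *              rc = pci_enable_device(pdev);
 *              if (rc)
 *                      return rc;
 *
 *              // Best effort: returns -EIO when firmware owns AER or the
 *              // device lacks the PCIe/AER capabilities.
 *              pci_enable_pcie_error_reporting(pdev);
 *              return 0;
 *      }
 *
 *      static void foo_remove(struct pci_dev *pdev)
 *      {
 *              pci_disable_pcie_error_reporting(pdev);
 *              pci_disable_device(pdev);
 *      }
 */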

int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
{
        int pos;
        u32 status, mask;

        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
        if (!pos)
                return -EIO;

        pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
        pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
        if (dev->error_state == pci_channel_io_normal)
                status &= ~mask; /* Clear corresponding nonfatal bits */
        else
                status &= mask; /* Clear corresponding fatal bits */
        pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);

        return 0;
}
EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
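
/*
 * Usage sketch (editor's illustration, assuming a hypothetical driver):
 * drivers that implement the pci_error_handlers ->slot_reset() callback
 * commonly clear the device's uncorrectable AER status once the link is
 * usable again, so stale bits are not reported after recovery.
 * foo_slot_reset() is an assumed callback name.
 *
 *      static pci_ers_result_t foo_slot_reset(struct pci_dev *pdev)
 *      {
 *              if (pci_enable_device(pdev))
 *                      return PCI_ERS_RESULT_DISCONNECT;
 *
 *              pci_set_master(pdev);
 *              pci_cleanup_aer_uncorrect_error_status(pdev);
 *              return PCI_ERS_RESULT_RECOVERED;
 *      }
 */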

#if 0
int pci_cleanup_aer_correct_error_status(struct pci_dev *dev)
{
        int pos;
        u32 status;

        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
        if (!pos)
                return -EIO;

        pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
        pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status);

        return 0;
}
#endif /* 0 */

static int set_device_error_reporting(struct pci_dev *dev, void *data)
{
        bool enable = *((bool *)data);

        if (dev->pcie_type == PCIE_RC_PORT ||
            dev->pcie_type == PCIE_SW_UPSTREAM_PORT ||
            dev->pcie_type == PCIE_SW_DOWNSTREAM_PORT) {
                if (enable)
                        pci_enable_pcie_error_reporting(dev);
                else
                        pci_disable_pcie_error_reporting(dev);
        }

        if (enable)
                pcie_set_ecrc_checking(dev);

        return 0;
}

/**
 * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports.
 * @dev: pointer to root port's pci_dev data structure
 * @enable: true = enable error reporting, false = disable error reporting.
 */
static void set_downstream_devices_error_reporting(struct pci_dev *dev,
                                                   bool enable)
{
        set_device_error_reporting(dev, &enable);

        if (!dev->subordinate)
                return;
        pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
}

static inline int compare_device_id(struct pci_dev *dev,
                        struct aer_err_info *e_info)
{
        if (e_info->id == ((dev->bus->number << 8) | dev->devfn)) {
                /*
                 * Device ID match
                 */
                return 1;
        }

        return 0;
}

static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
{
        if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) {
                e_info->dev[e_info->error_dev_num] = dev;
                e_info->error_dev_num++;
                return 1;
        }

        return 0;
}

#define PCI_BUS(x)      (((x) >> 8) & 0xff)

static int find_device_iter(struct pci_dev *dev, void *data)
{
        int pos;
        u32 status;
        u32 mask;
        u16 reg16;
        int result;
        struct aer_err_info *e_info = (struct aer_err_info *)data;

        /*
         * When the bus ID is 0, it might be a bad ID
         * reported by the root port.
         */
        if (!nosourceid && (PCI_BUS(e_info->id) != 0)) {
                result = compare_device_id(dev, e_info);
                if (result)
                        add_error_device(e_info, dev);

                /*
                 * If there is only a single error, stop or continue
                 * based on the ID comparison alone.
                 */
                if (!e_info->multi_error_valid)
                        return result;

                /*
                 * If there are multiple errors and the ID matches,
                 * we still need to search the other devices under
                 * the root port.  Returning 0 means "keep iterating".
                 */
                if (result)
                        return 0;
        }

        /*
         * Check the AER status registers to find the initial reporter
         * when any of the following holds:
         * 1) nosourceid==y;
         * 2) the bus ID is 0 (some ports lose the bus ID of the error
         *    source ID);
         * 3) there are multiple errors and the ID comparison above failed.
         */
        if (atomic_read(&dev->enable_cnt) == 0)
                return 0;
        pos = pci_pcie_cap(dev);
        if (!pos)
                return 0;

        /* Check if AER is enabled */
        pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
        if (!(reg16 & (
                PCI_EXP_DEVCTL_CERE |
                PCI_EXP_DEVCTL_NFERE |
                PCI_EXP_DEVCTL_FERE |
                PCI_EXP_DEVCTL_URRE)))
                return 0;
        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
        if (!pos)
                return 0;

        status = 0;
        mask = 0;
        if (e_info->severity == AER_CORRECTABLE) {
                pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
                pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask);
                if (status & ~mask) {
                        add_error_device(e_info, dev);
                        goto added;
                }
        } else {
                pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
                pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
                if (status & ~mask) {
                        add_error_device(e_info, dev);
                        goto added;
                }
        }

        return 0;

added:
        if (e_info->multi_error_valid)
                return 0;
        else
                return 1;
}

/**
 * find_source_device - search through device hierarchy for source device
 * @parent: pointer to Root Port pci_dev data structure
 * @e_info: detailed error information, including the error source ID
 *
 * Invoked when an error is detected at the Root Port.
 */
static void find_source_device(struct pci_dev *parent,
                struct aer_err_info *e_info)
{
        struct pci_dev *dev = parent;
        int result;

        /* Is Root Port an agent that sends error messages? */
        result = find_device_iter(dev, e_info);
        if (result)
                return;

        pci_walk_bus(parent->subordinate, find_device_iter, e_info);
}

static int report_error_detected(struct pci_dev *dev, void *data)
{
        pci_ers_result_t vote;
        struct pci_error_handlers *err_handler;
        struct aer_broadcast_data *result_data;
        result_data = (struct aer_broadcast_data *) data;

        dev->error_state = result_data->state;

        if (!dev->driver ||
                !dev->driver->err_handler ||
                !dev->driver->err_handler->error_detected) {
                if (result_data->state == pci_channel_io_frozen &&
                        !(dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) {
                        /*
                         * In case of fatal recovery, if one of the
                         * downstream devices has no driver, we might be
                         * unable to recover because a later insmod of a
                         * driver for this device would be unaware of
                         * its hw state.
                         */
                        dev_printk(KERN_DEBUG, &dev->dev, "device has %s\n",
                                   dev->driver ?
                                   "no AER-aware driver" : "no driver");
                }
                return 0;
        }

        err_handler = dev->driver->err_handler;
        vote = err_handler->error_detected(dev, result_data->state);
        result_data->result = merge_result(result_data->result, vote);
        return 0;
}

static int report_mmio_enabled(struct pci_dev *dev, void *data)
{
        pci_ers_result_t vote;
        struct pci_error_handlers *err_handler;
        struct aer_broadcast_data *result_data;
        result_data = (struct aer_broadcast_data *) data;

        if (!dev->driver ||
                !dev->driver->err_handler ||
                !dev->driver->err_handler->mmio_enabled)
                return 0;

        err_handler = dev->driver->err_handler;
        vote = err_handler->mmio_enabled(dev);
        result_data->result = merge_result(result_data->result, vote);
        return 0;
}

static int report_slot_reset(struct pci_dev *dev, void *data)
{
        pci_ers_result_t vote;
        struct pci_error_handlers *err_handler;
        struct aer_broadcast_data *result_data;
        result_data = (struct aer_broadcast_data *) data;

        if (!dev->driver ||
                !dev->driver->err_handler ||
                !dev->driver->err_handler->slot_reset)
                return 0;

        err_handler = dev->driver->err_handler;
        vote = err_handler->slot_reset(dev);
        result_data->result = merge_result(result_data->result, vote);
        return 0;
}

static int report_resume(struct pci_dev *dev, void *data)
{
        struct pci_error_handlers *err_handler;

        dev->error_state = pci_channel_io_normal;

        if (!dev->driver ||
                !dev->driver->err_handler ||
                !dev->driver->err_handler->resume)
                return 0;

        err_handler = dev->driver->err_handler;
        err_handler->resume(dev);
        return 0;
}
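
/*
 * For reference (editor's illustration, not from the original source): the
 * report_*() helpers above call back into a driver's struct
 * pci_error_handlers.  A minimal, hypothetical handler table a driver might
 * register could look like this; foo_error_detected() and foo_resume() are
 * assumed names.
 *
 *      static pci_ers_result_t foo_error_detected(struct pci_dev *pdev,
 *                                      enum pci_channel_state state)
 *      {
 *              // Quiesce the device; ask for a reset unless the channel
 *              // is permanently gone.
 *              if (state == pci_channel_io_perm_failure)
 *                      return PCI_ERS_RESULT_DISCONNECT;
 *              return PCI_ERS_RESULT_NEED_RESET;
 *      }
 *
 *      static void foo_resume(struct pci_dev *pdev)
 *      {
 *              // Restart I/O once recovery has completed.
 *      }
 *
 *      static struct pci_error_handlers foo_err_handler = {
 *              .error_detected = foo_error_detected,
 *              .resume         = foo_resume,
 *      };
 */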

/**
 * broadcast_error_message - handle message broadcast to downstream drivers
 * @dev: pointer to the device in the hierarchy from which the message is broadcast down
 * @state: error state
 * @error_mesg: message to print
 * @cb: callback to be broadcast
 *
 * Invoked during the error recovery process.  The error severity is
 * broadcast to all downstream drivers in the hierarchy in question.
 */
static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
        enum pci_channel_state state,
        char *error_mesg,
        int (*cb)(struct pci_dev *, void *))
{
        struct aer_broadcast_data result_data;

        dev_printk(KERN_DEBUG, &dev->dev, "broadcast %s message\n", error_mesg);
        result_data.state = state;
        if (cb == report_error_detected)
                result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
        else
                result_data.result = PCI_ERS_RESULT_RECOVERED;

        if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) {
                /*
                 * If the error is reported by a bridge, we think this error
                 * is related to the downstream link of the bridge, so we
                 * do error recovery on all subordinates of the bridge instead
                 * of the bridge and clear the error status of the bridge.
                 */
                if (cb == report_error_detected)
                        dev->error_state = state;
                pci_walk_bus(dev->subordinate, cb, &result_data);
                if (cb == report_resume) {
                        pci_cleanup_aer_uncorrect_error_status(dev);
                        dev->error_state = pci_channel_io_normal;
                }
        } else {
                /*
                 * If the error is reported by an end point, we think this
                 * error is related to the upstream link of the end point.
                 */
                pci_walk_bus(dev->bus, cb, &result_data);
        }

        return result_data.result;
}

struct find_aer_service_data {
        struct pcie_port_service_driver *aer_driver;
        int is_downstream;
};

static int find_aer_service_iter(struct device *device, void *data)
{
        struct device_driver *driver;
        struct pcie_port_service_driver *service_driver;
        struct find_aer_service_data *result;

        result = (struct find_aer_service_data *) data;

        if (device->bus == &pcie_port_bus_type) {
                struct pcie_port_data *port_data;

                port_data = pci_get_drvdata(to_pcie_device(device)->port);
                if (port_data->port_type == PCIE_SW_DOWNSTREAM_PORT)
                        result->is_downstream = 1;

                driver = device->driver;
                if (driver) {
                        service_driver = to_service_driver(driver);
                        if (service_driver->service == PCIE_PORT_SERVICE_AER) {
                                result->aer_driver = service_driver;
                                return 1;
                        }
                }
        }

        return 0;
}

static void find_aer_service(struct pci_dev *dev,
                struct find_aer_service_data *data)
{
        int retval;
        retval = device_for_each_child(&dev->dev, data, find_aer_service_iter);
}

static pci_ers_result_t reset_link(struct pcie_device *aerdev,
                struct pci_dev *dev)
{
        struct pci_dev *udev;
        pci_ers_result_t status;
        struct find_aer_service_data data;

        if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)
                udev = dev;
        else
                udev = dev->bus->self;

        data.is_downstream = 0;
        data.aer_driver = NULL;
        find_aer_service(udev, &data);

        /*
         * Prefer the AER driver of the error agent.  If the agent has no
         * AER driver, fall back to the Root Port's.
         */
        if (!data.aer_driver || !data.aer_driver->reset_link) {
                if (data.is_downstream &&
                        aerdev->device.driver &&
                        to_service_driver(aerdev->device.driver)->reset_link) {
                        data.aer_driver =
                                to_service_driver(aerdev->device.driver);
                } else {
                        dev_printk(KERN_DEBUG, &dev->dev, "no link-reset "
                                   "support\n");
                        return PCI_ERS_RESULT_DISCONNECT;
                }
        }

        status = data.aer_driver->reset_link(udev);
        if (status != PCI_ERS_RESULT_RECOVERED) {
                dev_printk(KERN_DEBUG, &dev->dev, "link reset at upstream "
                           "device %s failed\n", pci_name(udev));
                return PCI_ERS_RESULT_DISCONNECT;
        }

        return status;
}

/**
 * do_recovery - handle nonfatal/fatal error recovery process
 * @aerdev: pointer to a pcie_device data structure of root port
 * @dev: pointer to a pci_dev data structure of agent detecting an error
 * @severity: error severity type
 *
 * Invoked when an error is nonfatal/fatal.  Broadcasts an error_detected
 * message to all downstream drivers within the hierarchy in question and
 * returns the resulting status.
 */
static pci_ers_result_t do_recovery(struct pcie_device *aerdev,
                struct pci_dev *dev,
                int severity)
{
        pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
        enum pci_channel_state state;

        if (severity == AER_FATAL)
                state = pci_channel_io_frozen;
        else
                state = pci_channel_io_normal;

        status = broadcast_error_message(dev,
                        state,
                        "error_detected",
                        report_error_detected);

        if (severity == AER_FATAL) {
                result = reset_link(aerdev, dev);
                if (result != PCI_ERS_RESULT_RECOVERED) {
                        /* TODO: Should panic here? */
                        return result;
                }
        }

        if (status == PCI_ERS_RESULT_CAN_RECOVER)
                status = broadcast_error_message(dev,
                                state,
                                "mmio_enabled",
                                report_mmio_enabled);

        if (status == PCI_ERS_RESULT_NEED_RESET) {
                /*
                 * TODO: Should call platform-specific
                 * functions to reset slot before calling
                 * drivers' slot_reset callbacks?
                 */
                status = broadcast_error_message(dev,
                                state,
                                "slot_reset",
                                report_slot_reset);
        }

        if (status == PCI_ERS_RESULT_RECOVERED)
                broadcast_error_message(dev,
                                state,
                                "resume",
                                report_resume);

        return status;
}

/**
 * handle_error_source - handle logging an error into an event log
 * @aerdev: pointer to pcie_device data structure of the root port
 * @dev: pointer to pci_dev data structure of error source device
 * @info: comprehensive error information
 *
 * Invoked when an error is detected by the Root Port.
 */
static void handle_error_source(struct pcie_device *aerdev,
        struct pci_dev *dev,
        struct aer_err_info *info)
{
        pci_ers_result_t status = 0;
        int pos;

        if (info->severity == AER_CORRECTABLE) {
                /*
                 * A correctable error does not need software intervention.
                 * There is no need to go through the error recovery process.
                 */
                pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
                if (pos)
                        pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
                                        info->status);
        } else {
                status = do_recovery(aerdev, dev, info->severity);
                if (status == PCI_ERS_RESULT_RECOVERED) {
                        dev_printk(KERN_DEBUG, &dev->dev, "AER driver "
                                   "successfully recovered\n");
                } else {
                        /* TODO: Should kernel panic here? */
                        dev_printk(KERN_DEBUG, &dev->dev, "AER driver didn't "
                                   "recover\n");
                }
        }
}

/**
 * aer_enable_rootport - enable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIE bus loads AER service driver.
 */
void aer_enable_rootport(struct aer_rpc *rpc)
{
        struct pci_dev *pdev = rpc->rpd->port;
        int pos, aer_pos;
        u16 reg16;
        u32 reg32;

        pos = pci_pcie_cap(pdev);
        /* Clear PCIE Capability's Device Status */
        pci_read_config_word(pdev, pos+PCI_EXP_DEVSTA, &reg16);
        pci_write_config_word(pdev, pos+PCI_EXP_DEVSTA, reg16);

        /* Disable system error generation in response to error messages */
        pci_read_config_word(pdev, pos + PCI_EXP_RTCTL, &reg16);
        reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK);
        pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16);

        aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
        /* Clear error status */
        pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
        pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
        pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, &reg32);
        pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32);
        pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, &reg32);
        pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32);

        /*
         * Enable error reporting for the root port device and downstream port
         * devices.
         */
        set_downstream_devices_error_reporting(pdev, true);

        /* Enable Root Port's interrupt in response to error messages */
        pci_write_config_dword(pdev,
                aer_pos + PCI_ERR_ROOT_COMMAND,
                ROOT_PORT_INTR_ON_MESG_MASK);
}

/**
 * disable_root_aer - disable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIE bus unloads AER service driver.
 */
static void disable_root_aer(struct aer_rpc *rpc)
{
        struct pci_dev *pdev = rpc->rpd->port;
        u32 reg32;
        int pos;

        /*
         * Disable error reporting for the root port device and downstream port
         * devices.
         */
        set_downstream_devices_error_reporting(pdev, false);

        pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
        /* Disable Root's interrupt in response to error messages */
        pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, 0);

        /* Clear Root's error status reg */
        pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, &reg32);
        pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32);
}

/**
 * get_e_source - retrieve an error source
 * @rpc: pointer to the root port which holds an error
 *
 * Invoked by DPC handler to consume an error.
 */
static struct aer_err_source *get_e_source(struct aer_rpc *rpc)
{
        struct aer_err_source *e_source;
        unsigned long flags;

        /* Lock access to Root error producer/consumer index */
        spin_lock_irqsave(&rpc->e_lock, flags);
        if (rpc->prod_idx == rpc->cons_idx) {
                spin_unlock_irqrestore(&rpc->e_lock, flags);
                return NULL;
        }
        e_source = &rpc->e_sources[rpc->cons_idx];
        rpc->cons_idx++;
        if (rpc->cons_idx == AER_ERROR_SOURCES_MAX)
                rpc->cons_idx = 0;
        spin_unlock_irqrestore(&rpc->e_lock, flags);

        return e_source;
}

/**
 * get_device_error_info - read error status from dev and store it to info
 * @dev: pointer to the device expected to have an error record
 * @info: pointer to structure to store the error record
 *
 * Return 1 on success, 0 on error.
 */
static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
{
        int pos, temp;

        info->status = 0;
        info->tlp_header_valid = 0;

        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);

        /* The device might not support AER */
        if (!pos)
                return 1;

        if (info->severity == AER_CORRECTABLE) {
                pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS,
                        &info->status);
                pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK,
                        &info->mask);
                if (!(info->status & ~info->mask))
                        return 0;
        } else if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE ||
                info->severity == AER_NONFATAL) {

                /* Link is still healthy for IO reads */
                pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS,
                        &info->status);
                pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK,
                        &info->mask);
                if (!(info->status & ~info->mask))
                        return 0;

                /* Get First Error Pointer */
                pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp);
                info->first_error = PCI_ERR_CAP_FEP(temp);

                if (info->status & AER_LOG_TLP_MASKS) {
                        info->tlp_header_valid = 1;
                        pci_read_config_dword(dev,
                                pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
                        pci_read_config_dword(dev,
                                pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
                        pci_read_config_dword(dev,
                                pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
                        pci_read_config_dword(dev,
                                pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
                }
        }

        return 1;
}

static inline void aer_process_err_devices(struct pcie_device *p_device,
                        struct aer_err_info *e_info)
{
        int i;

        if (!e_info->dev[0]) {
                dev_printk(KERN_DEBUG, &p_device->port->dev,
                                "can't find device of ID%04x\n",
                                e_info->id);
        }

        /*
         * Report all errors before handling them, so that records are not
         * lost to a reset etc.
         */
        for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
                if (get_device_error_info(e_info->dev[i], e_info))
                        aer_print_error(e_info->dev[i], e_info);
        }
        for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
                if (get_device_error_info(e_info->dev[i], e_info))
                        handle_error_source(p_device, e_info->dev[i], e_info);
        }
}

/**
 * aer_isr_one_error - consume an error detected by root port
 * @p_device: pointer to error root port service device
 * @e_src: pointer to an error source
 */
static void aer_isr_one_error(struct pcie_device *p_device,
                struct aer_err_source *e_src)
{
        struct aer_err_info *e_info;
        int i;

        /* struct aer_err_info might be big, so we allocate it with slab */
        e_info = kmalloc(sizeof(struct aer_err_info), GFP_KERNEL);
        if (e_info == NULL) {
                dev_printk(KERN_DEBUG, &p_device->port->dev,
                        "Can't allocate mem when processing AER errors\n");
                return;
        }

        /*
         * It is possible that both a correctable error and an
         * uncorrectable error were logged.  Report the correctable
         * error first.
         */
        for (i = 1; i & ROOT_ERR_STATUS_MASKS ; i <<= 2) {
                if (i > 4)
                        break;
                if (!(e_src->status & i))
                        continue;

                memset(e_info, 0, sizeof(struct aer_err_info));

                /* Init comprehensive error information */
                if (i & PCI_ERR_ROOT_COR_RCV) {
                        e_info->id = ERR_COR_ID(e_src->id);
                        e_info->severity = AER_CORRECTABLE;
                } else {
                        e_info->id = ERR_UNCOR_ID(e_src->id);
                        e_info->severity = ((e_src->status >> 6) & 1);
                }
                if (e_src->status &
                        (PCI_ERR_ROOT_MULTI_COR_RCV |
                         PCI_ERR_ROOT_MULTI_UNCOR_RCV))
                        e_info->multi_error_valid = 1;

                aer_print_port_info(p_device->port, e_info);

                find_source_device(p_device->port, e_info);
                aer_process_err_devices(p_device, e_info);
        }

        kfree(e_info);
}

/**
 * aer_isr - consume errors detected by root port
 * @work: definition of this work item
 *
 * Invoked, as DPC, when root port records new detected error
 */
void aer_isr(struct work_struct *work)
{
        struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler);
        struct pcie_device *p_device = rpc->rpd;
        struct aer_err_source *e_src;

        mutex_lock(&rpc->rpc_mutex);
        e_src = get_e_source(rpc);
        while (e_src) {
                aer_isr_one_error(p_device, e_src);
                e_src = get_e_source(rpc);
        }
        mutex_unlock(&rpc->rpc_mutex);

        wake_up(&rpc->wait_release);
}

/**
 * aer_delete_rootport - disable root port aer and delete service data
 * @rpc: pointer to a root port device being deleted
 *
 * Invoked when AER service unloaded on a specific Root Port
 */
void aer_delete_rootport(struct aer_rpc *rpc)
{
        /* Disable root port AER itself */
        disable_root_aer(rpc);

        kfree(rpc);
}

/**
 * aer_init - provide AER initialization
 * @dev: pointer to AER pcie device
 *
 * Invoked when AER service driver is loaded.
 */
int aer_init(struct pcie_device *dev)
{
        if (dev->port->aer_firmware_first) {
                dev_printk(KERN_DEBUG, &dev->device,
                           "PCIe errors handled by platform firmware.\n");
                goto out;
        }

        if (aer_osc_setup(dev))
                goto out;

        return 0;
out:
        if (forceload) {
                dev_printk(KERN_DEBUG, &dev->device,
                           "aerdrv forceload requested.\n");
                dev->port->aer_firmware_first = 0;
                return 0;
        }
        return -ENXIO;
}