aerdrv_core.c 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879
  1. /*
  2. * drivers/pci/pcie/aer/aerdrv_core.c
  3. *
  4. * This file is subject to the terms and conditions of the GNU General Public
  5. * License. See the file "COPYING" in the main directory of this archive
  6. * for more details.
  7. *
  8. * This file implements the core part of PCI-Express AER. When a pci-express
  9. * error is delivered, an error message will be collected and printed to
  10. * console, then, an error recovery procedure will be executed by following
  11. * the pci error recovery rules.
  12. *
  13. * Copyright (C) 2006 Intel Corp.
  14. * Tom Long Nguyen (tom.l.nguyen@intel.com)
  15. * Zhang Yanmin (yanmin.zhang@intel.com)
  16. *
  17. */
  18. #include <linux/module.h>
  19. #include <linux/pci.h>
  20. #include <linux/kernel.h>
  21. #include <linux/errno.h>
  22. #include <linux/pm.h>
  23. #include <linux/suspend.h>
  24. #include <linux/delay.h>
  25. #include "aerdrv.h"
  26. static int forceload;
  27. static int nosourceid;
  28. module_param(forceload, bool, 0);
  29. module_param(nosourceid, bool, 0);
  30. int pci_enable_pcie_error_reporting(struct pci_dev *dev)
  31. {
  32. u16 reg16 = 0;
  33. int pos;
  34. if (dev->aer_firmware_first)
  35. return -EIO;
  36. pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
  37. if (!pos)
  38. return -EIO;
  39. pos = pci_pcie_cap(dev);
  40. if (!pos)
  41. return -EIO;
  42. pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
  43. reg16 = reg16 |
  44. PCI_EXP_DEVCTL_CERE |
  45. PCI_EXP_DEVCTL_NFERE |
  46. PCI_EXP_DEVCTL_FERE |
  47. PCI_EXP_DEVCTL_URRE;
  48. pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16);
  49. return 0;
  50. }
  51. EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
  52. int pci_disable_pcie_error_reporting(struct pci_dev *dev)
  53. {
  54. u16 reg16 = 0;
  55. int pos;
  56. if (dev->aer_firmware_first)
  57. return -EIO;
  58. pos = pci_pcie_cap(dev);
  59. if (!pos)
  60. return -EIO;
  61. pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
  62. reg16 = reg16 & ~(PCI_EXP_DEVCTL_CERE |
  63. PCI_EXP_DEVCTL_NFERE |
  64. PCI_EXP_DEVCTL_FERE |
  65. PCI_EXP_DEVCTL_URRE);
  66. pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16);
  67. return 0;
  68. }
  69. EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
  70. int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
  71. {
  72. int pos;
  73. u32 status;
  74. pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
  75. if (!pos)
  76. return -EIO;
  77. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
  78. if (status)
  79. pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
  80. return 0;
  81. }
  82. EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
  83. static int set_device_error_reporting(struct pci_dev *dev, void *data)
  84. {
  85. bool enable = *((bool *)data);
  86. if ((dev->pcie_type == PCI_EXP_TYPE_ROOT_PORT) ||
  87. (dev->pcie_type == PCI_EXP_TYPE_UPSTREAM) ||
  88. (dev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)) {
  89. if (enable)
  90. pci_enable_pcie_error_reporting(dev);
  91. else
  92. pci_disable_pcie_error_reporting(dev);
  93. }
  94. if (enable)
  95. pcie_set_ecrc_checking(dev);
  96. return 0;
  97. }
  98. /**
  99. * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports.
  100. * @dev: pointer to root port's pci_dev data structure
  101. * @enable: true = enable error reporting, false = disable error reporting.
  102. */
  103. static void set_downstream_devices_error_reporting(struct pci_dev *dev,
  104. bool enable)
  105. {
  106. set_device_error_reporting(dev, &enable);
  107. if (!dev->subordinate)
  108. return;
  109. pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
  110. }
  111. static inline int compare_device_id(struct pci_dev *dev,
  112. struct aer_err_info *e_info)
  113. {
  114. if (e_info->id == ((dev->bus->number << 8) | dev->devfn)) {
  115. /*
  116. * Device ID match
  117. */
  118. return 1;
  119. }
  120. return 0;
  121. }
  122. static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
  123. {
  124. if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) {
  125. e_info->dev[e_info->error_dev_num] = dev;
  126. e_info->error_dev_num++;
  127. return 1;
  128. }
  129. return 0;
  130. }
  131. #define PCI_BUS(x) (((x) >> 8) & 0xff)
  132. static int find_device_iter(struct pci_dev *dev, void *data)
  133. {
  134. int pos;
  135. u32 status;
  136. u32 mask;
  137. u16 reg16;
  138. int result;
  139. struct aer_err_info *e_info = (struct aer_err_info *)data;
  140. /*
  141. * When bus id is equal to 0, it might be a bad id
  142. * reported by root port.
  143. */
  144. if (!nosourceid && (PCI_BUS(e_info->id) != 0)) {
  145. result = compare_device_id(dev, e_info);
  146. if (result)
  147. add_error_device(e_info, dev);
  148. /*
  149. * If there is no multiple error, we stop
  150. * or continue based on the id comparing.
  151. */
  152. if (!e_info->multi_error_valid)
  153. return result;
  154. /*
  155. * If there are multiple errors and id does match,
  156. * We need continue to search other devices under
  157. * the root port. Return 0 means that.
  158. */
  159. if (result)
  160. return 0;
  161. }
  162. /*
  163. * When either
  164. * 1) nosourceid==y;
  165. * 2) bus id is equal to 0. Some ports might lose the bus
  166. * id of error source id;
  167. * 3) There are multiple errors and prior id comparing fails;
  168. * We check AER status registers to find the initial reporter.
  169. */
  170. if (atomic_read(&dev->enable_cnt) == 0)
  171. return 0;
  172. pos = pci_pcie_cap(dev);
  173. if (!pos)
  174. return 0;
  175. /* Check if AER is enabled */
  176. pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
  177. if (!(reg16 & (
  178. PCI_EXP_DEVCTL_CERE |
  179. PCI_EXP_DEVCTL_NFERE |
  180. PCI_EXP_DEVCTL_FERE |
  181. PCI_EXP_DEVCTL_URRE)))
  182. return 0;
  183. pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
  184. if (!pos)
  185. return 0;
  186. status = 0;
  187. mask = 0;
  188. if (e_info->severity == AER_CORRECTABLE) {
  189. pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
  190. pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask);
  191. if (status & ~mask) {
  192. add_error_device(e_info, dev);
  193. goto added;
  194. }
  195. } else {
  196. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
  197. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
  198. if (status & ~mask) {
  199. add_error_device(e_info, dev);
  200. goto added;
  201. }
  202. }
  203. return 0;
  204. added:
  205. if (e_info->multi_error_valid)
  206. return 0;
  207. else
  208. return 1;
  209. }
  210. /**
  211. * find_source_device - search through device hierarchy for source device
  212. * @parent: pointer to Root Port pci_dev data structure
  213. * @err_info: including detailed error information such like id
  214. *
  215. * Invoked when error is detected at the Root Port.
  216. */
  217. static void find_source_device(struct pci_dev *parent,
  218. struct aer_err_info *e_info)
  219. {
  220. struct pci_dev *dev = parent;
  221. int result;
  222. /* Is Root Port an agent that sends error message? */
  223. result = find_device_iter(dev, e_info);
  224. if (result)
  225. return;
  226. pci_walk_bus(parent->subordinate, find_device_iter, e_info);
  227. }
  228. static int report_error_detected(struct pci_dev *dev, void *data)
  229. {
  230. pci_ers_result_t vote;
  231. struct pci_error_handlers *err_handler;
  232. struct aer_broadcast_data *result_data;
  233. result_data = (struct aer_broadcast_data *) data;
  234. dev->error_state = result_data->state;
  235. if (!dev->driver ||
  236. !dev->driver->err_handler ||
  237. !dev->driver->err_handler->error_detected) {
  238. if (result_data->state == pci_channel_io_frozen &&
  239. !(dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) {
  240. /*
  241. * In case of fatal recovery, if one of down-
  242. * stream device has no driver. We might be
  243. * unable to recover because a later insmod
  244. * of a driver for this device is unaware of
  245. * its hw state.
  246. */
  247. dev_printk(KERN_DEBUG, &dev->dev, "device has %s\n",
  248. dev->driver ?
  249. "no AER-aware driver" : "no driver");
  250. }
  251. return 0;
  252. }
  253. err_handler = dev->driver->err_handler;
  254. vote = err_handler->error_detected(dev, result_data->state);
  255. result_data->result = merge_result(result_data->result, vote);
  256. return 0;
  257. }
  258. static int report_mmio_enabled(struct pci_dev *dev, void *data)
  259. {
  260. pci_ers_result_t vote;
  261. struct pci_error_handlers *err_handler;
  262. struct aer_broadcast_data *result_data;
  263. result_data = (struct aer_broadcast_data *) data;
  264. if (!dev->driver ||
  265. !dev->driver->err_handler ||
  266. !dev->driver->err_handler->mmio_enabled)
  267. return 0;
  268. err_handler = dev->driver->err_handler;
  269. vote = err_handler->mmio_enabled(dev);
  270. result_data->result = merge_result(result_data->result, vote);
  271. return 0;
  272. }
  273. static int report_slot_reset(struct pci_dev *dev, void *data)
  274. {
  275. pci_ers_result_t vote;
  276. struct pci_error_handlers *err_handler;
  277. struct aer_broadcast_data *result_data;
  278. result_data = (struct aer_broadcast_data *) data;
  279. if (!dev->driver ||
  280. !dev->driver->err_handler ||
  281. !dev->driver->err_handler->slot_reset)
  282. return 0;
  283. err_handler = dev->driver->err_handler;
  284. vote = err_handler->slot_reset(dev);
  285. result_data->result = merge_result(result_data->result, vote);
  286. return 0;
  287. }
  288. static int report_resume(struct pci_dev *dev, void *data)
  289. {
  290. struct pci_error_handlers *err_handler;
  291. dev->error_state = pci_channel_io_normal;
  292. if (!dev->driver ||
  293. !dev->driver->err_handler ||
  294. !dev->driver->err_handler->resume)
  295. return 0;
  296. err_handler = dev->driver->err_handler;
  297. err_handler->resume(dev);
  298. return 0;
  299. }
  300. /**
  301. * broadcast_error_message - handle message broadcast to downstream drivers
  302. * @dev: pointer to from where in a hierarchy message is broadcasted down
  303. * @state: error state
  304. * @error_mesg: message to print
  305. * @cb: callback to be broadcasted
  306. *
  307. * Invoked during error recovery process. Once being invoked, the content
  308. * of error severity will be broadcasted to all downstream drivers in a
  309. * hierarchy in question.
  310. */
  311. static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
  312. enum pci_channel_state state,
  313. char *error_mesg,
  314. int (*cb)(struct pci_dev *, void *))
  315. {
  316. struct aer_broadcast_data result_data;
  317. dev_printk(KERN_DEBUG, &dev->dev, "broadcast %s message\n", error_mesg);
  318. result_data.state = state;
  319. if (cb == report_error_detected)
  320. result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
  321. else
  322. result_data.result = PCI_ERS_RESULT_RECOVERED;
  323. if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) {
  324. /*
  325. * If the error is reported by a bridge, we think this error
  326. * is related to the downstream link of the bridge, so we
  327. * do error recovery on all subordinates of the bridge instead
  328. * of the bridge and clear the error status of the bridge.
  329. */
  330. if (cb == report_error_detected)
  331. dev->error_state = state;
  332. pci_walk_bus(dev->subordinate, cb, &result_data);
  333. if (cb == report_resume) {
  334. pci_cleanup_aer_uncorrect_error_status(dev);
  335. dev->error_state = pci_channel_io_normal;
  336. }
  337. } else {
  338. /*
  339. * If the error is reported by an end point, we think this
  340. * error is related to the upstream link of the end point.
  341. */
  342. pci_walk_bus(dev->bus, cb, &result_data);
  343. }
  344. return result_data.result;
  345. }
  346. struct find_aer_service_data {
  347. struct pcie_port_service_driver *aer_driver;
  348. int is_downstream;
  349. };
  350. static int find_aer_service_iter(struct device *device, void *data)
  351. {
  352. struct device_driver *driver;
  353. struct pcie_port_service_driver *service_driver;
  354. struct find_aer_service_data *result;
  355. result = (struct find_aer_service_data *) data;
  356. if (device->bus == &pcie_port_bus_type) {
  357. struct pcie_device *pcie = to_pcie_device(device);
  358. if (pcie->port->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)
  359. result->is_downstream = 1;
  360. driver = device->driver;
  361. if (driver) {
  362. service_driver = to_service_driver(driver);
  363. if (service_driver->service == PCIE_PORT_SERVICE_AER) {
  364. result->aer_driver = service_driver;
  365. return 1;
  366. }
  367. }
  368. }
  369. return 0;
  370. }
  371. static void find_aer_service(struct pci_dev *dev,
  372. struct find_aer_service_data *data)
  373. {
  374. int retval;
  375. retval = device_for_each_child(&dev->dev, data, find_aer_service_iter);
  376. }
  377. static pci_ers_result_t reset_link(struct pcie_device *aerdev,
  378. struct pci_dev *dev)
  379. {
  380. struct pci_dev *udev;
  381. pci_ers_result_t status;
  382. struct find_aer_service_data data;
  383. if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)
  384. udev = dev;
  385. else
  386. udev = dev->bus->self;
  387. data.is_downstream = 0;
  388. data.aer_driver = NULL;
  389. find_aer_service(udev, &data);
  390. /*
  391. * Use the aer driver of the error agent firstly.
  392. * If it hasn't the aer driver, use the root port's
  393. */
  394. if (!data.aer_driver || !data.aer_driver->reset_link) {
  395. if (data.is_downstream &&
  396. aerdev->device.driver &&
  397. to_service_driver(aerdev->device.driver)->reset_link) {
  398. data.aer_driver =
  399. to_service_driver(aerdev->device.driver);
  400. } else {
  401. dev_printk(KERN_DEBUG, &dev->dev, "no link-reset "
  402. "support\n");
  403. return PCI_ERS_RESULT_DISCONNECT;
  404. }
  405. }
  406. status = data.aer_driver->reset_link(udev);
  407. if (status != PCI_ERS_RESULT_RECOVERED) {
  408. dev_printk(KERN_DEBUG, &dev->dev, "link reset at upstream "
  409. "device %s failed\n", pci_name(udev));
  410. return PCI_ERS_RESULT_DISCONNECT;
  411. }
  412. return status;
  413. }
  414. /**
  415. * do_recovery - handle nonfatal/fatal error recovery process
  416. * @aerdev: pointer to a pcie_device data structure of root port
  417. * @dev: pointer to a pci_dev data structure of agent detecting an error
  418. * @severity: error severity type
  419. *
  420. * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
  421. * error detected message to all downstream drivers within a hierarchy in
  422. * question and return the returned code.
  423. */
  424. static pci_ers_result_t do_recovery(struct pcie_device *aerdev,
  425. struct pci_dev *dev,
  426. int severity)
  427. {
  428. pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
  429. enum pci_channel_state state;
  430. if (severity == AER_FATAL)
  431. state = pci_channel_io_frozen;
  432. else
  433. state = pci_channel_io_normal;
  434. status = broadcast_error_message(dev,
  435. state,
  436. "error_detected",
  437. report_error_detected);
  438. if (severity == AER_FATAL) {
  439. result = reset_link(aerdev, dev);
  440. if (result != PCI_ERS_RESULT_RECOVERED) {
  441. /* TODO: Should panic here? */
  442. return result;
  443. }
  444. }
  445. if (status == PCI_ERS_RESULT_CAN_RECOVER)
  446. status = broadcast_error_message(dev,
  447. state,
  448. "mmio_enabled",
  449. report_mmio_enabled);
  450. if (status == PCI_ERS_RESULT_NEED_RESET) {
  451. /*
  452. * TODO: Should call platform-specific
  453. * functions to reset slot before calling
  454. * drivers' slot_reset callbacks?
  455. */
  456. status = broadcast_error_message(dev,
  457. state,
  458. "slot_reset",
  459. report_slot_reset);
  460. }
  461. if (status == PCI_ERS_RESULT_RECOVERED)
  462. broadcast_error_message(dev,
  463. state,
  464. "resume",
  465. report_resume);
  466. return status;
  467. }
  468. /**
  469. * handle_error_source - handle logging error into an event log
  470. * @aerdev: pointer to pcie_device data structure of the root port
  471. * @dev: pointer to pci_dev data structure of error source device
  472. * @info: comprehensive error information
  473. *
  474. * Invoked when an error being detected by Root Port.
  475. */
  476. static void handle_error_source(struct pcie_device *aerdev,
  477. struct pci_dev *dev,
  478. struct aer_err_info *info)
  479. {
  480. pci_ers_result_t status = 0;
  481. int pos;
  482. if (info->severity == AER_CORRECTABLE) {
  483. /*
  484. * Correctable error does not need software intevention.
  485. * No need to go through error recovery process.
  486. */
  487. pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
  488. if (pos)
  489. pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
  490. info->status);
  491. } else {
  492. status = do_recovery(aerdev, dev, info->severity);
  493. if (status == PCI_ERS_RESULT_RECOVERED) {
  494. dev_printk(KERN_DEBUG, &dev->dev, "AER driver "
  495. "successfully recovered\n");
  496. } else {
  497. /* TODO: Should kernel panic here? */
  498. dev_printk(KERN_DEBUG, &dev->dev, "AER driver didn't "
  499. "recover\n");
  500. }
  501. }
  502. }
  503. /**
  504. * aer_enable_rootport - enable Root Port's interrupts when receiving messages
  505. * @rpc: pointer to a Root Port data structure
  506. *
  507. * Invoked when PCIE bus loads AER service driver.
  508. */
  509. void aer_enable_rootport(struct aer_rpc *rpc)
  510. {
  511. struct pci_dev *pdev = rpc->rpd->port;
  512. int pos, aer_pos;
  513. u16 reg16;
  514. u32 reg32;
  515. pos = pci_pcie_cap(pdev);
  516. /* Clear PCIE Capability's Device Status */
  517. pci_read_config_word(pdev, pos+PCI_EXP_DEVSTA, &reg16);
  518. pci_write_config_word(pdev, pos+PCI_EXP_DEVSTA, reg16);
  519. /* Disable system error generation in response to error messages */
  520. pci_read_config_word(pdev, pos + PCI_EXP_RTCTL, &reg16);
  521. reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK);
  522. pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16);
  523. aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
  524. /* Clear error status */
  525. pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
  526. pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
  527. pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, &reg32);
  528. pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32);
  529. pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, &reg32);
  530. pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32);
  531. /*
  532. * Enable error reporting for the root port device and downstream port
  533. * devices.
  534. */
  535. set_downstream_devices_error_reporting(pdev, true);
  536. /* Enable Root Port's interrupt in response to error messages */
  537. pci_write_config_dword(pdev,
  538. aer_pos + PCI_ERR_ROOT_COMMAND,
  539. ROOT_PORT_INTR_ON_MESG_MASK);
  540. }
  541. /**
  542. * disable_root_aer - disable Root Port's interrupts when receiving messages
  543. * @rpc: pointer to a Root Port data structure
  544. *
  545. * Invoked when PCIE bus unloads AER service driver.
  546. */
  547. static void disable_root_aer(struct aer_rpc *rpc)
  548. {
  549. struct pci_dev *pdev = rpc->rpd->port;
  550. u32 reg32;
  551. int pos;
  552. /*
  553. * Disable error reporting for the root port device and downstream port
  554. * devices.
  555. */
  556. set_downstream_devices_error_reporting(pdev, false);
  557. pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
  558. /* Disable Root's interrupt in response to error messages */
  559. pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, 0);
  560. /* Clear Root's error status reg */
  561. pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, &reg32);
  562. pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32);
  563. }
  564. /**
  565. * get_e_source - retrieve an error source
  566. * @rpc: pointer to the root port which holds an error
  567. *
  568. * Invoked by DPC handler to consume an error.
  569. */
  570. static struct aer_err_source *get_e_source(struct aer_rpc *rpc)
  571. {
  572. struct aer_err_source *e_source;
  573. unsigned long flags;
  574. /* Lock access to Root error producer/consumer index */
  575. spin_lock_irqsave(&rpc->e_lock, flags);
  576. if (rpc->prod_idx == rpc->cons_idx) {
  577. spin_unlock_irqrestore(&rpc->e_lock, flags);
  578. return NULL;
  579. }
  580. e_source = &rpc->e_sources[rpc->cons_idx];
  581. rpc->cons_idx++;
  582. if (rpc->cons_idx == AER_ERROR_SOURCES_MAX)
  583. rpc->cons_idx = 0;
  584. spin_unlock_irqrestore(&rpc->e_lock, flags);
  585. return e_source;
  586. }
  587. /**
  588. * get_device_error_info - read error status from dev and store it to info
  589. * @dev: pointer to the device expected to have a error record
  590. * @info: pointer to structure to store the error record
  591. *
  592. * Return 1 on success, 0 on error.
  593. */
  594. static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
  595. {
  596. int pos, temp;
  597. info->status = 0;
  598. info->tlp_header_valid = 0;
  599. pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
  600. /* The device might not support AER */
  601. if (!pos)
  602. return 1;
  603. if (info->severity == AER_CORRECTABLE) {
  604. pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS,
  605. &info->status);
  606. pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK,
  607. &info->mask);
  608. if (!(info->status & ~info->mask))
  609. return 0;
  610. } else if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE ||
  611. info->severity == AER_NONFATAL) {
  612. /* Link is still healthy for IO reads */
  613. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS,
  614. &info->status);
  615. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK,
  616. &info->mask);
  617. if (!(info->status & ~info->mask))
  618. return 0;
  619. /* Get First Error Pointer */
  620. pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp);
  621. info->first_error = PCI_ERR_CAP_FEP(temp);
  622. if (info->status & AER_LOG_TLP_MASKS) {
  623. info->tlp_header_valid = 1;
  624. pci_read_config_dword(dev,
  625. pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
  626. pci_read_config_dword(dev,
  627. pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
  628. pci_read_config_dword(dev,
  629. pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
  630. pci_read_config_dword(dev,
  631. pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
  632. }
  633. }
  634. return 1;
  635. }
  636. static inline void aer_process_err_devices(struct pcie_device *p_device,
  637. struct aer_err_info *e_info)
  638. {
  639. int i;
  640. if (!e_info->dev[0]) {
  641. dev_printk(KERN_DEBUG, &p_device->port->dev,
  642. "can't find device of ID%04x\n",
  643. e_info->id);
  644. }
  645. /* Report all before handle them, not to lost records by reset etc. */
  646. for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
  647. if (get_device_error_info(e_info->dev[i], e_info))
  648. aer_print_error(e_info->dev[i], e_info);
  649. }
  650. for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
  651. if (get_device_error_info(e_info->dev[i], e_info))
  652. handle_error_source(p_device, e_info->dev[i], e_info);
  653. }
  654. }
  655. /**
  656. * aer_isr_one_error - consume an error detected by root port
  657. * @p_device: pointer to error root port service device
  658. * @e_src: pointer to an error source
  659. */
  660. static void aer_isr_one_error(struct pcie_device *p_device,
  661. struct aer_err_source *e_src)
  662. {
  663. struct aer_err_info *e_info;
  664. int i;
  665. /* struct aer_err_info might be big, so we allocate it with slab */
  666. e_info = kmalloc(sizeof(struct aer_err_info), GFP_KERNEL);
  667. if (e_info == NULL) {
  668. dev_printk(KERN_DEBUG, &p_device->port->dev,
  669. "Can't allocate mem when processing AER errors\n");
  670. return;
  671. }
  672. /*
  673. * There is a possibility that both correctable error and
  674. * uncorrectable error being logged. Report correctable error first.
  675. */
  676. for (i = 1; i & ROOT_ERR_STATUS_MASKS ; i <<= 2) {
  677. if (i > 4)
  678. break;
  679. if (!(e_src->status & i))
  680. continue;
  681. memset(e_info, 0, sizeof(struct aer_err_info));
  682. /* Init comprehensive error information */
  683. if (i & PCI_ERR_ROOT_COR_RCV) {
  684. e_info->id = ERR_COR_ID(e_src->id);
  685. e_info->severity = AER_CORRECTABLE;
  686. } else {
  687. e_info->id = ERR_UNCOR_ID(e_src->id);
  688. e_info->severity = ((e_src->status >> 6) & 1);
  689. }
  690. if (e_src->status &
  691. (PCI_ERR_ROOT_MULTI_COR_RCV |
  692. PCI_ERR_ROOT_MULTI_UNCOR_RCV))
  693. e_info->multi_error_valid = 1;
  694. aer_print_port_info(p_device->port, e_info);
  695. find_source_device(p_device->port, e_info);
  696. aer_process_err_devices(p_device, e_info);
  697. }
  698. kfree(e_info);
  699. }
  700. /**
  701. * aer_isr - consume errors detected by root port
  702. * @work: definition of this work item
  703. *
  704. * Invoked, as DPC, when root port records new detected error
  705. */
  706. void aer_isr(struct work_struct *work)
  707. {
  708. struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler);
  709. struct pcie_device *p_device = rpc->rpd;
  710. struct aer_err_source *e_src;
  711. mutex_lock(&rpc->rpc_mutex);
  712. e_src = get_e_source(rpc);
  713. while (e_src) {
  714. aer_isr_one_error(p_device, e_src);
  715. e_src = get_e_source(rpc);
  716. }
  717. mutex_unlock(&rpc->rpc_mutex);
  718. wake_up(&rpc->wait_release);
  719. }
  720. /**
  721. * aer_delete_rootport - disable root port aer and delete service data
  722. * @rpc: pointer to a root port device being deleted
  723. *
  724. * Invoked when AER service unloaded on a specific Root Port
  725. */
  726. void aer_delete_rootport(struct aer_rpc *rpc)
  727. {
  728. /* Disable root port AER itself */
  729. disable_root_aer(rpc);
  730. kfree(rpc);
  731. }
  732. /**
  733. * aer_init - provide AER initialization
  734. * @dev: pointer to AER pcie device
  735. *
  736. * Invoked when AER service driver is loaded.
  737. */
  738. int aer_init(struct pcie_device *dev)
  739. {
  740. if (dev->port->aer_firmware_first) {
  741. dev_printk(KERN_DEBUG, &dev->device,
  742. "PCIe errors handled by platform firmware.\n");
  743. goto out;
  744. }
  745. if (aer_osc_setup(dev))
  746. goto out;
  747. return 0;
  748. out:
  749. if (forceload) {
  750. dev_printk(KERN_DEBUG, &dev->device,
  751. "aerdrv forceload requested.\n");
  752. dev->port->aer_firmware_first = 0;
  753. return 0;
  754. }
  755. return -ENXIO;
  756. }