aerdrv_core.c 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880
  1. /*
  2. * drivers/pci/pcie/aer/aerdrv_core.c
  3. *
  4. * This file is subject to the terms and conditions of the GNU General Public
  5. * License. See the file "COPYING" in the main directory of this archive
  6. * for more details.
  7. *
  8. * This file implements the core part of PCI-Express AER. When a PCI Express
  9. * error is delivered, an error message will be collected and printed to
  10. * console, then, an error recovery procedure will be executed by following
  11. * the pci error recovery rules.
  12. *
  13. * Copyright (C) 2006 Intel Corp.
  14. * Tom Long Nguyen (tom.l.nguyen@intel.com)
  15. * Zhang Yanmin (yanmin.zhang@intel.com)
  16. *
  17. */
  18. #include <linux/module.h>
  19. #include <linux/pci.h>
  20. #include <linux/kernel.h>
  21. #include <linux/errno.h>
  22. #include <linux/pm.h>
  23. #include <linux/suspend.h>
  24. #include <linux/delay.h>
  25. #include <linux/slab.h>
  26. #include "aerdrv.h"
  27. static int forceload;
  28. static int nosourceid;
  29. module_param(forceload, bool, 0);
  30. module_param(nosourceid, bool, 0);
  31. int pci_enable_pcie_error_reporting(struct pci_dev *dev)
  32. {
  33. u16 reg16 = 0;
  34. int pos;
  35. if (dev->aer_firmware_first)
  36. return -EIO;
  37. pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
  38. if (!pos)
  39. return -EIO;
  40. pos = pci_pcie_cap(dev);
  41. if (!pos)
  42. return -EIO;
  43. pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
  44. reg16 = reg16 |
  45. PCI_EXP_DEVCTL_CERE |
  46. PCI_EXP_DEVCTL_NFERE |
  47. PCI_EXP_DEVCTL_FERE |
  48. PCI_EXP_DEVCTL_URRE;
  49. pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16);
  50. return 0;
  51. }
  52. EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
  53. int pci_disable_pcie_error_reporting(struct pci_dev *dev)
  54. {
  55. u16 reg16 = 0;
  56. int pos;
  57. if (dev->aer_firmware_first)
  58. return -EIO;
  59. pos = pci_pcie_cap(dev);
  60. if (!pos)
  61. return -EIO;
  62. pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
  63. reg16 = reg16 & ~(PCI_EXP_DEVCTL_CERE |
  64. PCI_EXP_DEVCTL_NFERE |
  65. PCI_EXP_DEVCTL_FERE |
  66. PCI_EXP_DEVCTL_URRE);
  67. pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16);
  68. return 0;
  69. }
  70. EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
  71. int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
  72. {
  73. int pos;
  74. u32 status;
  75. pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
  76. if (!pos)
  77. return -EIO;
  78. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
  79. if (status)
  80. pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
  81. return 0;
  82. }
  83. EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
  84. static int set_device_error_reporting(struct pci_dev *dev, void *data)
  85. {
  86. bool enable = *((bool *)data);
  87. if ((dev->pcie_type == PCI_EXP_TYPE_ROOT_PORT) ||
  88. (dev->pcie_type == PCI_EXP_TYPE_UPSTREAM) ||
  89. (dev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)) {
  90. if (enable)
  91. pci_enable_pcie_error_reporting(dev);
  92. else
  93. pci_disable_pcie_error_reporting(dev);
  94. }
  95. if (enable)
  96. pcie_set_ecrc_checking(dev);
  97. return 0;
  98. }
  99. /**
  100. * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports.
  101. * @dev: pointer to root port's pci_dev data structure
  102. * @enable: true = enable error reporting, false = disable error reporting.
  103. */
  104. static void set_downstream_devices_error_reporting(struct pci_dev *dev,
  105. bool enable)
  106. {
  107. set_device_error_reporting(dev, &enable);
  108. if (!dev->subordinate)
  109. return;
  110. pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
  111. }
  112. static inline int compare_device_id(struct pci_dev *dev,
  113. struct aer_err_info *e_info)
  114. {
  115. if (e_info->id == ((dev->bus->number << 8) | dev->devfn)) {
  116. /*
  117. * Device ID match
  118. */
  119. return 1;
  120. }
  121. return 0;
  122. }
  123. static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
  124. {
  125. if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) {
  126. e_info->dev[e_info->error_dev_num] = dev;
  127. e_info->error_dev_num++;
  128. return 1;
  129. }
  130. return 0;
  131. }
  132. #define PCI_BUS(x) (((x) >> 8) & 0xff)
  133. static int find_device_iter(struct pci_dev *dev, void *data)
  134. {
  135. int pos;
  136. u32 status;
  137. u32 mask;
  138. u16 reg16;
  139. int result;
  140. struct aer_err_info *e_info = (struct aer_err_info *)data;
  141. /*
  142. * When bus id is equal to 0, it might be a bad id
  143. * reported by root port.
  144. */
  145. if (!nosourceid && (PCI_BUS(e_info->id) != 0)) {
  146. result = compare_device_id(dev, e_info);
  147. if (result)
  148. add_error_device(e_info, dev);
  149. /*
  150. * If there is no multiple error, we stop
  151. * or continue based on the id comparing.
  152. */
  153. if (!e_info->multi_error_valid)
  154. return result;
  155. /*
  156. * If there are multiple errors and id does match,
  157. * We need continue to search other devices under
  158. * the root port. Return 0 means that.
  159. */
  160. if (result)
  161. return 0;
  162. }
  163. /*
  164. * When either
  165. * 1) nosourceid==y;
  166. * 2) bus id is equal to 0. Some ports might lose the bus
  167. * id of error source id;
  168. * 3) There are multiple errors and prior id comparing fails;
  169. * We check AER status registers to find the initial reporter.
  170. */
  171. if (atomic_read(&dev->enable_cnt) == 0)
  172. return 0;
  173. pos = pci_pcie_cap(dev);
  174. if (!pos)
  175. return 0;
  176. /* Check if AER is enabled */
  177. pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
  178. if (!(reg16 & (
  179. PCI_EXP_DEVCTL_CERE |
  180. PCI_EXP_DEVCTL_NFERE |
  181. PCI_EXP_DEVCTL_FERE |
  182. PCI_EXP_DEVCTL_URRE)))
  183. return 0;
  184. pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
  185. if (!pos)
  186. return 0;
  187. status = 0;
  188. mask = 0;
  189. if (e_info->severity == AER_CORRECTABLE) {
  190. pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
  191. pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask);
  192. if (status & ~mask) {
  193. add_error_device(e_info, dev);
  194. goto added;
  195. }
  196. } else {
  197. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
  198. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
  199. if (status & ~mask) {
  200. add_error_device(e_info, dev);
  201. goto added;
  202. }
  203. }
  204. return 0;
  205. added:
  206. if (e_info->multi_error_valid)
  207. return 0;
  208. else
  209. return 1;
  210. }
  211. /**
  212. * find_source_device - search through device hierarchy for source device
  213. * @parent: pointer to Root Port pci_dev data structure
  214. * @err_info: including detailed error information such like id
  215. *
  216. * Invoked when error is detected at the Root Port.
  217. */
  218. static void find_source_device(struct pci_dev *parent,
  219. struct aer_err_info *e_info)
  220. {
  221. struct pci_dev *dev = parent;
  222. int result;
  223. /* Is Root Port an agent that sends error message? */
  224. result = find_device_iter(dev, e_info);
  225. if (result)
  226. return;
  227. pci_walk_bus(parent->subordinate, find_device_iter, e_info);
  228. }
  229. static int report_error_detected(struct pci_dev *dev, void *data)
  230. {
  231. pci_ers_result_t vote;
  232. struct pci_error_handlers *err_handler;
  233. struct aer_broadcast_data *result_data;
  234. result_data = (struct aer_broadcast_data *) data;
  235. dev->error_state = result_data->state;
  236. if (!dev->driver ||
  237. !dev->driver->err_handler ||
  238. !dev->driver->err_handler->error_detected) {
  239. if (result_data->state == pci_channel_io_frozen &&
  240. !(dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) {
  241. /*
  242. * In case of fatal recovery, if one of down-
  243. * stream device has no driver. We might be
  244. * unable to recover because a later insmod
  245. * of a driver for this device is unaware of
  246. * its hw state.
  247. */
  248. dev_printk(KERN_DEBUG, &dev->dev, "device has %s\n",
  249. dev->driver ?
  250. "no AER-aware driver" : "no driver");
  251. }
  252. return 0;
  253. }
  254. err_handler = dev->driver->err_handler;
  255. vote = err_handler->error_detected(dev, result_data->state);
  256. result_data->result = merge_result(result_data->result, vote);
  257. return 0;
  258. }
  259. static int report_mmio_enabled(struct pci_dev *dev, void *data)
  260. {
  261. pci_ers_result_t vote;
  262. struct pci_error_handlers *err_handler;
  263. struct aer_broadcast_data *result_data;
  264. result_data = (struct aer_broadcast_data *) data;
  265. if (!dev->driver ||
  266. !dev->driver->err_handler ||
  267. !dev->driver->err_handler->mmio_enabled)
  268. return 0;
  269. err_handler = dev->driver->err_handler;
  270. vote = err_handler->mmio_enabled(dev);
  271. result_data->result = merge_result(result_data->result, vote);
  272. return 0;
  273. }
  274. static int report_slot_reset(struct pci_dev *dev, void *data)
  275. {
  276. pci_ers_result_t vote;
  277. struct pci_error_handlers *err_handler;
  278. struct aer_broadcast_data *result_data;
  279. result_data = (struct aer_broadcast_data *) data;
  280. if (!dev->driver ||
  281. !dev->driver->err_handler ||
  282. !dev->driver->err_handler->slot_reset)
  283. return 0;
  284. err_handler = dev->driver->err_handler;
  285. vote = err_handler->slot_reset(dev);
  286. result_data->result = merge_result(result_data->result, vote);
  287. return 0;
  288. }
  289. static int report_resume(struct pci_dev *dev, void *data)
  290. {
  291. struct pci_error_handlers *err_handler;
  292. dev->error_state = pci_channel_io_normal;
  293. if (!dev->driver ||
  294. !dev->driver->err_handler ||
  295. !dev->driver->err_handler->resume)
  296. return 0;
  297. err_handler = dev->driver->err_handler;
  298. err_handler->resume(dev);
  299. return 0;
  300. }
  301. /**
  302. * broadcast_error_message - handle message broadcast to downstream drivers
  303. * @dev: pointer to from where in a hierarchy message is broadcasted down
  304. * @state: error state
  305. * @error_mesg: message to print
  306. * @cb: callback to be broadcasted
  307. *
  308. * Invoked during error recovery process. Once being invoked, the content
  309. * of error severity will be broadcasted to all downstream drivers in a
  310. * hierarchy in question.
  311. */
  312. static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
  313. enum pci_channel_state state,
  314. char *error_mesg,
  315. int (*cb)(struct pci_dev *, void *))
  316. {
  317. struct aer_broadcast_data result_data;
  318. dev_printk(KERN_DEBUG, &dev->dev, "broadcast %s message\n", error_mesg);
  319. result_data.state = state;
  320. if (cb == report_error_detected)
  321. result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
  322. else
  323. result_data.result = PCI_ERS_RESULT_RECOVERED;
  324. if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) {
  325. /*
  326. * If the error is reported by a bridge, we think this error
  327. * is related to the downstream link of the bridge, so we
  328. * do error recovery on all subordinates of the bridge instead
  329. * of the bridge and clear the error status of the bridge.
  330. */
  331. if (cb == report_error_detected)
  332. dev->error_state = state;
  333. pci_walk_bus(dev->subordinate, cb, &result_data);
  334. if (cb == report_resume) {
  335. pci_cleanup_aer_uncorrect_error_status(dev);
  336. dev->error_state = pci_channel_io_normal;
  337. }
  338. } else {
  339. /*
  340. * If the error is reported by an end point, we think this
  341. * error is related to the upstream link of the end point.
  342. */
  343. pci_walk_bus(dev->bus, cb, &result_data);
  344. }
  345. return result_data.result;
  346. }
  /*
   * Output of find_aer_service(): filled in by find_aer_service_iter() while
   * the child devices of a port are visited.
   */
  struct find_aer_service_data {
  	/* AER service driver bound to a child device, if one was found */
  	struct pcie_port_service_driver *aer_driver;
  	/* set to 1 when a visited child belongs to a downstream port */
  	int is_downstream;
  };
  351. static int find_aer_service_iter(struct device *device, void *data)
  352. {
  353. struct device_driver *driver;
  354. struct pcie_port_service_driver *service_driver;
  355. struct find_aer_service_data *result;
  356. result = (struct find_aer_service_data *) data;
  357. if (device->bus == &pcie_port_bus_type) {
  358. struct pcie_device *pcie = to_pcie_device(device);
  359. if (pcie->port->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)
  360. result->is_downstream = 1;
  361. driver = device->driver;
  362. if (driver) {
  363. service_driver = to_service_driver(driver);
  364. if (service_driver->service == PCIE_PORT_SERVICE_AER) {
  365. result->aer_driver = service_driver;
  366. return 1;
  367. }
  368. }
  369. }
  370. return 0;
  371. }
  372. static void find_aer_service(struct pci_dev *dev,
  373. struct find_aer_service_data *data)
  374. {
  375. int retval;
  376. retval = device_for_each_child(&dev->dev, data, find_aer_service_iter);
  377. }
  378. static pci_ers_result_t reset_link(struct pcie_device *aerdev,
  379. struct pci_dev *dev)
  380. {
  381. struct pci_dev *udev;
  382. pci_ers_result_t status;
  383. struct find_aer_service_data data;
  384. if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)
  385. udev = dev;
  386. else
  387. udev = dev->bus->self;
  388. data.is_downstream = 0;
  389. data.aer_driver = NULL;
  390. find_aer_service(udev, &data);
  391. /*
  392. * Use the aer driver of the error agent firstly.
  393. * If it hasn't the aer driver, use the root port's
  394. */
  395. if (!data.aer_driver || !data.aer_driver->reset_link) {
  396. if (data.is_downstream &&
  397. aerdev->device.driver &&
  398. to_service_driver(aerdev->device.driver)->reset_link) {
  399. data.aer_driver =
  400. to_service_driver(aerdev->device.driver);
  401. } else {
  402. dev_printk(KERN_DEBUG, &dev->dev, "no link-reset "
  403. "support\n");
  404. return PCI_ERS_RESULT_DISCONNECT;
  405. }
  406. }
  407. status = data.aer_driver->reset_link(udev);
  408. if (status != PCI_ERS_RESULT_RECOVERED) {
  409. dev_printk(KERN_DEBUG, &dev->dev, "link reset at upstream "
  410. "device %s failed\n", pci_name(udev));
  411. return PCI_ERS_RESULT_DISCONNECT;
  412. }
  413. return status;
  414. }
  415. /**
  416. * do_recovery - handle nonfatal/fatal error recovery process
  417. * @aerdev: pointer to a pcie_device data structure of root port
  418. * @dev: pointer to a pci_dev data structure of agent detecting an error
  419. * @severity: error severity type
  420. *
  421. * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast
  422. * error detected message to all downstream drivers within a hierarchy in
  423. * question and return the returned code.
  424. */
  425. static pci_ers_result_t do_recovery(struct pcie_device *aerdev,
  426. struct pci_dev *dev,
  427. int severity)
  428. {
  429. pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
  430. enum pci_channel_state state;
  431. if (severity == AER_FATAL)
  432. state = pci_channel_io_frozen;
  433. else
  434. state = pci_channel_io_normal;
  435. status = broadcast_error_message(dev,
  436. state,
  437. "error_detected",
  438. report_error_detected);
  439. if (severity == AER_FATAL) {
  440. result = reset_link(aerdev, dev);
  441. if (result != PCI_ERS_RESULT_RECOVERED) {
  442. /* TODO: Should panic here? */
  443. return result;
  444. }
  445. }
  446. if (status == PCI_ERS_RESULT_CAN_RECOVER)
  447. status = broadcast_error_message(dev,
  448. state,
  449. "mmio_enabled",
  450. report_mmio_enabled);
  451. if (status == PCI_ERS_RESULT_NEED_RESET) {
  452. /*
  453. * TODO: Should call platform-specific
  454. * functions to reset slot before calling
  455. * drivers' slot_reset callbacks?
  456. */
  457. status = broadcast_error_message(dev,
  458. state,
  459. "slot_reset",
  460. report_slot_reset);
  461. }
  462. if (status == PCI_ERS_RESULT_RECOVERED)
  463. broadcast_error_message(dev,
  464. state,
  465. "resume",
  466. report_resume);
  467. return status;
  468. }
  469. /**
  470. * handle_error_source - handle logging error into an event log
  471. * @aerdev: pointer to pcie_device data structure of the root port
  472. * @dev: pointer to pci_dev data structure of error source device
  473. * @info: comprehensive error information
  474. *
  475. * Invoked when an error being detected by Root Port.
  476. */
  477. static void handle_error_source(struct pcie_device *aerdev,
  478. struct pci_dev *dev,
  479. struct aer_err_info *info)
  480. {
  481. pci_ers_result_t status = 0;
  482. int pos;
  483. if (info->severity == AER_CORRECTABLE) {
  484. /*
  485. * Correctable error does not need software intevention.
  486. * No need to go through error recovery process.
  487. */
  488. pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
  489. if (pos)
  490. pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
  491. info->status);
  492. } else {
  493. status = do_recovery(aerdev, dev, info->severity);
  494. if (status == PCI_ERS_RESULT_RECOVERED) {
  495. dev_printk(KERN_DEBUG, &dev->dev, "AER driver "
  496. "successfully recovered\n");
  497. } else {
  498. /* TODO: Should kernel panic here? */
  499. dev_printk(KERN_DEBUG, &dev->dev, "AER driver didn't "
  500. "recover\n");
  501. }
  502. }
  503. }
  504. /**
  505. * aer_enable_rootport - enable Root Port's interrupts when receiving messages
  506. * @rpc: pointer to a Root Port data structure
  507. *
  508. * Invoked when PCIe bus loads AER service driver.
  509. */
  510. void aer_enable_rootport(struct aer_rpc *rpc)
  511. {
  512. struct pci_dev *pdev = rpc->rpd->port;
  513. int pos, aer_pos;
  514. u16 reg16;
  515. u32 reg32;
  516. pos = pci_pcie_cap(pdev);
  517. /* Clear PCIe Capability's Device Status */
  518. pci_read_config_word(pdev, pos+PCI_EXP_DEVSTA, &reg16);
  519. pci_write_config_word(pdev, pos+PCI_EXP_DEVSTA, reg16);
  520. /* Disable system error generation in response to error messages */
  521. pci_read_config_word(pdev, pos + PCI_EXP_RTCTL, &reg16);
  522. reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK);
  523. pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16);
  524. aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
  525. /* Clear error status */
  526. pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
  527. pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
  528. pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, &reg32);
  529. pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32);
  530. pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, &reg32);
  531. pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32);
  532. /*
  533. * Enable error reporting for the root port device and downstream port
  534. * devices.
  535. */
  536. set_downstream_devices_error_reporting(pdev, true);
  537. /* Enable Root Port's interrupt in response to error messages */
  538. pci_write_config_dword(pdev,
  539. aer_pos + PCI_ERR_ROOT_COMMAND,
  540. ROOT_PORT_INTR_ON_MESG_MASK);
  541. }
  542. /**
  543. * disable_root_aer - disable Root Port's interrupts when receiving messages
  544. * @rpc: pointer to a Root Port data structure
  545. *
  546. * Invoked when PCIe bus unloads AER service driver.
  547. */
  548. static void disable_root_aer(struct aer_rpc *rpc)
  549. {
  550. struct pci_dev *pdev = rpc->rpd->port;
  551. u32 reg32;
  552. int pos;
  553. /*
  554. * Disable error reporting for the root port device and downstream port
  555. * devices.
  556. */
  557. set_downstream_devices_error_reporting(pdev, false);
  558. pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
  559. /* Disable Root's interrupt in response to error messages */
  560. pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, 0);
  561. /* Clear Root's error status reg */
  562. pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, &reg32);
  563. pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32);
  564. }
  565. /**
  566. * get_e_source - retrieve an error source
  567. * @rpc: pointer to the root port which holds an error
  568. *
  569. * Invoked by DPC handler to consume an error.
  570. */
  571. static struct aer_err_source *get_e_source(struct aer_rpc *rpc)
  572. {
  573. struct aer_err_source *e_source;
  574. unsigned long flags;
  575. /* Lock access to Root error producer/consumer index */
  576. spin_lock_irqsave(&rpc->e_lock, flags);
  577. if (rpc->prod_idx == rpc->cons_idx) {
  578. spin_unlock_irqrestore(&rpc->e_lock, flags);
  579. return NULL;
  580. }
  581. e_source = &rpc->e_sources[rpc->cons_idx];
  582. rpc->cons_idx++;
  583. if (rpc->cons_idx == AER_ERROR_SOURCES_MAX)
  584. rpc->cons_idx = 0;
  585. spin_unlock_irqrestore(&rpc->e_lock, flags);
  586. return e_source;
  587. }
  588. /**
  589. * get_device_error_info - read error status from dev and store it to info
  590. * @dev: pointer to the device expected to have a error record
  591. * @info: pointer to structure to store the error record
  592. *
  593. * Return 1 on success, 0 on error.
  594. */
  595. static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
  596. {
  597. int pos, temp;
  598. info->status = 0;
  599. info->tlp_header_valid = 0;
  600. pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
  601. /* The device might not support AER */
  602. if (!pos)
  603. return 1;
  604. if (info->severity == AER_CORRECTABLE) {
  605. pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS,
  606. &info->status);
  607. pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK,
  608. &info->mask);
  609. if (!(info->status & ~info->mask))
  610. return 0;
  611. } else if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE ||
  612. info->severity == AER_NONFATAL) {
  613. /* Link is still healthy for IO reads */
  614. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS,
  615. &info->status);
  616. pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK,
  617. &info->mask);
  618. if (!(info->status & ~info->mask))
  619. return 0;
  620. /* Get First Error Pointer */
  621. pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp);
  622. info->first_error = PCI_ERR_CAP_FEP(temp);
  623. if (info->status & AER_LOG_TLP_MASKS) {
  624. info->tlp_header_valid = 1;
  625. pci_read_config_dword(dev,
  626. pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
  627. pci_read_config_dword(dev,
  628. pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
  629. pci_read_config_dword(dev,
  630. pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
  631. pci_read_config_dword(dev,
  632. pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
  633. }
  634. }
  635. return 1;
  636. }
  637. static inline void aer_process_err_devices(struct pcie_device *p_device,
  638. struct aer_err_info *e_info)
  639. {
  640. int i;
  641. if (!e_info->dev[0]) {
  642. dev_printk(KERN_DEBUG, &p_device->port->dev,
  643. "can't find device of ID%04x\n",
  644. e_info->id);
  645. }
  646. /* Report all before handle them, not to lost records by reset etc. */
  647. for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
  648. if (get_device_error_info(e_info->dev[i], e_info))
  649. aer_print_error(e_info->dev[i], e_info);
  650. }
  651. for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
  652. if (get_device_error_info(e_info->dev[i], e_info))
  653. handle_error_source(p_device, e_info->dev[i], e_info);
  654. }
  655. }
  656. /**
  657. * aer_isr_one_error - consume an error detected by root port
  658. * @p_device: pointer to error root port service device
  659. * @e_src: pointer to an error source
  660. */
  661. static void aer_isr_one_error(struct pcie_device *p_device,
  662. struct aer_err_source *e_src)
  663. {
  664. struct aer_err_info *e_info;
  665. int i;
  666. /* struct aer_err_info might be big, so we allocate it with slab */
  667. e_info = kmalloc(sizeof(struct aer_err_info), GFP_KERNEL);
  668. if (e_info == NULL) {
  669. dev_printk(KERN_DEBUG, &p_device->port->dev,
  670. "Can't allocate mem when processing AER errors\n");
  671. return;
  672. }
  673. /*
  674. * There is a possibility that both correctable error and
  675. * uncorrectable error being logged. Report correctable error first.
  676. */
  677. for (i = 1; i & ROOT_ERR_STATUS_MASKS ; i <<= 2) {
  678. if (i > 4)
  679. break;
  680. if (!(e_src->status & i))
  681. continue;
  682. memset(e_info, 0, sizeof(struct aer_err_info));
  683. /* Init comprehensive error information */
  684. if (i & PCI_ERR_ROOT_COR_RCV) {
  685. e_info->id = ERR_COR_ID(e_src->id);
  686. e_info->severity = AER_CORRECTABLE;
  687. } else {
  688. e_info->id = ERR_UNCOR_ID(e_src->id);
  689. e_info->severity = ((e_src->status >> 6) & 1);
  690. }
  691. if (e_src->status &
  692. (PCI_ERR_ROOT_MULTI_COR_RCV |
  693. PCI_ERR_ROOT_MULTI_UNCOR_RCV))
  694. e_info->multi_error_valid = 1;
  695. aer_print_port_info(p_device->port, e_info);
  696. find_source_device(p_device->port, e_info);
  697. aer_process_err_devices(p_device, e_info);
  698. }
  699. kfree(e_info);
  700. }
  701. /**
  702. * aer_isr - consume errors detected by root port
  703. * @work: definition of this work item
  704. *
  705. * Invoked, as DPC, when root port records new detected error
  706. */
  707. void aer_isr(struct work_struct *work)
  708. {
  709. struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler);
  710. struct pcie_device *p_device = rpc->rpd;
  711. struct aer_err_source *e_src;
  712. mutex_lock(&rpc->rpc_mutex);
  713. e_src = get_e_source(rpc);
  714. while (e_src) {
  715. aer_isr_one_error(p_device, e_src);
  716. e_src = get_e_source(rpc);
  717. }
  718. mutex_unlock(&rpc->rpc_mutex);
  719. wake_up(&rpc->wait_release);
  720. }
  /**
   * aer_delete_rootport - disable root port aer and delete service data
   * @rpc: pointer to a root port device being deleted
   *
   * Invoked when AER service unloaded on a specific Root Port. @rpc is freed
   * here and must not be used by the caller afterwards.
   */
  void aer_delete_rootport(struct aer_rpc *rpc)
  {
  	/* Quiesce the hardware before releasing the bookkeeping data */
  	disable_root_aer(rpc);

  	kfree(rpc);
  }
  733. /**
  734. * aer_init - provide AER initialization
  735. * @dev: pointer to AER pcie device
  736. *
  737. * Invoked when AER service driver is loaded.
  738. */
  739. int aer_init(struct pcie_device *dev)
  740. {
  741. if (dev->port->aer_firmware_first) {
  742. dev_printk(KERN_DEBUG, &dev->device,
  743. "PCIe errors handled by platform firmware.\n");
  744. goto out;
  745. }
  746. if (aer_osc_setup(dev))
  747. goto out;
  748. return 0;
  749. out:
  750. if (forceload) {
  751. dev_printk(KERN_DEBUG, &dev->device,
  752. "aerdrv forceload requested.\n");
  753. dev->port->aer_firmware_first = 0;
  754. return 0;
  755. }
  756. return -ENXIO;
  757. }