/*
 * drivers/pci/pcie/aer/aerdrv_core.c
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * This file implements the core part of PCI Express AER.  When a PCI Express
 * error is delivered, an error message is collected and printed to the
 * console, then an error recovery procedure is executed following the
 * PCI error recovery rules.
 *
 * Copyright (C) 2006 Intel Corp.
 *	Tom Long Nguyen (tom.l.nguyen@intel.com)
 *	Zhang Yanmin (yanmin.zhang@intel.com)
 *
 */

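/*
 * Rough flow of this file (editorial summary): the aer_isr() work handler
 * drains queued error sources via get_e_source(); for each source,
 * aer_isr_one_error() decodes the Root Error Status bits, locates the
 * reporting device(s) with find_source_device(), logs the registers read
 * by get_device_error_info(), and hands non-correctable errors to
 * do_recovery(), which broadcasts the error_detected/mmio_enabled/
 * slot_reset/resume callbacks to the affected drivers.
 */
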
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/pm.h>
#include <linux/suspend.h>
#include <linux/delay.h>
#include "aerdrv.h"

static bool forceload;
static bool nosourceid;
module_param(forceload, bool, 0);
module_param(nosourceid, bool, 0);

int pci_enable_pcie_error_reporting(struct pci_dev *dev)
{
        u16 reg16 = 0;
        int pos;

        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
        if (!pos)
                return -EIO;

        pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
        if (!pos)
                return -EIO;

        pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
        reg16 = reg16 |
                PCI_EXP_DEVCTL_CERE |
                PCI_EXP_DEVCTL_NFERE |
                PCI_EXP_DEVCTL_FERE |
                PCI_EXP_DEVCTL_URRE;
        pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16);

        return 0;
}
EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);

int pci_disable_pcie_error_reporting(struct pci_dev *dev)
{
        u16 reg16 = 0;
        int pos;

        pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
        if (!pos)
                return -EIO;

        pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
        reg16 = reg16 & ~(PCI_EXP_DEVCTL_CERE |
                PCI_EXP_DEVCTL_NFERE |
                PCI_EXP_DEVCTL_FERE |
                PCI_EXP_DEVCTL_URRE);
        pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, reg16);

        return 0;
}
EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);

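/*
 * Usage sketch (illustrative only, not part of the original file): a PCIe
 * endpoint driver would typically pair the two exported helpers above in
 * its probe/remove paths.  The function names below are hypothetical, and
 * enabling is best-effort because the device may lack an AER capability.
 *
 *	static int example_probe(struct pci_dev *pdev,
 *				 const struct pci_device_id *id)
 *	{
 *		int err = pci_enable_device(pdev);
 *		if (err)
 *			return err;
 *		if (pci_enable_pcie_error_reporting(pdev))
 *			dev_info(&pdev->dev, "AER reporting not enabled\n");
 *		return 0;
 *	}
 *
 *	static void example_remove(struct pci_dev *pdev)
 *	{
 *		pci_disable_pcie_error_reporting(pdev);
 *		pci_disable_device(pdev);
 *	}
 */
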
int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
{
        int pos;
        u32 status, mask;

        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
        if (!pos)
                return -EIO;

        pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
        pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
        if (dev->error_state == pci_channel_io_normal)
                status &= ~mask; /* Clear corresponding nonfatal bits */
        else
                status &= mask; /* Clear corresponding fatal bits */
        pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);

        return 0;
}
EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);

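/*
 * Usage sketch (illustrative only): pci_cleanup_aer_uncorrect_error_status()
 * is normally called from a driver's slot_reset error handler once the
 * device is reachable again, so stale uncorrectable status bits do not
 * produce duplicate reports.  The callback name below is hypothetical.
 *
 *	static pci_ers_result_t example_slot_reset(struct pci_dev *pdev)
 *	{
 *		if (pci_enable_device(pdev))
 *			return PCI_ERS_RESULT_DISCONNECT;
 *		pci_set_master(pdev);
 *		pci_restore_state(pdev);
 *		pci_cleanup_aer_uncorrect_error_status(pdev);
 *		return PCI_ERS_RESULT_RECOVERED;
 *	}
 */
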
#if 0
int pci_cleanup_aer_correct_error_status(struct pci_dev *dev)
{
        int pos;
        u32 status;

        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
        if (!pos)
                return -EIO;

        pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
        pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status);

        return 0;
}
#endif /* 0 */

static int set_device_error_reporting(struct pci_dev *dev, void *data)
{
        bool enable = *((bool *)data);

        if (dev->pcie_type == PCIE_RC_PORT ||
            dev->pcie_type == PCIE_SW_UPSTREAM_PORT ||
            dev->pcie_type == PCIE_SW_DOWNSTREAM_PORT) {
                if (enable)
                        pci_enable_pcie_error_reporting(dev);
                else
                        pci_disable_pcie_error_reporting(dev);
        }

        if (enable)
                pcie_set_ecrc_checking(dev);

        return 0;
}

/**
 * set_downstream_devices_error_reporting - enable/disable the error reporting bits on the root port and its downstream ports.
 * @dev: pointer to root port's pci_dev data structure
 * @enable: true = enable error reporting, false = disable error reporting.
 */
static void set_downstream_devices_error_reporting(struct pci_dev *dev,
                                                   bool enable)
{
        set_device_error_reporting(dev, &enable);

        if (!dev->subordinate)
                return;
        pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
}

static inline int compare_device_id(struct pci_dev *dev,
                                    struct aer_err_info *e_info)
{
        if (e_info->id == ((dev->bus->number << 8) | dev->devfn)) {
                /*
                 * Device ID match
                 */
                return 1;
        }

        return 0;
}

static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
{
        if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) {
                e_info->dev[e_info->error_dev_num] = dev;
                e_info->error_dev_num++;
                return 1;
        }

        return 0;
}

#define PCI_BUS(x)      (((x) >> 8) & 0xff)

static int find_device_iter(struct pci_dev *dev, void *data)
{
        int pos;
        u32 status;
        u32 mask;
        u16 reg16;
        int result;
        struct aer_err_info *e_info = (struct aer_err_info *)data;

        /*
         * When the bus ID is equal to 0, it might be a bad ID
         * reported by the root port.
         */
        if (!nosourceid && (PCI_BUS(e_info->id) != 0)) {
                result = compare_device_id(dev, e_info);
                if (result)
                        add_error_device(e_info, dev);

                /*
                 * If there is no multiple error, we stop
                 * or continue based on the result of the ID comparison.
                 */
                if (!e_info->multi_error_valid)
                        return result;

                /*
                 * If there are multiple errors and the ID does match,
                 * we need to continue searching other devices under
                 * the root port.  Returning 0 means that.
                 */
                if (result)
                        return 0;
        }

        /*
         * We check the AER status registers to find the initial reporter
         * when any of the following holds:
         * 1) nosourceid==y;
         * 2) the bus ID is equal to 0 (some ports might lose the bus ID
         *    of the error source);
         * 3) there are multiple errors and the prior ID comparison fails.
         */
        if (atomic_read(&dev->enable_cnt) == 0)
                return 0;
        pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
        if (!pos)
                return 0;

        /* Check if AER is enabled */
        pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16);
        if (!(reg16 & (
                PCI_EXP_DEVCTL_CERE |
                PCI_EXP_DEVCTL_NFERE |
                PCI_EXP_DEVCTL_FERE |
                PCI_EXP_DEVCTL_URRE)))
                return 0;
        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
        if (!pos)
                return 0;

        status = 0;
        mask = 0;
        if (e_info->severity == AER_CORRECTABLE) {
                pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
                pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask);
                if (status & ~mask) {
                        add_error_device(e_info, dev);
                        goto added;
                }
        } else {
                pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
                pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
                if (status & ~mask) {
                        add_error_device(e_info, dev);
                        goto added;
                }
        }

        return 0;

added:
        if (e_info->multi_error_valid)
                return 0;
        else
                return 1;
}

/**
 * find_source_device - search through device hierarchy for source device
 * @parent: pointer to Root Port pci_dev data structure
 * @e_info: includes detailed error information such as the ID
 *
 * Invoked when an error is detected at the Root Port.
 */
static void find_source_device(struct pci_dev *parent,
                               struct aer_err_info *e_info)
{
        struct pci_dev *dev = parent;
        int result;

        /* Is the Root Port an agent that sends an error message? */
        result = find_device_iter(dev, e_info);
        if (result)
                return;

        pci_walk_bus(parent->subordinate, find_device_iter, e_info);
}

static int report_error_detected(struct pci_dev *dev, void *data)
{
        pci_ers_result_t vote;
        struct pci_error_handlers *err_handler;
        struct aer_broadcast_data *result_data;
        result_data = (struct aer_broadcast_data *) data;

        dev->error_state = result_data->state;

        if (!dev->driver ||
                !dev->driver->err_handler ||
                !dev->driver->err_handler->error_detected) {
                if (result_data->state == pci_channel_io_frozen &&
                        !(dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) {
                        /*
                         * In case of fatal recovery, if one of the
                         * downstream devices has no driver, we might be
                         * unable to recover because a later insmod of a
                         * driver for this device would be unaware of
                         * its hw state.
                         */
                        dev_printk(KERN_DEBUG, &dev->dev, "device has %s\n",
                                   dev->driver ?
                                   "no AER-aware driver" : "no driver");
                }
                return 0;
        }

        err_handler = dev->driver->err_handler;
        vote = err_handler->error_detected(dev, result_data->state);
        result_data->result = merge_result(result_data->result, vote);
        return 0;
}

static int report_mmio_enabled(struct pci_dev *dev, void *data)
{
        pci_ers_result_t vote;
        struct pci_error_handlers *err_handler;
        struct aer_broadcast_data *result_data;
        result_data = (struct aer_broadcast_data *) data;

        if (!dev->driver ||
                !dev->driver->err_handler ||
                !dev->driver->err_handler->mmio_enabled)
                return 0;

        err_handler = dev->driver->err_handler;
        vote = err_handler->mmio_enabled(dev);
        result_data->result = merge_result(result_data->result, vote);
        return 0;
}

static int report_slot_reset(struct pci_dev *dev, void *data)
{
        pci_ers_result_t vote;
        struct pci_error_handlers *err_handler;
        struct aer_broadcast_data *result_data;
        result_data = (struct aer_broadcast_data *) data;

        if (!dev->driver ||
                !dev->driver->err_handler ||
                !dev->driver->err_handler->slot_reset)
                return 0;

        err_handler = dev->driver->err_handler;
        vote = err_handler->slot_reset(dev);
        result_data->result = merge_result(result_data->result, vote);
        return 0;
}

static int report_resume(struct pci_dev *dev, void *data)
{
        struct pci_error_handlers *err_handler;

        dev->error_state = pci_channel_io_normal;

        if (!dev->driver ||
                !dev->driver->err_handler ||
                !dev->driver->err_handler->resume)
                return 0;

        err_handler = dev->driver->err_handler;
        err_handler->resume(dev);
        return 0;
}

/**
 * broadcast_error_message - handle message broadcast to downstream drivers
 * @dev: pointer to the device from which the message is broadcast down the hierarchy
 * @state: error state
 * @error_mesg: message to print
 * @cb: callback to be broadcast
 *
 * Invoked during the error recovery process.  Once invoked, the error
 * severity is broadcast to all downstream drivers in the hierarchy in
 * question.
 */
static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
        enum pci_channel_state state,
        char *error_mesg,
        int (*cb)(struct pci_dev *, void *))
{
        struct aer_broadcast_data result_data;

        dev_printk(KERN_DEBUG, &dev->dev, "broadcast %s message\n", error_mesg);
        result_data.state = state;
        if (cb == report_error_detected)
                result_data.result = PCI_ERS_RESULT_CAN_RECOVER;
        else
                result_data.result = PCI_ERS_RESULT_RECOVERED;

        if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) {
                /*
                 * If the error is reported by a bridge, we think this error
                 * is related to the downstream link of the bridge, so we
                 * do error recovery on all subordinates of the bridge instead
                 * of the bridge and clear the error status of the bridge.
                 */
                if (cb == report_error_detected)
                        dev->error_state = state;
                pci_walk_bus(dev->subordinate, cb, &result_data);
                if (cb == report_resume) {
                        pci_cleanup_aer_uncorrect_error_status(dev);
                        dev->error_state = pci_channel_io_normal;
                }
        } else {
                /*
                 * If the error is reported by an end point, we think this
                 * error is related to the upstream link of the end point.
                 */
                pci_walk_bus(dev->bus, cb, &result_data);
        }

        return result_data.result;
}

struct find_aer_service_data {
        struct pcie_port_service_driver *aer_driver;
        int is_downstream;
};

static int find_aer_service_iter(struct device *device, void *data)
{
        struct device_driver *driver;
        struct pcie_port_service_driver *service_driver;
        struct find_aer_service_data *result;

        result = (struct find_aer_service_data *) data;

        if (device->bus == &pcie_port_bus_type) {
                struct pcie_port_data *port_data;

                port_data = pci_get_drvdata(to_pcie_device(device)->port);
                if (port_data->port_type == PCIE_SW_DOWNSTREAM_PORT)
                        result->is_downstream = 1;

                driver = device->driver;
                if (driver) {
                        service_driver = to_service_driver(driver);
                        if (service_driver->service == PCIE_PORT_SERVICE_AER) {
                                result->aer_driver = service_driver;
                                return 1;
                        }
                }
        }

        return 0;
}

static void find_aer_service(struct pci_dev *dev,
                             struct find_aer_service_data *data)
{
        int retval;
        retval = device_for_each_child(&dev->dev, data, find_aer_service_iter);
}

static pci_ers_result_t reset_link(struct pcie_device *aerdev,
                                   struct pci_dev *dev)
{
        struct pci_dev *udev;
        pci_ers_result_t status;
        struct find_aer_service_data data;

        if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)
                udev = dev;
        else
                udev = dev->bus->self;

        data.is_downstream = 0;
        data.aer_driver = NULL;
        find_aer_service(udev, &data);

        /*
         * Use the AER driver of the error agent first.
         * If it doesn't have one, use the Root Port's.
         */
        if (!data.aer_driver || !data.aer_driver->reset_link) {
                if (data.is_downstream &&
                        aerdev->device.driver &&
                        to_service_driver(aerdev->device.driver)->reset_link) {
                        data.aer_driver =
                                to_service_driver(aerdev->device.driver);
                } else {
                        dev_printk(KERN_DEBUG, &dev->dev, "no link-reset "
                                   "support\n");
                        return PCI_ERS_RESULT_DISCONNECT;
                }
        }

        status = data.aer_driver->reset_link(udev);
        if (status != PCI_ERS_RESULT_RECOVERED) {
                dev_printk(KERN_DEBUG, &dev->dev, "link reset at upstream "
                           "device %s failed\n", pci_name(udev));
                return PCI_ERS_RESULT_DISCONNECT;
        }

        return status;
}

/**
 * do_recovery - handle nonfatal/fatal error recovery process
 * @aerdev: pointer to a pcie_device data structure of root port
 * @dev: pointer to a pci_dev data structure of agent detecting an error
 * @severity: error severity type
 *
 * Invoked when an error is nonfatal/fatal.  Broadcasts an error-detected
 * message to all downstream drivers within the hierarchy in question and
 * returns the resulting code.
 */
static pci_ers_result_t do_recovery(struct pcie_device *aerdev,
                                    struct pci_dev *dev,
                                    int severity)
{
        pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED;
        enum pci_channel_state state;

        if (severity == AER_FATAL)
                state = pci_channel_io_frozen;
        else
                state = pci_channel_io_normal;

        status = broadcast_error_message(dev,
                        state,
                        "error_detected",
                        report_error_detected);

        if (severity == AER_FATAL) {
                result = reset_link(aerdev, dev);
                if (result != PCI_ERS_RESULT_RECOVERED) {
                        /* TODO: Should panic here? */
                        return result;
                }
        }

        if (status == PCI_ERS_RESULT_CAN_RECOVER)
                status = broadcast_error_message(dev,
                                state,
                                "mmio_enabled",
                                report_mmio_enabled);

        if (status == PCI_ERS_RESULT_NEED_RESET) {
                /*
                 * TODO: Should call platform-specific
                 * functions to reset slot before calling
                 * drivers' slot_reset callbacks?
                 */
                status = broadcast_error_message(dev,
                                state,
                                "slot_reset",
                                report_slot_reset);
        }

        if (status == PCI_ERS_RESULT_RECOVERED)
                broadcast_error_message(dev,
                                state,
                                "resume",
                                report_resume);

        return status;
}

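/*
 * For reference (illustrative only): the four broadcast stages above map
 * onto the optional struct pci_error_handlers an endpoint driver can
 * register; any stage a driver does not implement is simply skipped.
 * The handler and driver names below are hypothetical.
 *
 *	static struct pci_error_handlers example_err_handler = {
 *		.error_detected	= example_error_detected,
 *		.mmio_enabled	= example_mmio_enabled,
 *		.slot_reset	= example_slot_reset,
 *		.resume		= example_resume,
 *	};
 *
 *	static struct pci_driver example_driver = {
 *		.name		= "example",
 *		.err_handler	= &example_err_handler,
 *	};
 */
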
/**
 * handle_error_source - handle logging error into an event log
 * @aerdev: pointer to pcie_device data structure of the root port
 * @dev: pointer to pci_dev data structure of error source device
 * @info: comprehensive error information
 *
 * Invoked when an error is detected by the Root Port.
 */
static void handle_error_source(struct pcie_device *aerdev,
        struct pci_dev *dev,
        struct aer_err_info *info)
{
        pci_ers_result_t status = 0;
        int pos;

        if (info->severity == AER_CORRECTABLE) {
                /*
                 * A correctable error does not need software intervention.
                 * No need to go through the error recovery process.
                 */
                pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
                if (pos)
                        pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
                                        info->status);
        } else {
                status = do_recovery(aerdev, dev, info->severity);
                if (status == PCI_ERS_RESULT_RECOVERED) {
                        dev_printk(KERN_DEBUG, &dev->dev, "AER driver "
                                   "successfully recovered\n");
                } else {
                        /* TODO: Should kernel panic here? */
                        dev_printk(KERN_DEBUG, &dev->dev, "AER driver didn't "
                                   "recover\n");
                }
        }
}

/**
 * aer_enable_rootport - enable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIE bus loads AER service driver.
 */
void aer_enable_rootport(struct aer_rpc *rpc)
{
        struct pci_dev *pdev = rpc->rpd->port;
        int pos, aer_pos;
        u16 reg16;
        u32 reg32;

        pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
        /* Clear PCIE Capability's Device Status */
        pci_read_config_word(pdev, pos+PCI_EXP_DEVSTA, &reg16);
        pci_write_config_word(pdev, pos+PCI_EXP_DEVSTA, reg16);

        /* Disable system error generation in response to error messages */
        pci_read_config_word(pdev, pos + PCI_EXP_RTCTL, &reg16);
        reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK);
        pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16);

        aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
        /* Clear error status */
        pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32);
        pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
        pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, &reg32);
        pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32);
        pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, &reg32);
        pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32);

        /*
         * Enable error reporting for the root port device and downstream port
         * devices.
         */
        set_downstream_devices_error_reporting(pdev, true);

        /* Enable Root Port's interrupt in response to error messages */
        pci_write_config_dword(pdev,
                aer_pos + PCI_ERR_ROOT_COMMAND,
                ROOT_PORT_INTR_ON_MESG_MASK);
}

/**
 * disable_root_aer - disable Root Port's interrupts when receiving messages
 * @rpc: pointer to a Root Port data structure
 *
 * Invoked when PCIE bus unloads AER service driver.
 */
static void disable_root_aer(struct aer_rpc *rpc)
{
        struct pci_dev *pdev = rpc->rpd->port;
        u32 reg32;
        int pos;

        /*
         * Disable error reporting for the root port device and downstream port
         * devices.
         */
        set_downstream_devices_error_reporting(pdev, false);

        pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
        /* Disable Root's interrupt in response to error messages */
        pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, 0);

        /* Clear Root's error status reg */
        pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, &reg32);
        pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32);
}

/**
 * get_e_source - retrieve an error source
 * @rpc: pointer to the root port which holds an error
 *
 * Invoked by DPC handler to consume an error.
 */
static struct aer_err_source *get_e_source(struct aer_rpc *rpc)
{
        struct aer_err_source *e_source;
        unsigned long flags;

        /* Lock access to Root error producer/consumer index */
        spin_lock_irqsave(&rpc->e_lock, flags);
        if (rpc->prod_idx == rpc->cons_idx) {
                spin_unlock_irqrestore(&rpc->e_lock, flags);
                return NULL;
        }
        e_source = &rpc->e_sources[rpc->cons_idx];
        rpc->cons_idx++;
        if (rpc->cons_idx == AER_ERROR_SOURCES_MAX)
                rpc->cons_idx = 0;
        spin_unlock_irqrestore(&rpc->e_lock, flags);

        return e_source;
}

/**
 * get_device_error_info - read error status from dev and store it to info
 * @dev: pointer to the device expected to have an error record
 * @info: pointer to structure to store the error record
 *
 * Return 1 on success, 0 on error.
 */
static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
{
        int pos, temp;

        info->status = 0;
        info->tlp_header_valid = 0;

        pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);

        /* The device might not support AER */
        if (!pos)
                return 1;

        if (info->severity == AER_CORRECTABLE) {
                pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS,
                        &info->status);
                pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK,
                        &info->mask);
                if (!(info->status & ~info->mask))
                        return 0;
        } else if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE ||
                   info->severity == AER_NONFATAL) {

                /* Link is still healthy for IO reads */
                pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS,
                        &info->status);
                pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK,
                        &info->mask);
                if (!(info->status & ~info->mask))
                        return 0;

                /* Get First Error Pointer */
                pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp);
                info->first_error = PCI_ERR_CAP_FEP(temp);

                if (info->status & AER_LOG_TLP_MASKS) {
                        info->tlp_header_valid = 1;
                        pci_read_config_dword(dev,
                                pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
                        pci_read_config_dword(dev,
                                pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
                        pci_read_config_dword(dev,
                                pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
                        pci_read_config_dword(dev,
                                pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
                }
        }

        return 1;
}

static inline void aer_process_err_devices(struct pcie_device *p_device,
                                           struct aer_err_info *e_info)
{
        int i;

        if (!e_info->dev[0]) {
                dev_printk(KERN_DEBUG, &p_device->port->dev,
                           "can't find device of ID%04x\n",
                           e_info->id);
        }

        /*
         * Report all errors before handling them, so that records are not
         * lost to a reset etc.
         */
        for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
                if (get_device_error_info(e_info->dev[i], e_info))
                        aer_print_error(e_info->dev[i], e_info);
        }
        for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
                if (get_device_error_info(e_info->dev[i], e_info))
                        handle_error_source(p_device, e_info->dev[i], e_info);
        }
}

/**
 * aer_isr_one_error - consume an error detected by root port
 * @p_device: pointer to error root port service device
 * @e_src: pointer to an error source
 */
static void aer_isr_one_error(struct pcie_device *p_device,
                              struct aer_err_source *e_src)
{
        struct aer_err_info *e_info;
        int i;

        /* struct aer_err_info might be big, so we allocate it with slab */
        e_info = kmalloc(sizeof(struct aer_err_info), GFP_KERNEL);
        if (e_info == NULL) {
                dev_printk(KERN_DEBUG, &p_device->port->dev,
                           "Can't allocate mem when processing AER errors\n");
                return;
        }

        /*
         * There is a possibility that both a correctable error and an
         * uncorrectable error were logged.  Report the correctable error
         * first.
         */
        for (i = 1; i & ROOT_ERR_STATUS_MASKS; i <<= 2) {
                if (i > 4)
                        break;
                if (!(e_src->status & i))
                        continue;

                memset(e_info, 0, sizeof(struct aer_err_info));

                /* Init comprehensive error information */
                if (i & PCI_ERR_ROOT_COR_RCV) {
                        e_info->id = ERR_COR_ID(e_src->id);
                        e_info->severity = AER_CORRECTABLE;
                } else {
                        e_info->id = ERR_UNCOR_ID(e_src->id);
                        e_info->severity = ((e_src->status >> 6) & 1);
                }
                if (e_src->status &
                        (PCI_ERR_ROOT_MULTI_COR_RCV |
                         PCI_ERR_ROOT_MULTI_UNCOR_RCV))
                        e_info->multi_error_valid = 1;

                aer_print_port_info(p_device->port, e_info);

                find_source_device(p_device->port, e_info);
                aer_process_err_devices(p_device, e_info);
        }

        kfree(e_info);
}

/**
 * aer_isr - consume errors detected by root port
 * @work: definition of this work item
 *
 * Invoked, as DPC, when root port records new detected error
 */
void aer_isr(struct work_struct *work)
{
        struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler);
        struct pcie_device *p_device = rpc->rpd;
        struct aer_err_source *e_src;

        mutex_lock(&rpc->rpc_mutex);
        e_src = get_e_source(rpc);
        while (e_src) {
                aer_isr_one_error(p_device, e_src);
                e_src = get_e_source(rpc);
        }
        mutex_unlock(&rpc->rpc_mutex);

        wake_up(&rpc->wait_release);
}

/**
 * aer_delete_rootport - disable root port aer and delete service data
 * @rpc: pointer to a root port device being deleted
 *
 * Invoked when AER service unloaded on a specific Root Port
 */
void aer_delete_rootport(struct aer_rpc *rpc)
{
        /* Disable root port AER itself */
        disable_root_aer(rpc);

        kfree(rpc);
}

/**
 * aer_init - provide AER initialization
 * @dev: pointer to AER pcie device
 *
 * Invoked when AER service driver is loaded.
 */
int aer_init(struct pcie_device *dev)
{
        if (aer_osc_setup(dev) && !forceload)
                return -ENXIO;

        return 0;
}