op_model_xscale.c

/**
 * @file op_model_xscale.c
 * XScale Performance Monitor Driver
 *
 * @remark Copyright 2000-2004 Deepak Saxena <dsaxena@mvista.com>
 * @remark Copyright 2000-2004 MontaVista Software Inc
 * @remark Copyright 2004 Dave Jiang <dave.jiang@intel.com>
 * @remark Copyright 2004 Intel Corporation
 * @remark Copyright 2004 Zwane Mwaikambo <zwane@arm.linux.org.uk>
 * @remark Copyright 2004 OProfile Authors
 *
 * @remark Read the file COPYING
 *
 * @author Zwane Mwaikambo
 */
/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <asm/irq.h>
#include <asm/system.h>

#include "op_counter.h"
#include "op_arm_model.h"
#define PMU_ENABLE	0x001	/* Enable counters */
#define PMN_RESET	0x002	/* Reset event counters */
#define CCNT_RESET	0x004	/* Reset clock counter */
#define PMU_RESET	(CCNT_RESET | PMN_RESET)
#define PMU_CNT64	0x008	/* Make CCNT count every 64th cycle */
/* TODO: do runtime detection */
#ifdef CONFIG_ARCH_IOP310
#define XSCALE_PMU_IRQ	IRQ_XS80200_PMU
#endif
#ifdef CONFIG_ARCH_IOP321
#define XSCALE_PMU_IRQ	IRQ_IOP321_CORE_PMU
#endif
#ifdef CONFIG_ARCH_IOP331
#define XSCALE_PMU_IRQ	IRQ_IOP331_CORE_PMU
#endif
#ifdef CONFIG_ARCH_PXA
#define XSCALE_PMU_IRQ	IRQ_PMU
#endif
/*
 * Different types of events that can be counted by the XScale PMU
 * as used by OProfile userspace. Listed here primarily for
 * documentation purposes.
 */
#define EVT_ICACHE_MISS			0x00
#define EVT_ICACHE_NO_DELIVER		0x01
#define EVT_DATA_STALL			0x02
#define EVT_ITLB_MISS			0x03
#define EVT_DTLB_MISS			0x04
#define EVT_BRANCH			0x05
#define EVT_BRANCH_MISS			0x06
#define EVT_INSTRUCTION			0x07
#define EVT_DCACHE_FULL_STALL		0x08
#define EVT_DCACHE_FULL_STALL_CONTIG	0x09
#define EVT_DCACHE_ACCESS		0x0A
#define EVT_DCACHE_MISS			0x0B
#define EVT_DCACHE_WRITE_BACK		0x0C
#define EVT_PC_CHANGED			0x0D
#define EVT_BCU_REQUEST			0x10
#define EVT_BCU_FULL			0x11
#define EVT_BCU_DRAIN			0x12
#define EVT_BCU_ECC_NO_ELOG		0x14
#define EVT_BCU_1_BIT_ERR		0x15
#define EVT_RMW				0x16
/* EVT_CCNT is not hardware defined */
#define EVT_CCNT			0xFE
#define EVT_UNUSED			0xFF
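
/*
 * Per-counter bookkeeping: `ovf` counts overflow interrupts that have not
 * yet been turned into samples, and `reset_counter` is the sample interval
 * the counter is reloaded with (negated) after each overflow.
 */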
struct pmu_counter {
	volatile unsigned long ovf;
	unsigned long reset_counter;
};

enum { CCNT, PMN0, PMN1, PMN2, PMN3, MAX_COUNTERS };

static struct pmu_counter results[MAX_COUNTERS];
/*
 * There are two versions of the PMU in current XScale processors
 * with differing register layouts and number of performance counters.
 * e.g. IOP321 is xsc1 whilst IOP331 is xsc2.
 * We detect which register layout to use in xscale_detect_pmu().
 */
enum { PMU_XSC1, PMU_XSC2 };

struct pmu_type {
	int id;
	char *name;
	int num_counters;
	unsigned int int_enable;
	unsigned int cnt_ovf[MAX_COUNTERS];
	unsigned int int_mask[MAX_COUNTERS];
};
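
/*
 * int_mask[] gives each counter's interrupt-enable bit and cnt_ovf[] its
 * overflow-flag bit; int_enable is built up at runtime by
 * xscale_setup_ctrs() and applied to the hardware in xscale_pmu_start().
 */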
static struct pmu_type pmu_parms[] = {
	{
		.id		= PMU_XSC1,
		.name		= "arm/xscale1",
		.num_counters	= 3,
		.int_mask	= { [PMN0] = 0x10, [PMN1] = 0x20,
				    [CCNT] = 0x40 },
		.cnt_ovf	= { [CCNT] = 0x400, [PMN0] = 0x100,
				    [PMN1] = 0x200 },
	},
	{
		.id		= PMU_XSC2,
		.name		= "arm/xscale2",
		.num_counters	= 5,
		.int_mask	= { [CCNT] = 0x01, [PMN0] = 0x02,
				    [PMN1] = 0x04, [PMN2] = 0x08,
				    [PMN3] = 0x10 },
		.cnt_ovf	= { [CCNT] = 0x01, [PMN0] = 0x02,
				    [PMN1] = 0x04, [PMN2] = 0x08,
				    [PMN3] = 0x10 },
	},
};
static struct pmu_type *pmu;
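
/*
 * All PMU access goes through coprocessor 14. As the accessors below
 * encode it: xsc1 has PMNC at c0,c0 and CCNT/PMN0/PMN1 at c1,c0, c2,c0
 * and c3,c0; xsc2 has PMNC at c0,c1, CCNT at c1,c1, INTEN at c4,c1, the
 * overflow flag register at c5,c1, EVTSEL at c8,c1 and PMN0-PMN3 at
 * c0,c2 through c3,c2.
 */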
static void write_pmnc(u32 val)
{
	if (pmu->id == PMU_XSC1) {
		/* upper 4 bits and 7, 11 are write-as-0 */
		val &= 0xffff77f;
		__asm__ __volatile__ ("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
	} else {
		/* bits 4-23 are write-as-0, 24-31 are write ignored */
		val &= 0xf;
		__asm__ __volatile__ ("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
	}
}
static u32 read_pmnc(void)
{
	u32 val;

	if (pmu->id == PMU_XSC1)
		__asm__ __volatile__ ("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
	else {
		__asm__ __volatile__ ("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
		/* bits 1-2 and 4-23 are read-unpredictable */
		val &= 0xff000009;
	}

	return val;
}
static u32 __xsc1_read_counter(int counter)
{
	u32 val = 0;

	switch (counter) {
	case CCNT:
		__asm__ __volatile__ ("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
		break;
	case PMN0:
		__asm__ __volatile__ ("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
		break;
	case PMN1:
		__asm__ __volatile__ ("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
		break;
	}

	return val;
}
static u32 __xsc2_read_counter(int counter)
{
	u32 val = 0;

	switch (counter) {
	case CCNT:
		__asm__ __volatile__ ("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
		break;
	case PMN0:
		__asm__ __volatile__ ("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
		break;
	case PMN1:
		__asm__ __volatile__ ("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
		break;
	case PMN2:
		__asm__ __volatile__ ("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
		break;
	case PMN3:
		__asm__ __volatile__ ("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
		break;
	}

	return val;
}
static u32 read_counter(int counter)
{
	u32 val;

	if (pmu->id == PMU_XSC1)
		val = __xsc1_read_counter(counter);
	else
		val = __xsc2_read_counter(counter);

	return val;
}
static void __xsc1_write_counter(int counter, u32 val)
{
	switch (counter) {
	case CCNT:
		__asm__ __volatile__ ("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
		break;
	case PMN0:
		__asm__ __volatile__ ("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
		break;
	case PMN1:
		__asm__ __volatile__ ("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
		break;
	}
}
static void __xsc2_write_counter(int counter, u32 val)
{
	switch (counter) {
	case CCNT:
		__asm__ __volatile__ ("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
		break;
	case PMN0:
		__asm__ __volatile__ ("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
		break;
	case PMN1:
		__asm__ __volatile__ ("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
		break;
	case PMN2:
		__asm__ __volatile__ ("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
		break;
	case PMN3:
		__asm__ __volatile__ ("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
		break;
	}
}
static void write_counter(int counter, u32 val)
{
	if (pmu->id == PMU_XSC1)
		__xsc1_write_counter(counter, val);
	else
		__xsc2_write_counter(counter, val);
}
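
/*
 * Counters are seeded with the two's complement of the requested sample
 * count, so the overflow interrupt fires after exactly that many events.
 * Event selection is packed per PMU variant: xsc1 takes the PMN0/PMN1
 * event numbers in PMNC bits 12-19 and 20-27, while xsc2 packs one 8-bit
 * event number per counter into the dedicated EVTSEL register.
 */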
static int xscale_setup_ctrs(void)
{
	u32 evtsel, pmnc;
	int i;

	for (i = CCNT; i < MAX_COUNTERS; i++) {
		if (counter_config[i].enabled)
			continue;

		counter_config[i].event = EVT_UNUSED;
	}

	switch (pmu->id) {
	case PMU_XSC1:
		pmnc = (counter_config[PMN1].event << 20) | (counter_config[PMN0].event << 12);
		pr_debug("xscale_setup_ctrs: pmnc: %#08x\n", pmnc);
		write_pmnc(pmnc);
		break;

	case PMU_XSC2:
		evtsel = counter_config[PMN0].event | (counter_config[PMN1].event << 8) |
			(counter_config[PMN2].event << 16) | (counter_config[PMN3].event << 24);

		pr_debug("xscale_setup_ctrs: evtsel %#08x\n", evtsel);
		__asm__ __volatile__ ("mcr p14, 0, %0, c8, c1, 0" : : "r" (evtsel));
		break;
	}

	for (i = CCNT; i < MAX_COUNTERS; i++) {
		if (counter_config[i].event == EVT_UNUSED) {
			counter_config[i].event = 0;
			pmu->int_enable &= ~pmu->int_mask[i];
			continue;
		}

		results[i].reset_counter = counter_config[i].count;
		write_counter(i, -(u32)counter_config[i].count);
		pmu->int_enable |= pmu->int_mask[i];
		pr_debug("xscale_setup_ctrs: counter%d %#08x from %#08lx\n", i,
			read_counter(i), counter_config[i].count);
	}

	return 0;
}
static inline void __xsc1_check_ctrs(void)
{
	int i;
	u32 pmnc = read_pmnc();

	/*
	 * NOTE: there's an A stepping erratum stating that if an overflow
	 * bit is already set and another overflow occurs, the previous
	 * overflow bit gets cleared. There's no workaround.
	 * Fixed in B stepping or later.
	 */

	/*
	 * Write the value back to clear the overflow flags. The overflow
	 * flags remain in pmnc for use below.
	 */
	write_pmnc(pmnc & ~PMU_ENABLE);

	for (i = CCNT; i <= PMN1; i++) {
		if (!(pmu->int_mask[i] & pmu->int_enable))
			continue;

		if (pmnc & pmu->cnt_ovf[i])
			results[i].ovf++;
	}
}
static inline void __xsc2_check_ctrs(void)
{
	int i;
	u32 flag = 0, pmnc = read_pmnc();

	pmnc &= ~PMU_ENABLE;
	write_pmnc(pmnc);

	/* read overflow flag register */
	__asm__ __volatile__ ("mrc p14, 0, %0, c5, c1, 0" : "=r" (flag));

	for (i = CCNT; i <= PMN3; i++) {
		if (!(pmu->int_mask[i] & pmu->int_enable))
			continue;

		if (flag & pmu->cnt_ovf[i])
			results[i].ovf++;
	}

	/* writeback clears overflow bits */
	__asm__ __volatile__ ("mcr p14, 0, %0, c5, c1, 0" : : "r" (flag));
}
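
/*
 * Interrupt path: disable the PMU and latch the overflow flags, then for
 * every overflowed counter reload the (negated) reset value, hand a
 * sample to oprofile, and finally re-enable the PMU.
 */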
static irqreturn_t xscale_pmu_interrupt(int irq, void *arg, struct pt_regs *regs)
{
	int i;
	u32 pmnc;

	if (pmu->id == PMU_XSC1)
		__xsc1_check_ctrs();
	else
		__xsc2_check_ctrs();

	for (i = CCNT; i < MAX_COUNTERS; i++) {
		if (!results[i].ovf)
			continue;

		write_counter(i, -(u32)results[i].reset_counter);
		oprofile_add_sample(regs, i);
		results[i].ovf--;
	}

	pmnc = read_pmnc() | PMU_ENABLE;
	write_pmnc(pmnc);

	return IRQ_HANDLED;
}
static void xscale_pmu_stop(void)
{
	u32 pmnc = read_pmnc();

	pmnc &= ~PMU_ENABLE;
	write_pmnc(pmnc);

	free_irq(XSCALE_PMU_IRQ, results);
}
static int xscale_pmu_start(void)
{
	int ret;
	u32 pmnc = read_pmnc();

	ret = request_irq(XSCALE_PMU_IRQ, xscale_pmu_interrupt, SA_INTERRUPT,
			"XScale PMU", (void *)results);
	if (ret < 0) {
		printk(KERN_ERR "oprofile: unable to request IRQ%d for XScale PMU\n",
			XSCALE_PMU_IRQ);
		return ret;
	}

	if (pmu->id == PMU_XSC1)
		pmnc |= pmu->int_enable;
	else {
		__asm__ __volatile__ ("mcr p14, 0, %0, c4, c1, 0" : : "r" (pmu->int_enable));
		pmnc &= ~PMU_CNT64;
	}

	pmnc |= PMU_ENABLE;
	write_pmnc(pmnc);
	pr_debug("xscale_pmu_start: pmnc: %#08x mask: %08x\n", pmnc, pmu->int_enable);

	return 0;
}
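
/*
 * Bits 15:13 of the XScale CPUID encode the core generation: 1 for
 * first-generation cores (xsc1 PMU) and 2 for second-generation cores
 * (xsc2 PMU). Anything else is unsupported here.
 */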
static int xscale_detect_pmu(void)
{
	int ret = 0;
	u32 id;

	id = (read_cpuid(CPUID_ID) >> 13) & 0x7;

	switch (id) {
	case 1:
		pmu = &pmu_parms[PMU_XSC1];
		break;
	case 2:
		pmu = &pmu_parms[PMU_XSC2];
		break;
	default:
		ret = -ENODEV;
		break;
	}

	if (!ret) {
		op_xscale_spec.name = pmu->name;
		op_xscale_spec.num_counters = pmu->num_counters;
		pr_debug("xscale_detect_pmu: detected %s PMU\n", pmu->name);
	}

	return ret;
}
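
/* Model descriptor consumed by the generic ARM OProfile layer (see op_arm_model.h) */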
struct op_arm_model_spec op_xscale_spec = {
	.init		= xscale_detect_pmu,
	.setup_ctrs	= xscale_setup_ctrs,
	.start		= xscale_pmu_start,
	.stop		= xscale_pmu_stop,
};