unaligned.c

  1. /*
  2. * Copyright 2013 Tilera Corporation. All Rights Reserved.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License
  6. * as published by the Free Software Foundation, version 2.
  7. *
  8. * This program is distributed in the hope that it will be useful, but
  9. * WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  11. * NON INFRINGEMENT. See the GNU General Public License for
  12. * more details.
  13. *
  14. * A code-rewriter that handles unaligned exceptions.
  15. */
  16. #include <linux/smp.h>
  17. #include <linux/ptrace.h>
  18. #include <linux/slab.h>
  19. #include <linux/thread_info.h>
  20. #include <linux/uaccess.h>
  21. #include <linux/mman.h>
  22. #include <linux/types.h>
  23. #include <linux/err.h>
  24. #include <linux/module.h>
  25. #include <linux/compat.h>
  26. #include <linux/prctl.h>
  27. #include <asm/cacheflush.h>
  28. #include <asm/traps.h>
  29. #include <asm/uaccess.h>
  30. #include <asm/unaligned.h>
  31. #include <arch/abi.h>
  32. #include <arch/spr_def.h>
  33. #include <arch/opcode.h>
  34. /*
  35. * This file handles unaligned exceptions for tile-Gx. The tilepro's
  36. * unaligned exception is handled in single_step.c
  37. */
  38. int unaligned_printk;
  39. static int __init setup_unaligned_printk(char *str)
  40. {
  41. long val;
  42. if (kstrtol(str, 0, &val) != 0)
  43. return 0;
  44. unaligned_printk = val;
  45. pr_info("Printk for each unaligned data accesses is %s\n",
  46. unaligned_printk ? "enabled" : "disabled");
  47. return 1;
  48. }
  49. __setup("unaligned_printk=", setup_unaligned_printk);
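/*
 * Example: booting with "unaligned_printk=1" on the kernel command line
 * enables a pr_info() line for each unaligned access that gets fixed up;
 * unaligned_printk defaults to 0 (disabled).
 */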
  50. unsigned int unaligned_fixup_count;
  51. #ifdef __tilegx__
  52. /*
  53. * Unaligned-data JIT fixup code fragment. Reserved space is 128 bytes.
  54. * The first 64-bit word saves the fault PC address, the second word is the
  55. * faulting instruction bundle, followed by 14 JIT bundles.
  56. */
  57. struct unaligned_jit_fragment {
  58. unsigned long pc;
  59. tilegx_bundle_bits bundle;
  60. tilegx_bundle_bits insn[14];
  61. };
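/*
 * Layout note: 8 bytes (pc) + 8 bytes (bundle) + 14 * 8 bytes (insn)
 * account for exactly the 128 reserved bytes per fragment.
 */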
  62. /*
  63. * Check if a nop or fnop is at the bundle's X0 pipeline.
  64. */
  65. static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
  66. {
  67. return (((get_UnaryOpcodeExtension_X0(bundle) ==
  68. NOP_UNARY_OPCODE_X0) &&
  69. (get_RRROpcodeExtension_X0(bundle) ==
  70. UNARY_RRR_0_OPCODE_X0) &&
  71. (get_Opcode_X0(bundle) ==
  72. RRR_0_OPCODE_X0)) ||
  73. ((get_UnaryOpcodeExtension_X0(bundle) ==
  74. FNOP_UNARY_OPCODE_X0) &&
  75. (get_RRROpcodeExtension_X0(bundle) ==
  76. UNARY_RRR_0_OPCODE_X0) &&
  77. (get_Opcode_X0(bundle) ==
  78. RRR_0_OPCODE_X0)));
  79. }
  80. /*
  81. * Check if a nop or fnop is at the bundle's X1 pipeline.
  82. */
  83. static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
  84. {
  85. return (((get_UnaryOpcodeExtension_X1(bundle) ==
  86. NOP_UNARY_OPCODE_X1) &&
  87. (get_RRROpcodeExtension_X1(bundle) ==
  88. UNARY_RRR_0_OPCODE_X1) &&
  89. (get_Opcode_X1(bundle) ==
  90. RRR_0_OPCODE_X1)) ||
  91. ((get_UnaryOpcodeExtension_X1(bundle) ==
  92. FNOP_UNARY_OPCODE_X1) &&
  93. (get_RRROpcodeExtension_X1(bundle) ==
  94. UNARY_RRR_0_OPCODE_X1) &&
  95. (get_Opcode_X1(bundle) ==
  96. RRR_0_OPCODE_X1)));
  97. }
  98. /*
  99. * Check if a nop or fnop is at the bundle's Y0 pipeline.
  100. */
  101. static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
  102. {
  103. return (((get_UnaryOpcodeExtension_Y0(bundle) ==
  104. NOP_UNARY_OPCODE_Y0) &&
  105. (get_RRROpcodeExtension_Y0(bundle) ==
  106. UNARY_RRR_1_OPCODE_Y0) &&
  107. (get_Opcode_Y0(bundle) ==
  108. RRR_1_OPCODE_Y0)) ||
  109. ((get_UnaryOpcodeExtension_Y0(bundle) ==
  110. FNOP_UNARY_OPCODE_Y0) &&
  111. (get_RRROpcodeExtension_Y0(bundle) ==
  112. UNARY_RRR_1_OPCODE_Y0) &&
  113. (get_Opcode_Y0(bundle) ==
  114. RRR_1_OPCODE_Y0)));
  115. }
  116. /*
  117. * Check if a nop or fnop is at the bundle's Y1 pipeline.
  118. */
  119. static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
  120. {
  121. return (((get_UnaryOpcodeExtension_Y1(bundle) ==
  122. NOP_UNARY_OPCODE_Y1) &&
  123. (get_RRROpcodeExtension_Y1(bundle) ==
  124. UNARY_RRR_1_OPCODE_Y1) &&
  125. (get_Opcode_Y1(bundle) ==
  126. RRR_1_OPCODE_Y1)) ||
  127. ((get_UnaryOpcodeExtension_Y1(bundle) ==
  128. FNOP_UNARY_OPCODE_Y1) &&
  129. (get_RRROpcodeExtension_Y1(bundle) ==
  130. UNARY_RRR_1_OPCODE_Y1) &&
  131. (get_Opcode_Y1(bundle) ==
  132. RRR_1_OPCODE_Y1)));
  133. }
  134. /*
  135. * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
  136. */
  137. static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
  138. {
  139. return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
  140. }
  141. /*
  142. * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
  143. */
  144. static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
  145. {
  146. return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
  147. }
  148. /*
  149. * Find the destination and source registers of the faulting unaligned
  150. * access instruction at X1 or Y2. Also allocate up to 3 scratch registers,
  151. * clob1, clob2 and clob3, which are guaranteed to differ from any register
  152. * used in the fault bundle. r_alias reports whether instructions other than
  153. * the unaligned load/store share a register with ra, rb or rd.
  154. */
  155. static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
  156. uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
  157. uint64_t *clob3, bool *r_alias)
  158. {
  159. int i;
  160. uint64_t reg;
  161. uint64_t reg_map = 0, alias_reg_map = 0, map;
  162. bool alias;
  163. *ra = -1;
  164. *rb = -1;
  165. if (rd)
  166. *rd = -1;
  167. *clob1 = -1;
  168. *clob2 = -1;
  169. *clob3 = -1;
  170. alias = false;
  171. /*
  172. * Parse the fault bundle, find the potentially used registers and mark
  173. * the corresponding bits in reg_map and alias_reg_map. These 2 bit maps
  174. * are used to find the scratch registers and to determine if there is a
  175. * register alias.
  176. */
  177. if (bundle & TILEGX_BUNDLE_MODE_MASK) { /* Y Mode Bundle. */
  178. reg = get_SrcA_Y2(bundle);
  179. reg_map |= 1ULL << reg;
  180. *ra = reg;
  181. reg = get_SrcBDest_Y2(bundle);
  182. reg_map |= 1ULL << reg;
  183. if (rd) {
  184. /* Load. */
  185. *rd = reg;
  186. alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
  187. } else {
  188. /* Store. */
  189. *rb = reg;
  190. alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
  191. }
  192. if (!is_bundle_y1_nop(bundle)) {
  193. reg = get_SrcA_Y1(bundle);
  194. reg_map |= (1ULL << reg);
  195. map = (1ULL << reg);
  196. reg = get_SrcB_Y1(bundle);
  197. reg_map |= (1ULL << reg);
  198. map |= (1ULL << reg);
  199. reg = get_Dest_Y1(bundle);
  200. reg_map |= (1ULL << reg);
  201. map |= (1ULL << reg);
  202. if (map & alias_reg_map)
  203. alias = true;
  204. }
  205. if (!is_bundle_y0_nop(bundle)) {
  206. reg = get_SrcA_Y0(bundle);
  207. reg_map |= (1ULL << reg);
  208. map = (1ULL << reg);
  209. reg = get_SrcB_Y0(bundle);
  210. reg_map |= (1ULL << reg);
  211. map |= (1ULL << reg);
  212. reg = get_Dest_Y0(bundle);
  213. reg_map |= (1ULL << reg);
  214. map |= (1ULL << reg);
  215. if (map & alias_reg_map)
  216. alias = true;
  217. }
  218. } else { /* X Mode Bundle. */
  219. reg = get_SrcA_X1(bundle);
  220. reg_map |= (1ULL << reg);
  221. *ra = reg;
  222. if (rd) {
  223. /* Load. */
  224. reg = get_Dest_X1(bundle);
  225. reg_map |= (1ULL << reg);
  226. *rd = reg;
  227. alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
  228. } else {
  229. /* Store. */
  230. reg = get_SrcB_X1(bundle);
  231. reg_map |= (1ULL << reg);
  232. *rb = reg;
  233. alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
  234. }
  235. if (!is_bundle_x0_nop(bundle)) {
  236. reg = get_SrcA_X0(bundle);
  237. reg_map |= (1ULL << reg);
  238. map = (1ULL << reg);
  239. reg = get_SrcB_X0(bundle);
  240. reg_map |= (1ULL << reg);
  241. map |= (1ULL << reg);
  242. reg = get_Dest_X0(bundle);
  243. reg_map |= (1ULL << reg);
  244. map |= (1ULL << reg);
  245. if (map & alias_reg_map)
  246. alias = true;
  247. }
  248. }
  249. /*
  250. * "alias" indicates if the unalign access registers have collision
  251. * with others in the same bundle. We jsut simply test all register
  252. * operands case (RRR), ignored the case with immidate. If a bundle
  253. * has no register alias, we may do fixup in a simple or fast manner.
  254. * So if an immidata field happens to hit with a register, we may end
  255. * up fall back to the generic handling.
  256. */
  257. *r_alias = alias;
  258. /* Flip bits on reg_map. */
  259. reg_map ^= -1ULL;
  260. /* Scan the lower TREG_SP (54) bits of reg_map to find 3 set bits. */
  261. for (i = 0; i < TREG_SP; i++) {
  262. if (reg_map & (0x1ULL << i)) {
  263. if (*clob1 == -1) {
  264. *clob1 = i;
  265. } else if (*clob2 == -1) {
  266. *clob2 = i;
  267. } else if (*clob3 == -1) {
  268. *clob3 = i;
  269. return;
  270. }
  271. }
  272. }
  273. }
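/*
 * Illustrative example (not from a real trace): for an X-mode bundle such
 * as { add r7, r8, r9 ; ld r3, r5 }, find_regs() returns *rd = 3, *ra = 5,
 * marks bits 3, 5, 7, 8 and 9 in reg_map, sees no overlap between the add's
 * registers and the load's (so *r_alias stays false), and then picks the
 * three lowest registers not in reg_map, here r0, r1 and r2, as clob1/2/3.
 */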
  274. /*
  275. * Sanity check for register ra, rb, rd, clob1/2/3. Return true if any of them
  276. * is unexpected.
  277. */
  278. static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
  279. uint64_t clob1, uint64_t clob2, uint64_t clob3)
  280. {
  281. bool unexpected = false;
  282. if ((ra >= 56) && (ra != TREG_ZERO))
  283. unexpected = true;
  284. if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
  285. unexpected = true;
  286. if (rd != -1) {
  287. if ((rd >= 56) && (rd != TREG_ZERO))
  288. unexpected = true;
  289. } else {
  290. if ((rb >= 56) && (rb != TREG_ZERO))
  291. unexpected = true;
  292. }
  293. return unexpected;
  294. }
  295. #define GX_INSN_X0_MASK ((1ULL << 31) - 1)
  296. #define GX_INSN_X1_MASK (((1ULL << 31) - 1) << 31)
  297. #define GX_INSN_Y0_MASK ((0xFULL << 27) | (0xFFFFFULL))
  298. #define GX_INSN_Y1_MASK (GX_INSN_Y0_MASK << 31)
  299. #define GX_INSN_Y2_MASK ((0x7FULL << 51) | (0x7FULL << 20))
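/*
 * The masks above carve out the individual pipeline slots (X0/X1 and
 * Y0/Y1/Y2) of a 64-bit bundle; they are used below to splice operand
 * fields from the __JIT_CODE templates into newly built bundles.
 */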
  300. #ifdef __LITTLE_ENDIAN
  301. #define GX_INSN_BSWAP(_bundle_) (_bundle_)
  302. #else
  303. #define GX_INSN_BSWAP(_bundle_) swab64(_bundle_)
  304. #endif /* __LITTLE_ENDIAN */
  305. /*
  306. * __JIT_CODE(.) creates template bundles in the .rodata.unalign_data
  307. * section. The corresponding static function jit_x#_###(.) generates a
  308. * partial or whole bundle based on the template and the given arguments.
  309. */
  310. #define __JIT_CODE(_X_) \
  311. asm (".pushsection .rodata.unalign_data, \"a\"\n" \
  312. _X_"\n" \
  313. ".popsection\n")
  314. __JIT_CODE("__unalign_jit_x1_mtspr: {mtspr 0, r0}");
  315. static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
  316. {
  317. extern tilegx_bundle_bits __unalign_jit_x1_mtspr;
  318. return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
  319. create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
  320. }
  321. __JIT_CODE("__unalign_jit_x1_mfspr: {mfspr r0, 0}");
  322. static tilegx_bundle_bits jit_x1_mfspr(int reg, int spr)
  323. {
  324. extern tilegx_bundle_bits __unalign_jit_x1_mfspr;
  325. return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
  326. create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
  327. }
  328. __JIT_CODE("__unalign_jit_x0_addi: {addi r0, r0, 0; iret}");
  329. static tilegx_bundle_bits jit_x0_addi(int rd, int ra, int imm8)
  330. {
  331. extern tilegx_bundle_bits __unalign_jit_x0_addi;
  332. return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
  333. create_Dest_X0(rd) | create_SrcA_X0(ra) |
  334. create_Imm8_X0(imm8);
  335. }
  336. __JIT_CODE("__unalign_jit_x1_ldna: {ldna r0, r0}");
  337. static tilegx_bundle_bits jit_x1_ldna(int rd, int ra)
  338. {
  339. extern tilegx_bundle_bits __unalign_jit_x1_ldna;
  340. return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) & GX_INSN_X1_MASK) |
  341. create_Dest_X1(rd) | create_SrcA_X1(ra);
  342. }
  343. __JIT_CODE("__unalign_jit_x0_dblalign: {dblalign r0, r0 ,r0}");
  344. static tilegx_bundle_bits jit_x0_dblalign(int rd, int ra, int rb)
  345. {
  346. extern tilegx_bundle_bits __unalign_jit_x0_dblalign;
  347. return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
  348. create_Dest_X0(rd) | create_SrcA_X0(ra) |
  349. create_SrcB_X0(rb);
  350. }
  351. __JIT_CODE("__unalign_jit_x1_iret: {iret}");
  352. static tilegx_bundle_bits jit_x1_iret(void)
  353. {
  354. extern tilegx_bundle_bits __unalign_jit_x1_iret;
  355. return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
  356. }
  357. __JIT_CODE("__unalign_jit_x01_fnop: {fnop;fnop}");
  358. static tilegx_bundle_bits jit_x0_fnop(void)
  359. {
  360. extern tilegx_bundle_bits __unalign_jit_x01_fnop;
  361. return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
  362. }
  363. static tilegx_bundle_bits jit_x1_fnop(void)
  364. {
  365. extern tilegx_bundle_bits __unalign_jit_x01_fnop;
  366. return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
  367. }
  368. __JIT_CODE("__unalign_jit_y2_dummy: {fnop; fnop; ld zero, sp}");
  369. static tilegx_bundle_bits jit_y2_dummy(void)
  370. {
  371. extern tilegx_bundle_bits __unalign_jit_y2_dummy;
  372. return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
  373. }
  374. static tilegx_bundle_bits jit_y1_fnop(void)
  375. {
  376. extern tilegx_bundle_bits __unalign_jit_y2_dummy;
  377. return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
  378. }
  379. __JIT_CODE("__unalign_jit_x1_st1_add: {st1_add r1, r0, 0}");
  380. static tilegx_bundle_bits jit_x1_st1_add(int ra, int rb, int imm8)
  381. {
  382. extern tilegx_bundle_bits __unalign_jit_x1_st1_add;
  383. return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
  384. (~create_SrcA_X1(-1)) &
  385. GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
  386. create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
  387. }
  388. __JIT_CODE("__unalign_jit_x1_st: {crc32_8 r1, r0, r0; st r0, r0}");
  389. static tilegx_bundle_bits jit_x1_st(int ra, int rb)
  390. {
  391. extern tilegx_bundle_bits __unalign_jit_x1_st;
  392. return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
  393. create_SrcA_X1(ra) | create_SrcB_X1(rb);
  394. }
  395. __JIT_CODE("__unalign_jit_x1_st_add: {st_add r1, r0, 0}");
  396. static tilegx_bundle_bits jit_x1_st_add(int ra, int rb, int imm8)
  397. {
  398. extern tilegx_bundle_bits __unalign_jit_x1_st_add;
  399. return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
  400. (~create_SrcA_X1(-1)) &
  401. GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
  402. create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
  403. }
  404. __JIT_CODE("__unalign_jit_x1_ld: {crc32_8 r1, r0, r0; ld r0, r0}");
  405. static tilegx_bundle_bits jit_x1_ld(int rd, int ra)
  406. {
  407. extern tilegx_bundle_bits __unalign_jit_x1_ld;
  408. return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
  409. create_Dest_X1(rd) | create_SrcA_X1(ra);
  410. }
  411. __JIT_CODE("__unalign_jit_x1_ld_add: {ld_add r1, r0, 0}");
  412. static tilegx_bundle_bits jit_x1_ld_add(int rd, int ra, int imm8)
  413. {
  414. extern tilegx_bundle_bits __unalign_jit_x1_ld_add;
  415. return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
  416. (~create_Dest_X1(-1)) &
  417. GX_INSN_X1_MASK) | create_Dest_X1(rd) |
  418. create_SrcA_X1(ra) | create_Imm8_X1(imm8);
  419. }
  420. __JIT_CODE("__unalign_jit_x0_bfexts: {bfexts r0, r0, 0, 0}");
  421. static tilegx_bundle_bits jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
  422. {
  423. extern tilegx_bundle_bits __unalign_jit_x0_bfexts;
  424. return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
  425. GX_INSN_X0_MASK) |
  426. create_Dest_X0(rd) | create_SrcA_X0(ra) |
  427. create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
  428. }
  429. __JIT_CODE("__unalign_jit_x0_bfextu: {bfextu r0, r0, 0, 0}");
  430. static tilegx_bundle_bits jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
  431. {
  432. extern tilegx_bundle_bits __unalign_jit_x0_bfextu;
  433. return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
  434. GX_INSN_X0_MASK) |
  435. create_Dest_X0(rd) | create_SrcA_X0(ra) |
  436. create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
  437. }
  438. __JIT_CODE("__unalign_jit_x1_addi: {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
  439. static tilegx_bundle_bits jit_x1_addi(int rd, int ra, int imm8)
  440. {
  441. extern tilegx_bundle_bits __unalign_jit_x1_addi;
  442. return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
  443. create_Dest_X1(rd) | create_SrcA_X1(ra) |
  444. create_Imm8_X1(imm8);
  445. }
  446. __JIT_CODE("__unalign_jit_x0_shrui: {shrui r0, r0, 0; iret}");
  447. static tilegx_bundle_bits jit_x0_shrui(int rd, int ra, int imm6)
  448. {
  449. extern tilegx_bundle_bits __unalign_jit_x0_shrui;
  450. return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
  451. GX_INSN_X0_MASK) |
  452. create_Dest_X0(rd) | create_SrcA_X0(ra) |
  453. create_ShAmt_X0(imm6);
  454. }
  455. __JIT_CODE("__unalign_jit_x0_rotli: {rotli r0, r0, 0; iret}");
  456. static tilegx_bundle_bits jit_x0_rotli(int rd, int ra, int imm6)
  457. {
  458. extern tilegx_bundle_bits __unalign_jit_x0_rotli;
  459. return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
  460. GX_INSN_X0_MASK) |
  461. create_Dest_X0(rd) | create_SrcA_X0(ra) |
  462. create_ShAmt_X0(imm6);
  463. }
  464. __JIT_CODE("__unalign_jit_x1_bnezt: {bnezt r0, __unalign_jit_x1_bnezt}");
  465. static tilegx_bundle_bits jit_x1_bnezt(int ra, int broff)
  466. {
  467. extern tilegx_bundle_bits __unalign_jit_x1_bnezt;
  468. return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
  469. GX_INSN_X1_MASK) |
  470. create_SrcA_X1(ra) | create_BrOff_X1(broff);
  471. }
  472. #undef __JIT_CODE
  473. /*
  474. * This function generates the unaligned-fixup JIT.
  475. *
  476. * We first find the unaligned load/store instruction's destination and
  477. * source registers (rd, ra, rb), plus 3 scratch registers, by calling
  478. * find_regs(). The 3 scratch clobbers must not alias any register used
  479. * in the fault bundle. Then the fault bundle is analyzed to determine if
  480. * it is a load or store, the operand width, and any branch or address
  481. * increment. Finally, the generated JIT is copied into the user-space JIT area.
  482. */
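/*
 * Rough shape of an emitted fragment for the common load case (a sketch
 * that mirrors the code below, not a fixed template):
 *
 *   save scratch registers on the user stack (st/st_add)
 *   { ldna rd, ra } and { ldna clob, clob }  - two aligned loads
 *   { dblalign rd, clob, ... }               - merge into the destination
 *   optional bfexts/bfextu                   - extend sub-8-byte loads
 *   restore scratch registers (ld/ld_add)
 *   { iret }                                 - return to the user program
 */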
  483. static
  484. void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
  485. int align_ctl)
  486. {
  487. struct thread_info *info = current_thread_info();
  488. struct unaligned_jit_fragment frag;
  489. struct unaligned_jit_fragment *jit_code_area;
  490. tilegx_bundle_bits bundle_2 = 0;
  491. /* If bundle_2_enable = false, bundle_2 is fnop/nop operation. */
  492. bool bundle_2_enable = true;
  493. uint64_t ra, rb, rd = -1, clob1, clob2, clob3;
  494. /*
  495. * Indicates whether the unaligned-access
  496. * instruction's registers collide with
  497. * others in the same bundle.
  498. */
  499. bool alias = false;
  500. bool load_n_store = true;
  501. bool load_store_signed = false;
  502. unsigned int load_store_size = 8;
  503. bool y1_br = false; /* True for a branch in the same bundle at Y1. */
  504. int y1_br_reg = 0;
  505. /* True for a link operation, i.e. jalr or lnk at Y1. */
  506. bool y1_lr = false;
  507. int y1_lr_reg = 0;
  508. bool x1_add = false; /* True for a load/store ADD instruction at X1. */
  509. int x1_add_imm8 = 0;
  510. bool unexpected = false;
  511. int n = 0, k;
  512. jit_code_area =
  513. (struct unaligned_jit_fragment *)(info->unalign_jit_base);
  514. memset((void *)&frag, 0, sizeof(frag));
  515. /* 0: X mode, Otherwise: Y mode. */
  516. if (bundle & TILEGX_BUNDLE_MODE_MASK) {
  517. unsigned int mod, opcode;
  518. if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
  519. get_RRROpcodeExtension_Y1(bundle) ==
  520. UNARY_RRR_1_OPCODE_Y1) {
  521. opcode = get_UnaryOpcodeExtension_Y1(bundle);
  522. /*
  523. * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1
  524. * pipeline.
  525. */
  526. switch (opcode) {
  527. case JALR_UNARY_OPCODE_Y1:
  528. case JALRP_UNARY_OPCODE_Y1:
  529. y1_lr = true;
  530. y1_lr_reg = 55; /* Link register. */
  531. /* FALLTHROUGH */
  532. case JR_UNARY_OPCODE_Y1:
  533. case JRP_UNARY_OPCODE_Y1:
  534. y1_br = true;
  535. y1_br_reg = get_SrcA_Y1(bundle);
  536. break;
  537. case LNK_UNARY_OPCODE_Y1:
  538. /* "lnk" at Y1 pipeline. */
  539. y1_lr = true;
  540. y1_lr_reg = get_Dest_Y1(bundle);
  541. break;
  542. }
  543. }
  544. opcode = get_Opcode_Y2(bundle);
  545. mod = get_Mode(bundle);
  546. /*
  547. * bundle_2 is the bundle after turning Y2 into a dummy operation:
  548. * ld zero, sp
  549. */
  550. bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
  551. /* Make Y1 as fnop if Y1 is a branch or lnk operation. */
  552. if (y1_br || y1_lr) {
  553. bundle_2 &= ~(GX_INSN_Y1_MASK);
  554. bundle_2 |= jit_y1_fnop();
  555. }
  556. if (is_y0_y1_nop(bundle_2))
  557. bundle_2_enable = false;
  558. if (mod == MODE_OPCODE_YC2) {
  559. /* Store. */
  560. load_n_store = false;
  561. load_store_size = 1 << opcode;
  562. load_store_signed = false;
  563. find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
  564. &clob3, &alias);
  565. if (load_store_size > 8)
  566. unexpected = true;
  567. } else {
  568. /* Load. */
  569. load_n_store = true;
  570. if (mod == MODE_OPCODE_YB2) {
  571. switch (opcode) {
  572. case LD_OPCODE_Y2:
  573. load_store_signed = false;
  574. load_store_size = 8;
  575. break;
  576. case LD4S_OPCODE_Y2:
  577. load_store_signed = true;
  578. load_store_size = 4;
  579. break;
  580. case LD4U_OPCODE_Y2:
  581. load_store_signed = false;
  582. load_store_size = 4;
  583. break;
  584. default:
  585. unexpected = true;
  586. }
  587. } else if (mod == MODE_OPCODE_YA2) {
  588. if (opcode == LD2S_OPCODE_Y2) {
  589. load_store_signed = true;
  590. load_store_size = 2;
  591. } else if (opcode == LD2U_OPCODE_Y2) {
  592. load_store_signed = false;
  593. load_store_size = 2;
  594. } else
  595. unexpected = true;
  596. } else
  597. unexpected = true;
  598. find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
  599. &clob3, &alias);
  600. }
  601. } else {
  602. unsigned int opcode;
  603. /* bundle_2 is the bundle after turning X1 into an "fnop". */
  604. bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
  605. if (is_x0_x1_nop(bundle_2))
  606. bundle_2_enable = false;
  607. if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
  608. opcode = get_UnaryOpcodeExtension_X1(bundle);
  609. if (get_RRROpcodeExtension_X1(bundle) ==
  610. UNARY_RRR_0_OPCODE_X1) {
  611. load_n_store = true;
  612. find_regs(bundle, &rd, &ra, &rb, &clob1,
  613. &clob2, &clob3, &alias);
  614. switch (opcode) {
  615. case LD_UNARY_OPCODE_X1:
  616. load_store_signed = false;
  617. load_store_size = 8;
  618. break;
  619. case LD4S_UNARY_OPCODE_X1:
  620. load_store_signed = true;
  621. /* FALLTHROUGH */
  622. case LD4U_UNARY_OPCODE_X1:
  623. load_store_size = 4;
  624. break;
  625. case LD2S_UNARY_OPCODE_X1:
  626. load_store_signed = true;
  627. /* FALLTHROUGH */
  628. case LD2U_UNARY_OPCODE_X1:
  629. load_store_size = 2;
  630. break;
  631. default:
  632. unexpected = true;
  633. }
  634. } else {
  635. load_n_store = false;
  636. load_store_signed = false;
  637. find_regs(bundle, 0, &ra, &rb,
  638. &clob1, &clob2, &clob3,
  639. &alias);
  640. opcode = get_RRROpcodeExtension_X1(bundle);
  641. switch (opcode) {
  642. case ST_RRR_0_OPCODE_X1:
  643. load_store_size = 8;
  644. break;
  645. case ST4_RRR_0_OPCODE_X1:
  646. load_store_size = 4;
  647. break;
  648. case ST2_RRR_0_OPCODE_X1:
  649. load_store_size = 2;
  650. break;
  651. default:
  652. unexpected = true;
  653. }
  654. }
  655. } else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
  656. load_n_store = true;
  657. opcode = get_Imm8OpcodeExtension_X1(bundle);
  658. switch (opcode) {
  659. case LD_ADD_IMM8_OPCODE_X1:
  660. load_store_size = 8;
  661. break;
  662. case LD4S_ADD_IMM8_OPCODE_X1:
  663. load_store_signed = true;
  664. /* FALLTHROUGH */
  665. case LD4U_ADD_IMM8_OPCODE_X1:
  666. load_store_size = 4;
  667. break;
  668. case LD2S_ADD_IMM8_OPCODE_X1:
  669. load_store_signed = true;
  670. /* FALLTHROUGH */
  671. case LD2U_ADD_IMM8_OPCODE_X1:
  672. load_store_size = 2;
  673. break;
  674. case ST_ADD_IMM8_OPCODE_X1:
  675. load_n_store = false;
  676. load_store_size = 8;
  677. break;
  678. case ST4_ADD_IMM8_OPCODE_X1:
  679. load_n_store = false;
  680. load_store_size = 4;
  681. break;
  682. case ST2_ADD_IMM8_OPCODE_X1:
  683. load_n_store = false;
  684. load_store_size = 2;
  685. break;
  686. default:
  687. unexpected = true;
  688. }
  689. if (!unexpected) {
  690. x1_add = true;
  691. if (load_n_store)
  692. x1_add_imm8 = get_Imm8_X1(bundle);
  693. else
  694. x1_add_imm8 = get_Dest_Imm8_X1(bundle);
  695. }
  696. find_regs(bundle, load_n_store ? (&rd) : NULL,
  697. &ra, &rb, &clob1, &clob2, &clob3, &alias);
  698. } else
  699. unexpected = true;
  700. }
  701. /*
  702. * Some sanity checks for the register numbers extracted from the fault bundle.
  703. */
  704. if (check_regs(rd, ra, rb, clob1, clob2, clob3))
  705. unexpected = true;
  706. /* Give warning if register ra has an aligned address. */
  707. if (!unexpected)
  708. WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
  709. /*
  710. * If the fault came from kernel space, we only need to take care of the
  711. * unaligned "get_user/put_user" macros defined in "uaccess.h".
  712. * Basically, we will handle a bundle like this:
  713. * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
  714. * (Refer to file "arch/tile/include/asm/uaccess.h" for details).
  715. * For either a load or a store, the byte-wise operation is performed by
  716. * calling get_user() or put_user(). If the macro returns a non-zero value,
  717. * store that value in rx, otherwise store zero in rx. Finally make pc
  718. * point to the next bundle and return.
  719. */
  720. if (EX1_PL(regs->ex1) != USER_PL) {
  721. unsigned long rx = 0;
  722. unsigned long x = 0, ret = 0;
  723. if (y1_br || y1_lr || x1_add ||
  724. (load_store_signed !=
  725. (load_n_store && load_store_size == 4))) {
  726. /* Branches, links, load/store-add, or the wrong sign-extension are not handled here. */
  727. unexpected = true;
  728. } else if (!unexpected) {
  729. if (bundle & TILEGX_BUNDLE_MODE_MASK) {
  730. /*
  731. * Fault bundle is Y mode.
  732. * Check if the Y1 and Y0 is the form of
  733. * { movei rx, 0; nop/fnop }, if yes,
  734. * find the rx.
  735. */
  736. if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
  737. && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
  738. (get_Imm8_Y1(bundle) == 0) &&
  739. is_bundle_y0_nop(bundle)) {
  740. rx = get_Dest_Y1(bundle);
  741. } else if ((get_Opcode_Y0(bundle) ==
  742. ADDI_OPCODE_Y0) &&
  743. (get_SrcA_Y0(bundle) == TREG_ZERO) &&
  744. (get_Imm8_Y0(bundle) == 0) &&
  745. is_bundle_y1_nop(bundle)) {
  746. rx = get_Dest_Y0(bundle);
  747. } else {
  748. unexpected = true;
  749. }
  750. } else {
  751. /*
  752. * Fault bundle is X mode.
  753. * Check if the X0 is 'movei rx, 0',
  754. * if yes, find the rx.
  755. */
  756. if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
  757. && (get_Imm8OpcodeExtension_X0(bundle) ==
  758. ADDI_IMM8_OPCODE_X0) &&
  759. (get_SrcA_X0(bundle) == TREG_ZERO) &&
  760. (get_Imm8_X0(bundle) == 0)) {
  761. rx = get_Dest_X0(bundle);
  762. } else {
  763. unexpected = true;
  764. }
  765. }
  766. /* rx should be less than 56. */
  767. if (!unexpected && (rx >= 56))
  768. unexpected = true;
  769. }
  770. if (!search_exception_tables(regs->pc)) {
  771. /* No fixup in the exception tables for the pc. */
  772. unexpected = true;
  773. }
  774. if (unexpected) {
  775. /* Unexpected unalign kernel fault. */
  776. struct task_struct *tsk = validate_current();
  777. bust_spinlocks(1);
  778. show_regs(regs);
  779. if (unlikely(tsk->pid < 2)) {
  780. panic("Kernel unalign fault running %s!",
  781. tsk->pid ? "init" : "the idle task");
  782. }
  783. #ifdef SUPPORT_DIE
  784. die("Oops", regs);
  785. #endif
  786. bust_spinlocks(1);
  787. do_group_exit(SIGKILL);
  788. } else {
  789. unsigned long i, b = 0;
  790. unsigned char *ptr =
  791. (unsigned char *)regs->regs[ra];
  792. if (load_n_store) {
  793. /* handle get_user(x, ptr) */
  794. for (i = 0; i < load_store_size; i++) {
  795. ret = get_user(b, ptr++);
  796. if (!ret) {
  797. /* Success! update x. */
  798. #ifdef __LITTLE_ENDIAN
  799. x |= (b << (8 * i));
  800. #else
  801. x <<= 8;
  802. x |= b;
  803. #endif /* __LITTLE_ENDIAN */
  804. } else {
  805. x = 0;
  806. break;
  807. }
  808. }
  809. /* Sign-extend 4-byte loads. */
  810. if (load_store_size == 4)
  811. x = (long)(int)x;
  812. /* Set register rd. */
  813. regs->regs[rd] = x;
  814. /* Set register rx. */
  815. regs->regs[rx] = ret;
  816. /* Bump pc. */
  817. regs->pc += 8;
  818. } else {
  819. /* Handle put_user(x, ptr) */
  820. x = regs->regs[rb];
  821. #ifdef __LITTLE_ENDIAN
  822. b = x;
  823. #else
  824. /*
  825. * Swap x so that it is stored from low
  826. * to high memory, the same as in the
  827. * little-endian case.
  828. */
  829. switch (load_store_size) {
  830. case 8:
  831. b = swab64(x);
  832. break;
  833. case 4:
  834. b = swab32(x);
  835. break;
  836. case 2:
  837. b = swab16(x);
  838. break;
  839. }
  840. #endif /* __LITTLE_ENDIAN */
  841. for (i = 0; i < load_store_size; i++) {
  842. ret = put_user(b, ptr++);
  843. if (ret)
  844. break;
  845. /* Success! shift 1 byte. */
  846. b >>= 8;
  847. }
  848. /* Set register rx. */
  849. regs->regs[rx] = ret;
  850. /* Bump pc. */
  851. regs->pc += 8;
  852. }
  853. }
  854. unaligned_fixup_count++;
  855. if (unaligned_printk) {
  856. pr_info("%s/%d. Unalign fixup for kernel access "
  857. "to userspace %lx.",
  858. current->comm, current->pid, regs->regs[ra]);
  859. }
  860. /* Done! Return to the exception handler. */
  861. return;
  862. }
  863. if ((align_ctl == 0) || unexpected) {
  864. siginfo_t info = {
  865. .si_signo = SIGBUS,
  866. .si_code = BUS_ADRALN,
  867. .si_addr = (unsigned char __user *)0
  868. };
  869. if (unaligned_printk)
  870. pr_info("Unalign bundle: unexp @%llx, %llx",
  871. (unsigned long long)regs->pc,
  872. (unsigned long long)bundle);
  873. if (ra < 56) {
  874. unsigned long uaa = (unsigned long)regs->regs[ra];
  875. /* Set bus Address. */
  876. info.si_addr = (unsigned char __user *)uaa;
  877. }
  878. unaligned_fixup_count++;
  879. trace_unhandled_signal("unaligned fixup trap", regs,
  880. (unsigned long)info.si_addr, SIGBUS);
  881. force_sig_info(info.si_signo, &info, current);
  882. return;
  883. }
  884. #ifdef __LITTLE_ENDIAN
  885. #define UA_FIXUP_ADDR_DELTA 1
  886. #define UA_FIXUP_BFEXT_START(_B_) 0
  887. #define UA_FIXUP_BFEXT_END(_B_) (8 * (_B_) - 1)
  888. #else /* __BIG_ENDIAN */
  889. #define UA_FIXUP_ADDR_DELTA -1
  890. #define UA_FIXUP_BFEXT_START(_B_) (64 - 8 * (_B_))
  891. #define UA_FIXUP_BFEXT_END(_B_) 63
  892. #endif /* __LITTLE_ENDIAN */
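/*
 * Example: for a little-endian 4-byte load the macros above give
 * bfextu/bfexts rd, rd, 0, 31, i.e. extract and zero- or sign-extend the
 * low 32 bits of rd; the big-endian variants select bits 32..63 instead,
 * which is where the requested bytes end up on a big-endian system.
 */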
  893. if ((ra != rb) && (rd != TREG_SP) && !alias &&
  894. !y1_br && !y1_lr && !x1_add) {
  895. /*
  896. * Simple case: ra != rb, no register alias found,
  897. * and no branch or link. This will be the majority of cases.
  898. * We can do a little better for this simple case than with the
  899. * generic scheme below.
  900. */
  901. if (!load_n_store) {
  902. /*
  903. * Simple store: ra != rb, no need for scratch register.
  904. * Just store and rotate right, one byte at a time.
  905. */
  906. #ifdef __BIG_ENDIAN
  907. frag.insn[n++] =
  908. jit_x0_addi(ra, ra, load_store_size - 1) |
  909. jit_x1_fnop();
  910. #endif /* __BIG_ENDIAN */
  911. for (k = 0; k < load_store_size; k++) {
  912. /* Store a byte. */
  913. frag.insn[n++] =
  914. jit_x0_rotli(rb, rb, 56) |
  915. jit_x1_st1_add(ra, rb,
  916. UA_FIXUP_ADDR_DELTA);
  917. }
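/*
 * Note: within a bundle the X1 store reads rb before the X0 rotate's result
 * is written back, so iteration k stores byte k of the original rb value.
 * A rotate by 56 is a rotate right by one byte; the leftover rotation for
 * 2- and 4-byte stores is undone just below.
 */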
  918. #ifdef __BIG_ENDIAN
  919. frag.insn[n] = jit_x1_addi(ra, ra, 1);
  920. #else
  921. frag.insn[n] = jit_x1_addi(ra, ra,
  922. -1 * load_store_size);
  923. #endif /* __LITTLE_ENDIAN */
  924. if (load_store_size == 8) {
  925. frag.insn[n] |= jit_x0_fnop();
  926. } else if (load_store_size == 4) {
  927. frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
  928. } else { /* = 2 */
  929. frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
  930. }
  931. n++;
  932. if (bundle_2_enable)
  933. frag.insn[n++] = bundle_2;
  934. frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
  935. } else {
  936. if (rd == ra) {
  937. /* Use two clobber registers: clob1/2. */
  938. frag.insn[n++] =
  939. jit_x0_addi(TREG_SP, TREG_SP, -16) |
  940. jit_x1_fnop();
  941. frag.insn[n++] =
  942. jit_x0_addi(clob1, ra, 7) |
  943. jit_x1_st_add(TREG_SP, clob1, -8);
  944. frag.insn[n++] =
  945. jit_x0_addi(clob2, ra, 0) |
  946. jit_x1_st(TREG_SP, clob2);
  947. frag.insn[n++] =
  948. jit_x0_fnop() |
  949. jit_x1_ldna(rd, ra);
  950. frag.insn[n++] =
  951. jit_x0_fnop() |
  952. jit_x1_ldna(clob1, clob1);
  953. /*
  954. * Note: we must make sure that rd is not
  955. * sp. Recover clob1/2 from the stack.
  956. */
  957. frag.insn[n++] =
  958. jit_x0_dblalign(rd, clob1, clob2) |
  959. jit_x1_ld_add(clob2, TREG_SP, 8);
  960. frag.insn[n++] =
  961. jit_x0_fnop() |
  962. jit_x1_ld_add(clob1, TREG_SP, 16);
  963. } else {
  964. /* Use one clobber register: clob1 only. */
  965. frag.insn[n++] =
  966. jit_x0_addi(TREG_SP, TREG_SP, -16) |
  967. jit_x1_fnop();
  968. frag.insn[n++] =
  969. jit_x0_addi(clob1, ra, 7) |
  970. jit_x1_st(TREG_SP, clob1);
  971. frag.insn[n++] =
  972. jit_x0_fnop() |
  973. jit_x1_ldna(rd, ra);
  974. frag.insn[n++] =
  975. jit_x0_fnop() |
  976. jit_x1_ldna(clob1, clob1);
  977. /*
  978. * Note: we must make sure that rd is not
  979. * sp. Recover clob1 from the stack.
  980. */
  981. frag.insn[n++] =
  982. jit_x0_dblalign(rd, clob1, ra) |
  983. jit_x1_ld_add(clob1, TREG_SP, 16);
  984. }
  985. if (bundle_2_enable)
  986. frag.insn[n++] = bundle_2;
  987. /*
  988. * For a non-8-byte load, extract the corresponding
  989. * bytes and sign- or zero-extend them.
  990. */
  991. if (load_store_size == 4) {
  992. if (load_store_signed)
  993. frag.insn[n++] =
  994. jit_x0_bfexts(
  995. rd, rd,
  996. UA_FIXUP_BFEXT_START(4),
  997. UA_FIXUP_BFEXT_END(4)) |
  998. jit_x1_fnop();
  999. else
  1000. frag.insn[n++] =
  1001. jit_x0_bfextu(
  1002. rd, rd,
  1003. UA_FIXUP_BFEXT_START(4),
  1004. UA_FIXUP_BFEXT_END(4)) |
  1005. jit_x1_fnop();
  1006. } else if (load_store_size == 2) {
  1007. if (load_store_signed)
  1008. frag.insn[n++] =
  1009. jit_x0_bfexts(
  1010. rd, rd,
  1011. UA_FIXUP_BFEXT_START(2),
  1012. UA_FIXUP_BFEXT_END(2)) |
  1013. jit_x1_fnop();
  1014. else
  1015. frag.insn[n++] =
  1016. jit_x0_bfextu(
  1017. rd, rd,
  1018. UA_FIXUP_BFEXT_START(2),
  1019. UA_FIXUP_BFEXT_END(2)) |
  1020. jit_x1_fnop();
  1021. }
  1022. frag.insn[n++] =
  1023. jit_x0_fnop() |
  1024. jit_x1_iret();
  1025. }
  1026. } else if (!load_n_store) {
  1027. /*
  1028. * Generic memory store cases: use 3 clobber registers.
  1029. *
  1030. * Allocate space for saving clob2, clob1 and clob3 on the user's stack.
  1031. * Register clob3 points to where clob2 is saved, followed by
  1032. * clob1 and clob3 from high to low memory.
  1033. */
  1034. frag.insn[n++] =
  1035. jit_x0_addi(TREG_SP, TREG_SP, -32) |
  1036. jit_x1_fnop();
  1037. frag.insn[n++] =
  1038. jit_x0_addi(clob3, TREG_SP, 16) |
  1039. jit_x1_st_add(TREG_SP, clob3, 8);
  1040. #ifdef __LITTLE_ENDIAN
  1041. frag.insn[n++] =
  1042. jit_x0_addi(clob1, ra, 0) |
  1043. jit_x1_st_add(TREG_SP, clob1, 8);
  1044. #else
  1045. frag.insn[n++] =
  1046. jit_x0_addi(clob1, ra, load_store_size - 1) |
  1047. jit_x1_st_add(TREG_SP, clob1, 8);
  1048. #endif
  1049. if (load_store_size == 8) {
  1050. /*
  1051. * We store one byte at a time, not for speed but for compact
  1052. * code. After each store, the data source register is rotated
  1053. * right by one byte, so it is unchanged after 8 stores.
  1054. */
  1055. frag.insn[n++] =
  1056. jit_x0_addi(clob2, TREG_ZERO, 7) |
  1057. jit_x1_st_add(TREG_SP, clob2, 16);
  1058. frag.insn[n++] =
  1059. jit_x0_rotli(rb, rb, 56) |
  1060. jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
  1061. frag.insn[n++] =
  1062. jit_x0_addi(clob2, clob2, -1) |
  1063. jit_x1_bnezt(clob2, -1);
  1064. frag.insn[n++] =
  1065. jit_x0_fnop() |
  1066. jit_x1_addi(clob2, y1_br_reg, 0);
  1067. } else if (load_store_size == 4) {
  1068. frag.insn[n++] =
  1069. jit_x0_addi(clob2, TREG_ZERO, 3) |
  1070. jit_x1_st_add(TREG_SP, clob2, 16);
  1071. frag.insn[n++] =
  1072. jit_x0_rotli(rb, rb, 56) |
  1073. jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
  1074. frag.insn[n++] =
  1075. jit_x0_addi(clob2, clob2, -1) |
  1076. jit_x1_bnezt(clob2, -1);
  1077. /*
  1078. * Same as the 8-byte case, but we need to rotate another 4
  1079. * bytes to recover rb for the 4-byte store.
  1080. */
  1081. frag.insn[n++] = jit_x0_rotli(rb, rb, 32) |
  1082. jit_x1_addi(clob2, y1_br_reg, 0);
  1083. } else { /* =2 */
  1084. frag.insn[n++] =
  1085. jit_x0_addi(clob2, rb, 0) |
  1086. jit_x1_st_add(TREG_SP, clob2, 16);
  1087. for (k = 0; k < 2; k++) {
  1088. frag.insn[n++] =
  1089. jit_x0_shrui(rb, rb, 8) |
  1090. jit_x1_st1_add(clob1, rb,
  1091. UA_FIXUP_ADDR_DELTA);
  1092. }
  1093. frag.insn[n++] =
  1094. jit_x0_addi(rb, clob2, 0) |
  1095. jit_x1_addi(clob2, y1_br_reg, 0);
  1096. }
  1097. if (bundle_2_enable)
  1098. frag.insn[n++] = bundle_2;
  1099. if (y1_lr) {
  1100. frag.insn[n++] =
  1101. jit_x0_fnop() |
  1102. jit_x1_mfspr(y1_lr_reg,
  1103. SPR_EX_CONTEXT_0_0);
  1104. }
  1105. if (y1_br) {
  1106. frag.insn[n++] =
  1107. jit_x0_fnop() |
  1108. jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
  1109. clob2);
  1110. }
  1111. if (x1_add) {
  1112. frag.insn[n++] =
  1113. jit_x0_addi(ra, ra, x1_add_imm8) |
  1114. jit_x1_ld_add(clob2, clob3, -8);
  1115. } else {
  1116. frag.insn[n++] =
  1117. jit_x0_fnop() |
  1118. jit_x1_ld_add(clob2, clob3, -8);
  1119. }
  1120. frag.insn[n++] =
  1121. jit_x0_fnop() |
  1122. jit_x1_ld_add(clob1, clob3, -8);
  1123. frag.insn[n++] = jit_x0_fnop() | jit_x1_ld(clob3, clob3);
  1124. frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
  1125. } else {
  1126. /*
  1127. * Generic memory load cases.
  1128. *
  1129. * Allocate space for saving clob1, clob2 and clob3 on the user's stack.
  1130. * Register clob3 points to where clob1 is saved, followed
  1131. * by clob2 and clob3 from high to low memory.
  1132. */
  1133. frag.insn[n++] =
  1134. jit_x0_addi(TREG_SP, TREG_SP, -32) |
  1135. jit_x1_fnop();
  1136. frag.insn[n++] =
  1137. jit_x0_addi(clob3, TREG_SP, 16) |
  1138. jit_x1_st_add(TREG_SP, clob3, 8);
  1139. frag.insn[n++] =
  1140. jit_x0_addi(clob2, ra, 0) |
  1141. jit_x1_st_add(TREG_SP, clob2, 8);
  1142. if (y1_br) {
  1143. frag.insn[n++] =
  1144. jit_x0_addi(clob1, y1_br_reg, 0) |
  1145. jit_x1_st_add(TREG_SP, clob1, 16);
  1146. } else {
  1147. frag.insn[n++] =
  1148. jit_x0_fnop() |
  1149. jit_x1_st_add(TREG_SP, clob1, 16);
  1150. }
  1151. if (bundle_2_enable)
  1152. frag.insn[n++] = bundle_2;
  1153. if (y1_lr) {
  1154. frag.insn[n++] =
  1155. jit_x0_fnop() |
  1156. jit_x1_mfspr(y1_lr_reg,
  1157. SPR_EX_CONTEXT_0_0);
  1158. }
  1159. if (y1_br) {
  1160. frag.insn[n++] =
  1161. jit_x0_fnop() |
  1162. jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
  1163. clob1);
  1164. }
  1165. frag.insn[n++] =
  1166. jit_x0_addi(clob1, clob2, 7) |
  1167. jit_x1_ldna(rd, clob2);
  1168. frag.insn[n++] =
  1169. jit_x0_fnop() |
  1170. jit_x1_ldna(clob1, clob1);
  1171. frag.insn[n++] =
  1172. jit_x0_dblalign(rd, clob1, clob2) |
  1173. jit_x1_ld_add(clob1, clob3, -8);
  1174. if (x1_add) {
  1175. frag.insn[n++] =
  1176. jit_x0_addi(ra, ra, x1_add_imm8) |
  1177. jit_x1_ld_add(clob2, clob3, -8);
  1178. } else {
  1179. frag.insn[n++] =
  1180. jit_x0_fnop() |
  1181. jit_x1_ld_add(clob2, clob3, -8);
  1182. }
  1183. frag.insn[n++] =
  1184. jit_x0_fnop() |
  1185. jit_x1_ld(clob3, clob3);
  1186. if (load_store_size == 4) {
  1187. if (load_store_signed)
  1188. frag.insn[n++] =
  1189. jit_x0_bfexts(
  1190. rd, rd,
  1191. UA_FIXUP_BFEXT_START(4),
  1192. UA_FIXUP_BFEXT_END(4)) |
  1193. jit_x1_fnop();
  1194. else
  1195. frag.insn[n++] =
  1196. jit_x0_bfextu(
  1197. rd, rd,
  1198. UA_FIXUP_BFEXT_START(4),
  1199. UA_FIXUP_BFEXT_END(4)) |
  1200. jit_x1_fnop();
  1201. } else if (load_store_size == 2) {
  1202. if (load_store_signed)
  1203. frag.insn[n++] =
  1204. jit_x0_bfexts(
  1205. rd, rd,
  1206. UA_FIXUP_BFEXT_START(2),
  1207. UA_FIXUP_BFEXT_END(2)) |
  1208. jit_x1_fnop();
  1209. else
  1210. frag.insn[n++] =
  1211. jit_x0_bfextu(
  1212. rd, rd,
  1213. UA_FIXUP_BFEXT_START(2),
  1214. UA_FIXUP_BFEXT_END(2)) |
  1215. jit_x1_fnop();
  1216. }
  1217. frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
  1218. }
  1219. /* Max JIT bundle count is 14. */
  1220. WARN_ON(n > 14);
  1221. if (!unexpected) {
  1222. int status = 0;
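/*
 * The JIT page is divided into 128-byte fragment slots; the slot index is
 * taken from the low bits of pc >> 3 (the bundle number), so a given
 * faulting bundle always lands in the same slot and simply overwrites
 * whatever fragment was there before.
 */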
  1223. int idx = (regs->pc >> 3) &
  1224. ((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
  1225. frag.pc = regs->pc;
  1226. frag.bundle = bundle;
  1227. if (unaligned_printk) {
  1228. pr_info("%s/%d, Unalign fixup: pc=%lx "
  1229. "bundle=%lx %d %d %d %d %d %d %d %d.",
  1230. current->comm, current->pid,
  1231. (unsigned long)frag.pc,
  1232. (unsigned long)frag.bundle,
  1233. (int)alias, (int)rd, (int)ra,
  1234. (int)rb, (int)bundle_2_enable,
  1235. (int)y1_lr, (int)y1_br, (int)x1_add);
  1236. for (k = 0; k < n; k += 2)
  1237. pr_info("[%d] %016llx %016llx", k,
  1238. (unsigned long long)frag.insn[k],
  1239. (unsigned long long)frag.insn[k+1]);
  1240. }
  1241. /* Swap bundle byte order for big-endian systems. */
  1242. #ifdef __BIG_ENDIAN
  1243. frag.bundle = GX_INSN_BSWAP(frag.bundle);
  1244. for (k = 0; k < n; k++)
  1245. frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
  1246. #endif /* __BIG_ENDIAN */
  1247. status = copy_to_user((void __user *)&jit_code_area[idx],
  1248. &frag, sizeof(frag));
  1249. if (status) {
  1250. /* Failed to copy the JIT into userland; send SIGSEGV. */
  1251. siginfo_t info = {
  1252. .si_signo = SIGSEGV,
  1253. .si_code = SEGV_MAPERR,
  1254. .si_addr = (void __user *)&jit_code_area[idx]
  1255. };
  1256. pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx",
  1257. current->pid, current->comm,
  1258. (unsigned long long)&jit_code_area[idx]);
  1259. trace_unhandled_signal("segfault in unalign fixup",
  1260. regs,
  1261. (unsigned long)info.si_addr,
  1262. SIGSEGV);
  1263. force_sig_info(info.si_signo, &info, current);
  1264. return;
  1265. }
  1266. /* Do a cheaper increment, not accurate. */
  1267. unaligned_fixup_count++;
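/*
 * The fragment was written through copy_to_user(), i.e. via the data side,
 * so flush the corresponding instruction-cache range before returning into it.
 */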
  1268. __flush_icache_range((unsigned long)&jit_code_area[idx],
  1269. (unsigned long)&jit_code_area[idx] +
  1270. sizeof(frag));
  1271. /* Set up SPR_EX_CONTEXT_0_0/1 for returning to the user program. */
  1272. __insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
  1273. __insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
  1274. /* Modify pc at the start of new JIT. */
  1275. regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
  1276. /* Set ICS in SPR_EX_CONTEXT_K_1. */
  1277. regs->ex1 = PL_ICS_EX1(USER_PL, 1);
  1278. }
  1279. }
  1280. /*
  1281. * C function to generate the unaligned-data JIT. Called from the
  1282. * unaligned-data interrupt handler.
  1283. *
  1284. * First check whether the unaligned fixup is disabled, the exception was
  1285. * not a plain user-space access (e.g. ICS was set), or the sp register
  1286. * points to an unaligned address; if so, generate a SIGBUS. Then map a
  1287. * page into user space as the JIT area if it is not mapped yet. Generate
  1288. * the JIT code by calling jit_bundle_gen(), then return to the exception handler.
  1289. *
  1290. * The exception handler will "iret" to the newly generated JIT code after
  1291. * restoring the caller-saved registers. In theory, the JIT code will
  1292. * perform another "iret" to resume the user's program.
  1293. */
  1294. void do_unaligned(struct pt_regs *regs, int vecnum)
  1295. {
  1296. tilegx_bundle_bits __user *pc;
  1297. tilegx_bundle_bits bundle;
  1298. struct thread_info *info = current_thread_info();
  1299. int align_ctl;
  1300. /* Check the per-process unaligned-access control setting. */
  1301. align_ctl = unaligned_fixup;
  1302. switch (task_thread_info(current)->align_ctl) {
  1303. case PR_UNALIGN_NOPRINT:
  1304. align_ctl = 1;
  1305. break;
  1306. case PR_UNALIGN_SIGBUS:
  1307. align_ctl = 0;
  1308. break;
  1309. }
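/*
 * As used below: align_ctl < 0 forces SIGBUS for user-space faults, 0 makes
 * jit_bundle_gen() send SIGBUS instead of emitting a fixup, and >= 1
 * enables the JIT fixup.
 */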
  1310. /* Enable interrupts in order to access userland. */
  1311. local_irq_enable();
  1312. /*
  1313. * If the fault came from kernel space, there are two choices:
  1314. * (a) unaligned_fixup < 1: first try the get/put_user exception-table
  1315. * fixup to return -EFAULT. If there is no fixup, simply panic the kernel.
  1316. * (b) unaligned_fixup >= 1: try to fix the unaligned access if it was
  1317. * triggered by the get_user/put_user() macros. Panic the
  1318. * kernel if it is not fixable.
  1319. */
  1320. if (EX1_PL(regs->ex1) != USER_PL) {
  1321. if (align_ctl < 1) {
  1322. unaligned_fixup_count++;
  1323. /* If exception came from kernel, try fix it up. */
  1324. if (fixup_exception(regs)) {
  1325. if (unaligned_printk)
  1326. pr_info("Unalign fixup: %d %llx @%llx",
  1327. (int)unaligned_fixup,
  1328. (unsigned long long)regs->ex1,
  1329. (unsigned long long)regs->pc);
  1330. return;
  1331. }
  1332. /* Not fixable. Go panic. */
  1333. panic("Unalign exception in Kernel. pc=%lx",
  1334. regs->pc);
  1335. return;
  1336. } else {
  1337. /*
  1338. * Try to fix the exception. If we can't, panic the
  1339. * kernel.
  1340. */
  1341. bundle = GX_INSN_BSWAP(
  1342. *((tilegx_bundle_bits *)(regs->pc)));
  1343. jit_bundle_gen(regs, bundle, align_ctl);
  1344. return;
  1345. }
  1346. }
  1347. /*
  1348. * If the fault came from user space with ICS set, the sp register is not
  1349. * aligned, or the fixup is disabled, trigger SIGBUS.
  1350. */
  1351. if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
  1352. siginfo_t info = {
  1353. .si_signo = SIGBUS,
  1354. .si_code = BUS_ADRALN,
  1355. .si_addr = (unsigned char __user *)0
  1356. };
  1357. if (unaligned_printk)
  1358. pr_info("Unalign fixup: %d %llx @%llx",
  1359. (int)unaligned_fixup,
  1360. (unsigned long long)regs->ex1,
  1361. (unsigned long long)regs->pc);
  1362. unaligned_fixup_count++;
  1363. trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
  1364. force_sig_info(info.si_signo, &info, current);
  1365. return;
  1366. }
  1367. /* Read the bundle that caused the exception. */
  1368. pc = (tilegx_bundle_bits __user *)(regs->pc);
  1369. if (get_user(bundle, pc) != 0) {
  1370. /* We should probably never get here, since pc is a valid user address. */
  1371. siginfo_t info = {
  1372. .si_signo = SIGSEGV,
  1373. .si_code = SEGV_MAPERR,
  1374. .si_addr = (void __user *)pc
  1375. };
  1376. pr_err("Couldn't read instruction at %p trying to step\n", pc);
  1377. trace_unhandled_signal("segfault in unalign fixup", regs,
  1378. (unsigned long)info.si_addr, SIGSEGV);
  1379. force_sig_info(info.si_signo, &info, current);
  1380. return;
  1381. }
  1382. if (!info->unalign_jit_base) {
  1383. void __user *user_page;
  1384. /*
  1385. * Allocate a page in userland.
  1386. * For 64-bit processes we try to place the mapping far
  1387. * from anything else that might be going on (specifically
  1388. * 64 GB below the top of the user address space). If it
  1389. * happens not to be possible to put it there, it's OK;
  1390. * the kernel will choose another location and we'll
  1391. * remember it for later.
  1392. */
  1393. if (is_compat_task())
  1394. user_page = NULL;
  1395. else
  1396. user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
  1397. (current->pid << PAGE_SHIFT);
  1398. user_page = (void __user *) vm_mmap(NULL,
  1399. (unsigned long)user_page,
  1400. PAGE_SIZE,
  1401. PROT_EXEC | PROT_READ |
  1402. PROT_WRITE,
  1403. #ifdef CONFIG_HOMECACHE
  1404. MAP_CACHE_HOME_TASK |
  1405. #endif
  1406. MAP_PRIVATE |
  1407. MAP_ANONYMOUS,
  1408. 0);
  1409. if (IS_ERR((void __force *)user_page)) {
  1410. pr_err("Out of kernel pages trying do_mmap.\n");
  1411. return;
  1412. }
  1413. /* Save the address in the thread_info struct */
  1414. info->unalign_jit_base = user_page;
  1415. if (unaligned_printk)
  1416. pr_info("Unalign bundle: %d:%d, allocate page @%llx",
  1417. raw_smp_processor_id(), current->pid,
  1418. (unsigned long long)user_page);
  1419. }
  1420. /* Generate unalign JIT */
  1421. jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
  1422. }
  1423. #endif /* __tilegx__ */