unaligned.c
  1. /*
  2. * Architecture-specific unaligned trap handling.
  3. *
  4. * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
  5. * Stephane Eranian <eranian@hpl.hp.com>
  6. * David Mosberger-Tang <davidm@hpl.hp.com>
  7. *
  8. * 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix
  9. * get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
  10. * stacked register returns an undefined value; it does NOT trigger a
  11. * "rsvd register fault").
  12. * 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops.
  13. * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
  14. * 2001/01/17 Add support for emulation of unaligned kernel accesses.
  15. */
  16. #include <linux/jiffies.h>
  17. #include <linux/kernel.h>
  18. #include <linux/sched.h>
  19. #include <linux/tty.h>
  20. #include <asm/intrinsics.h>
  21. #include <asm/processor.h>
  22. #include <asm/rse.h>
  23. #include <asm/uaccess.h>
  24. #include <asm/unaligned.h>
  25. extern int die_if_kernel(char *str, struct pt_regs *regs, long err);
  26. #undef DEBUG_UNALIGNED_TRAP
  27. #ifdef DEBUG_UNALIGNED_TRAP
  28. # define DPRINT(a...) do { printk("%s %u: ", __func__, __LINE__); printk (a); } while (0)
  29. # define DDUMP(str,vp,len) dump(str, vp, len)
  30. static void
  31. dump (const char *str, void *vp, size_t len)
  32. {
  33. unsigned char *cp = vp;
  34. int i;
  35. printk("%s", str);
  36. for (i = 0; i < len; ++i)
  37. printk (" %02x", *cp++);
  38. printk("\n");
  39. }
  40. #else
  41. # define DPRINT(a...)
  42. # define DDUMP(str,vp,len)
  43. #endif
  44. #define IA64_FIRST_STACKED_GR 32
  45. #define IA64_FIRST_ROTATING_FR 32
  46. #define SIGN_EXT9 0xffffffffffffff00ul
  47. /*
  48. * sysctl settable hook which tells the kernel whether to honor the
  49. * IA64_THREAD_UAC_NOPRINT prctl. Because this is user settable, we want
  50. * to allow the super user to enable/disable this for security reasons
  51. * (i.e. don't allow attacker to fill up logs with unaligned accesses).
  52. */
  53. int no_unaligned_warning;
  54. static int noprint_warning;
  55. /*
  56. * For M-unit:
  57. *
  58. * opcode | m | x6 |
  59. * --------|------|---------|
  60. * [40-37] | [36] | [35:30] |
  61. * --------|------|---------|
  62. * 4 | 1 | 6 | = 11 bits
  63. * --------------------------
  64. * However bits [31:30] are not directly useful to distinguish between
  65. * load/store so we can use [35:32] instead, which gives the following
  66. * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
  67. * checking the m-bit until later in the load/store emulation.
  68. */
  69. #define IA64_OPCODE_MASK 0x1ef
  70. #define IA64_OPCODE_SHIFT 32
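/*
 * For example, masking bits [40:32] of a plain "ld8 r1=[r3]" with
 * IA64_OPCODE_MASK yields 0x080 (LD_OP): major opcode 4 in [40:37] and
 * x6_op = 0 in [35:32]. The 'e' nibble of the mask clears the m bit [36],
 * so the register-update form "ld8 r1=[r3],r2" compares equal to the same
 * LD_OP value, and the size bits [31:30] (x6_sz) are decoded separately
 * from the load_store_t bitfield below.
 */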
  71. /*
  72. * Table C-28 Integer Load/Store
  73. *
  74. * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
  75. *
  76. * ld8.fill, st8.fill MUST be aligned because the RNATs are based on
  77. * the address (bits [8:3]), so we must fail.
  78. */
  79. #define LD_OP 0x080
  80. #define LDS_OP 0x081
  81. #define LDA_OP 0x082
  82. #define LDSA_OP 0x083
  83. #define LDBIAS_OP 0x084
  84. #define LDACQ_OP 0x085
  85. /* 0x086, 0x087 are not relevant */
  86. #define LDCCLR_OP 0x088
  87. #define LDCNC_OP 0x089
  88. #define LDCCLRACQ_OP 0x08a
  89. #define ST_OP 0x08c
  90. #define STREL_OP 0x08d
  91. /* 0x08e,0x8f are not relevant */
  92. /*
  93. * Table C-29 Integer Load +Reg
  94. *
  95. * we use the ld->m (bit [36:36]) field to determine whether or not we have
  96. * a load/store of this form.
  97. */
  98. /*
  99. * Table C-30 Integer Load/Store +Imm
  100. *
  101. * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
  102. *
  103. * ld8.fill, st8.fill must be aligned because the NaT bits are based on
  104. * the address, so we must fail and the program must be fixed.
  105. */
  106. #define LD_IMM_OP 0x0a0
  107. #define LDS_IMM_OP 0x0a1
  108. #define LDA_IMM_OP 0x0a2
  109. #define LDSA_IMM_OP 0x0a3
  110. #define LDBIAS_IMM_OP 0x0a4
  111. #define LDACQ_IMM_OP 0x0a5
  112. /* 0x0a6, 0xa7 are not relevant */
  113. #define LDCCLR_IMM_OP 0x0a8
  114. #define LDCNC_IMM_OP 0x0a9
  115. #define LDCCLRACQ_IMM_OP 0x0aa
  116. #define ST_IMM_OP 0x0ac
  117. #define STREL_IMM_OP 0x0ad
  118. /* 0x0ae,0xaf are not relevant */
  119. /*
  120. * Table C-32 Floating-point Load/Store
  121. */
  122. #define LDF_OP 0x0c0
  123. #define LDFS_OP 0x0c1
  124. #define LDFA_OP 0x0c2
  125. #define LDFSA_OP 0x0c3
  126. /* 0x0c6 is irrelevant */
  127. #define LDFCCLR_OP 0x0c8
  128. #define LDFCNC_OP 0x0c9
  129. /* 0x0cb is irrelevant */
  130. #define STF_OP 0x0cc
  131. /*
  132. * Table C-33 Floating-point Load +Reg
  133. *
  134. * we use the ld->m (bit [36:36]) field to determine whether or not we have
  135. * a load/store of this form.
  136. */
  137. /*
  138. * Table C-34 Floating-point Load/Store +Imm
  139. */
  140. #define LDF_IMM_OP 0x0e0
  141. #define LDFS_IMM_OP 0x0e1
  142. #define LDFA_IMM_OP 0x0e2
  143. #define LDFSA_IMM_OP 0x0e3
  144. /* 0x0e6 is irrelevant */
  145. #define LDFCCLR_IMM_OP 0x0e8
  146. #define LDFCNC_IMM_OP 0x0e9
  147. #define STF_IMM_OP 0x0ec
  148. typedef struct {
  149. unsigned long qp:6; /* [0:5] */
  150. unsigned long r1:7; /* [6:12] */
  151. unsigned long imm:7; /* [13:19] */
  152. unsigned long r3:7; /* [20:26] */
  153. unsigned long x:1; /* [27:27] */
  154. unsigned long hint:2; /* [28:29] */
  155. unsigned long x6_sz:2; /* [30:31] */
  156. unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */
  157. unsigned long m:1; /* [36:36] */
  158. unsigned long op:4; /* [37:40] */
  159. unsigned long pad:23; /* [41:63] */
  160. } load_store_t;
  161. typedef enum {
  162. UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */
  163. UPD_REG /* ldXZ r1=[r3],r2 */
  164. } update_t;
  165. /*
  166. * We use tables to keep track of the offsets of registers in the saved state.
  167. * This way we save having big switch/case statements.
  168. *
  169. * We use bit 0 to indicate switch_stack or pt_regs.
  170. * The offset is simply shifted by 1 bit.
  171. * A 2-byte value should be enough to hold any kind of offset
  172. *
  173. * In case the calling convention changes (and thus pt_regs/switch_stack)
  174. * simply use RSW instead of RPT or vice-versa.
  175. */
  176. #define RPO(x) ((size_t) &((struct pt_regs *)0)->x)
  177. #define RSO(x) ((size_t) &((struct switch_stack *)0)->x)
  178. #define RPT(x) (RPO(x) << 1)
  179. #define RSW(x) (1| RSO(x)<<1)
  180. #define GR_OFFS(x) (gr_info[x]>>1)
  181. #define GR_IN_SW(x) (gr_info[x] & 0x1)
  182. #define FR_OFFS(x) (fr_info[x]>>1)
  183. #define FR_IN_SW(x) (fr_info[x] & 0x1)
  184. static u16 gr_info[32]={
  185. 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
  186. RPT(r1), RPT(r2), RPT(r3),
  187. RSW(r4), RSW(r5), RSW(r6), RSW(r7),
  188. RPT(r8), RPT(r9), RPT(r10), RPT(r11),
  189. RPT(r12), RPT(r13), RPT(r14), RPT(r15),
  190. RPT(r16), RPT(r17), RPT(r18), RPT(r19),
  191. RPT(r20), RPT(r21), RPT(r22), RPT(r23),
  192. RPT(r24), RPT(r25), RPT(r26), RPT(r27),
  193. RPT(r28), RPT(r29), RPT(r30), RPT(r31)
  194. };
  195. static u16 fr_info[32]={
  196. 0, /* constant : WE SHOULD NEVER GET THIS */
  197. 0, /* constant : WE SHOULD NEVER GET THIS */
  198. RSW(f2), RSW(f3), RSW(f4), RSW(f5),
  199. RPT(f6), RPT(f7), RPT(f8), RPT(f9),
  200. RPT(f10), RPT(f11),
  201. RSW(f12), RSW(f13), RSW(f14),
  202. RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
  203. RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
  204. RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
  205. RSW(f30), RSW(f31)
  206. };
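/*
 * Example of the encoding above: gr_info[8] == RPT(r8), i.e.
 * offsetof(struct pt_regs, r8) << 1 with bit 0 clear, so GR_IN_SW(8) is 0
 * (the register lives in pt_regs) and GR_OFFS(8) recovers the byte offset.
 * Conversely, gr_info[4] == RSW(r4) has bit 0 set because r4-r7 are
 * callee-saved and therefore live in struct switch_stack.
 */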
  207. /* Invalidate ALAT entry for integer register REGNO. */
  208. static void
  209. invala_gr (int regno)
  210. {
  211. # define F(reg) case reg: ia64_invala_gr(reg); break
  212. switch (regno) {
  213. F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
  214. F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
  215. F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
  216. F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
  217. F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
  218. F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
  219. F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
  220. F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
  221. F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
  222. F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
  223. F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
  224. F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
  225. F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
  226. F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
  227. F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
  228. F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
  229. }
  230. # undef F
  231. }
  232. /* Invalidate ALAT entry for floating-point register REGNO. */
  233. static void
  234. invala_fr (int regno)
  235. {
  236. # define F(reg) case reg: ia64_invala_fr(reg); break
  237. switch (regno) {
  238. F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
  239. F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
  240. F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
  241. F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
  242. F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
  243. F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
  244. F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
  245. F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
  246. F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
  247. F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
  248. F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
  249. F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
  250. F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
  251. F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
  252. F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
  253. F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
  254. }
  255. # undef F
  256. }
  257. static inline unsigned long
  258. rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
  259. {
  260. reg += rrb;
  261. if (reg >= sor)
  262. reg -= sor;
  263. return reg;
  264. }
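/*
 * E.g., with sor=16 and rrb=3, logical rotating-register index 14 maps to
 * physical index (14 + 3) mod 16 = 1.
 */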
  265. static void
  266. set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
  267. {
  268. struct switch_stack *sw = (struct switch_stack *) regs - 1;
  269. unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
  270. unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
  271. unsigned long rnats, nat_mask;
  272. unsigned long on_kbs;
  273. long sof = (regs->cr_ifs) & 0x7f;
  274. long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
  275. long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
  276. long ridx = r1 - 32;
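/*
 * The fields above come straight out of the current frame marker held in
 * cr.ifs: sof = ifs[6:0], sol = ifs[13:7], sor = 8 * ifs[17:14] and
 * rrb.gr = ifs[24:18]. r32 is the first stacked register, hence
 * ridx = r1 - 32.
 */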
  277. if (ridx >= sof) {
  278. /* this should never happen, as the "rsvd register fault" has higher priority */
  279. DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
  280. return;
  281. }
  282. if (ridx < sor)
  283. ridx = rotate_reg(sor, rrb_gr, ridx);
  284. DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
  285. r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
  286. on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
  287. addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
  288. if (addr >= kbs) {
  289. /* the register is on the kernel backing store: easy... */
  290. rnat_addr = ia64_rse_rnat_addr(addr);
  291. if ((unsigned long) rnat_addr >= sw->ar_bspstore)
  292. rnat_addr = &sw->ar_rnat;
  293. nat_mask = 1UL << ia64_rse_slot_num(addr);
  294. *addr = val;
  295. if (nat)
  296. *rnat_addr |= nat_mask;
  297. else
  298. *rnat_addr &= ~nat_mask;
  299. return;
  300. }
  301. if (!user_stack(current, regs)) {
  302. DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
  303. return;
  304. }
  305. bspstore = (unsigned long *)regs->ar_bspstore;
  306. ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
  307. bsp = ia64_rse_skip_regs(ubs_end, -sof);
  308. addr = ia64_rse_skip_regs(bsp, ridx);
  309. DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
  310. ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
  311. rnat_addr = ia64_rse_rnat_addr(addr);
  312. ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
  313. DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
  314. (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
  315. nat_mask = 1UL << ia64_rse_slot_num(addr);
  316. if (nat)
  317. rnats |= nat_mask;
  318. else
  319. rnats &= ~nat_mask;
  320. ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
  321. DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
  322. }
  323. static void
  324. get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
  325. {
  326. struct switch_stack *sw = (struct switch_stack *) regs - 1;
  327. unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
  328. unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
  329. unsigned long rnats, nat_mask;
  330. unsigned long on_kbs;
  331. long sof = (regs->cr_ifs) & 0x7f;
  332. long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
  333. long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
  334. long ridx = r1 - 32;
  335. if (ridx >= sof) {
  336. /* read of out-of-frame register returns an undefined value; 0 in our case. */
  337. DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
  338. goto fail;
  339. }
  340. if (ridx < sor)
  341. ridx = rotate_reg(sor, rrb_gr, ridx);
  342. DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
  343. r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
  344. on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
  345. addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
  346. if (addr >= kbs) {
  347. /* the register is on the kernel backing store: easy... */
  348. *val = *addr;
  349. if (nat) {
  350. rnat_addr = ia64_rse_rnat_addr(addr);
  351. if ((unsigned long) rnat_addr >= sw->ar_bspstore)
  352. rnat_addr = &sw->ar_rnat;
  353. nat_mask = 1UL << ia64_rse_slot_num(addr);
  354. *nat = (*rnat_addr & nat_mask) != 0;
  355. }
  356. return;
  357. }
  358. if (!user_stack(current, regs)) {
  359. DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
  360. goto fail;
  361. }
  362. bspstore = (unsigned long *)regs->ar_bspstore;
  363. ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
  364. bsp = ia64_rse_skip_regs(ubs_end, -sof);
  365. addr = ia64_rse_skip_regs(bsp, ridx);
  366. DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
  367. ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
  368. if (nat) {
  369. rnat_addr = ia64_rse_rnat_addr(addr);
  370. nat_mask = 1UL << ia64_rse_slot_num(addr);
  371. DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
  372. ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
  373. *nat = (rnats & nat_mask) != 0;
  374. }
  375. return;
  376. fail:
  377. *val = 0;
  378. if (nat)
  379. *nat = 0;
  380. return;
  381. }
  382. static void
  383. setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
  384. {
  385. struct switch_stack *sw = (struct switch_stack *) regs - 1;
  386. unsigned long addr;
  387. unsigned long bitmask;
  388. unsigned long *unat;
  389. /*
  390. * First takes care of stacked registers
  391. */
  392. if (regnum >= IA64_FIRST_STACKED_GR) {
  393. set_rse_reg(regs, regnum, val, nat);
  394. return;
  395. }
  396. /*
  397. * Using r0 as a target raises a General Exception fault which has higher priority
  398. * than the Unaligned Reference fault.
  399. */
  400. /*
  401. * Now look at registers in [0-31] range and init correct UNAT
  402. */
  403. if (GR_IN_SW(regnum)) {
  404. addr = (unsigned long)sw;
  405. unat = &sw->ar_unat;
  406. } else {
  407. addr = (unsigned long)regs;
  408. unat = &sw->caller_unat;
  409. }
  410. DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
  411. addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
  412. /*
  413. * add offset from base of struct
  414. * and do it !
  415. */
  416. addr += GR_OFFS(regnum);
  417. *(unsigned long *)addr = val;
  418. /*
  419. * We need to clear the corresponding UNAT bit to fully emulate the load
  420. * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4
  421. */
  422. bitmask = 1UL << (addr >> 3 & 0x3f);
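/*
 * Example: if the save slot sits at an address whose bits {8:3} are 0x35
 * (say, an address ending in 0x1a8), the UNAT bit manipulated below is bit 53.
 */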
  423. DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
  424. if (nat) {
  425. *unat |= bitmask;
  426. } else {
  427. *unat &= ~bitmask;
  428. }
  429. DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
  430. }
  431. /*
  432. * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
  433. * range from 32-127); the result is in the range from 0-95.
  434. */
  435. static inline unsigned long
  436. fph_index (struct pt_regs *regs, long regnum)
  437. {
  438. unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
  439. return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
  440. }
  441. static void
  442. setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
  443. {
  444. struct switch_stack *sw = (struct switch_stack *)regs - 1;
  445. unsigned long addr;
  446. /*
  447. * From EAS-2.5: FPDisableFault has higher priority than Unaligned
  448. * Fault. Thus, when we get here, we know the partition is enabled.
  449. * To update f32-f127, there are three choices:
  450. *
  451. * (1) save f32-f127 to thread.fph and update the values there
  452. * (2) use a gigantic switch statement to directly access the registers
  453. * (3) generate code on the fly to update the desired register
  454. *
  455. * For now, we are using approach (1).
  456. */
  457. if (regnum >= IA64_FIRST_ROTATING_FR) {
  458. ia64_sync_fph(current);
  459. current->thread.fph[fph_index(regs, regnum)] = *fpval;
  460. } else {
  461. /*
  462. * pt_regs or switch_stack ?
  463. */
  464. if (FR_IN_SW(regnum)) {
  465. addr = (unsigned long)sw;
  466. } else {
  467. addr = (unsigned long)regs;
  468. }
  469. DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
  470. addr += FR_OFFS(regnum);
  471. *(struct ia64_fpreg *)addr = *fpval;
  472. /*
  473. * mark the low partition as being used now
  474. *
  475. * It is highly unlikely that this bit is not already set, but
  476. * let's do it for safety.
  477. */
  478. regs->cr_ipsr |= IA64_PSR_MFL;
  479. }
  480. }
  481. /*
  482. * Those 2 inline functions generate the spilled versions of the constant floating point
  483. * registers which can be used with stfX
  484. */
  485. static inline void
  486. float_spill_f0 (struct ia64_fpreg *final)
  487. {
  488. ia64_stf_spill(final, 0);
  489. }
  490. static inline void
  491. float_spill_f1 (struct ia64_fpreg *final)
  492. {
  493. ia64_stf_spill(final, 1);
  494. }
  495. static void
  496. getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
  497. {
  498. struct switch_stack *sw = (struct switch_stack *) regs - 1;
  499. unsigned long addr;
  500. /*
  501. * From EAS-2.5: FPDisableFault has higher priority than
  502. * Unaligned Fault. Thus, when we get here, we know the partition is
  503. * enabled.
  504. *
  505. * When regnum > 31, the register is still live and we need to force a save
  506. * to current->thread.fph to get access to it. See discussion in setfpreg()
  507. * for reasons and other ways of doing this.
  508. */
  509. if (regnum >= IA64_FIRST_ROTATING_FR) {
  510. ia64_flush_fph(current);
  511. *fpval = current->thread.fph[fph_index(regs, regnum)];
  512. } else {
  513. /*
  514. * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
  515. * not saved, we must generate their spilled form on the fly
  516. */
  517. switch(regnum) {
  518. case 0:
  519. float_spill_f0(fpval);
  520. break;
  521. case 1:
  522. float_spill_f1(fpval);
  523. break;
  524. default:
  525. /*
  526. * pt_regs or switch_stack ?
  527. */
  528. addr = FR_IN_SW(regnum) ? (unsigned long)sw
  529. : (unsigned long)regs;
  530. DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
  531. FR_IN_SW(regnum), addr, FR_OFFS(regnum));
  532. addr += FR_OFFS(regnum);
  533. *fpval = *(struct ia64_fpreg *)addr;
  534. }
  535. }
  536. }
  537. static void
  538. getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
  539. {
  540. struct switch_stack *sw = (struct switch_stack *) regs - 1;
  541. unsigned long addr, *unat;
  542. if (regnum >= IA64_FIRST_STACKED_GR) {
  543. get_rse_reg(regs, regnum, val, nat);
  544. return;
  545. }
  546. /*
  547. * take care of r0 (read-only, always evaluates to 0)
  548. */
  549. if (regnum == 0) {
  550. *val = 0;
  551. if (nat)
  552. *nat = 0;
  553. return;
  554. }
  555. /*
  556. * Now look at registers in [0-31] range and init correct UNAT
  557. */
  558. if (GR_IN_SW(regnum)) {
  559. addr = (unsigned long)sw;
  560. unat = &sw->ar_unat;
  561. } else {
  562. addr = (unsigned long)regs;
  563. unat = &sw->caller_unat;
  564. }
  565. DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));
  566. addr += GR_OFFS(regnum);
  567. *val = *(unsigned long *)addr;
  568. /*
  569. * do it only when requested
  570. */
  571. if (nat)
  572. *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
  573. }
  574. static void
  575. emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
  576. {
  577. /*
  578. * IMPORTANT:
  579. * Given the way we handle unaligned speculative loads, we should
  580. * not get to this point in the code but we keep this sanity check,
  581. * just in case.
  582. */
  583. if (ld.x6_op == 1 || ld.x6_op == 3) {
  584. printk(KERN_ERR "%s: register update on speculative load, error\n", __func__);
  585. if (die_if_kernel("unaligned reference on speculative load with register update\n",
  586. regs, 30))
  587. return;
  588. }
  589. /*
  590. * at this point, we know that the base register to update is valid i.e.,
  591. * it's not r0
  592. */
  593. if (type == UPD_IMMEDIATE) {
  594. unsigned long imm;
  595. /*
  596. * Load +Imm: ldXZ r1=[r3],imm(9)
  597. *
  598. *
  599. * form imm9: [13:19] contain the first 7 bits
  600. */
  601. imm = ld.x << 7 | ld.imm;
  602. /*
  603. * sign extend (1+8bits) if m set
  604. */
  605. if (ld.m) imm |= SIGN_EXT9;
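/*
 * Worked example: x=1, imm=0x05 gives imm9 = 0x85; with m set, the
 * SIGN_EXT9 OR turns this into 0xffffffffffffff85, i.e. r3 is advanced
 * by -123.
 */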
  606. /*
  607. * ifa == r3 and we know that the NaT bit on r3 was clear so
  608. * we can directly use ifa.
  609. */
  610. ifa += imm;
  611. setreg(ld.r3, ifa, 0, regs);
  612. DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
  613. } else if (ld.m) {
  614. unsigned long r2;
  615. int nat_r2;
  616. /*
  617. * Load +Reg Opcode: ldXZ r1=[r3],r2
  618. *
  619. * Note: that we update r3 even in the case of ldfX.a
  620. * (where the load does not happen)
  621. *
  622. * The way the load algorithm works, we know that r3 does not
  623. * have its NaT bit set (would have gotten NaT consumption
  624. * before getting the unaligned fault). So we can use ifa
  625. * which equals r3 at this point.
  626. *
  627. * IMPORTANT:
  628. * The above statement holds ONLY because we know that we
  629. * never reach this code when trying to do a ldX.s.
  630. * If we ever make it to here on an ldfX.s then this assumption no longer holds.
  631. */
  632. getreg(ld.imm, &r2, &nat_r2, regs);
  633. ifa += r2;
  634. /*
  635. * propagate Nat r2 -> r3
  636. */
  637. setreg(ld.r3, ifa, nat_r2, regs);
  638. DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
  639. }
  640. }
  641. static int
  642. emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
  643. {
  644. unsigned int len = 1 << ld.x6_sz;
  645. unsigned long val = 0;
  646. /*
  647. * r0, as target, doesn't need to be checked because Illegal Instruction
  648. * faults have higher priority than unaligned faults.
  649. *
  650. * r0 cannot be found as the base as it would never generate an
  651. * unaligned reference.
  652. */
  653. /*
  654. * For ldX.a we will emulate the load and also invalidate the ALAT entry.
  655. * See comment below for explanation on how we handle ldX.a
  656. */
  657. if (len != 2 && len != 4 && len != 8) {
  658. DPRINT("unknown size: x6=%d\n", ld.x6_sz);
  659. return -1;
  660. }
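/* len == 1 can never get here: a 1-byte access is always aligned. */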
  661. /* this assumes little-endian byte-order: */
  662. if (copy_from_user(&val, (void __user *) ifa, len))
  663. return -1;
  664. setreg(ld.r1, val, 0, regs);
  665. /*
  666. * check for updates on any kind of loads
  667. */
  668. if (ld.op == 0x5 || ld.m)
  669. emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
  670. /*
  671. * handling of various loads (based on EAS2.4):
  672. *
  673. * ldX.acq (ordered load):
  674. * - acquire semantics would have been used, so force fence instead.
  675. *
  676. * ldX.c.clr (check load and clear):
  677. * - if we get to this handler, it's because the entry was not in the ALAT.
  678. * Therefore the operation reverts to a normal load
  679. *
  680. * ldX.c.nc (check load no clear):
  681. * - same as previous one
  682. *
  683. * ldX.c.clr.acq (ordered check load and clear):
  684. * - same as above for c.clr part. The load needs to have acquire semantics. So
  685. * we use the fence semantics which is stronger and thus ensures correctness.
  686. *
  687. * ldX.a (advanced load):
  688. * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
  689. * address doesn't match requested size alignment. This means that we would
  690. * possibly need more than one load to get the result.
  691. *
  692. * The load part can be handled just like a normal load, however the difficult
  693. * part is to get the right thing into the ALAT. The critical piece of information
  694. * is the base address of the load and its size. To do that, a ld.a must be executed,
  695. * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
  696. * if we use the same target register, we will be okay for the check.a instruction.
  697. * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
  698. * which would overlap within [r3,r3+X] (the size of the load was stored in the
  699. * ALAT). If such an entry is found the entry is invalidated. But this is not good
  700. * enough, take the following example:
  701. * r3=3
  702. * ld4.a r1=[r3]
  703. *
  704. * Could be emulated by doing:
  705. * ld1.a r1=[r3],1
  706. * store to temporary;
  707. * ld1.a r1=[r3],1
  708. * store & shift to temporary;
  709. * ld1.a r1=[r3],1
  710. * store & shift to temporary;
  711. * ld1.a r1=[r3]
  712. * store & shift to temporary;
  713. * r1=temporary
  714. *
  715. * So in this case, you would get the right value in r1 but the wrong info in
  716. * the ALAT. Notice that you could do it in reverse to finish with address 3
  717. * but you would still get the size wrong. To get the size right, one needs to
  718. * execute exactly the same kind of load. You could do it from an aligned
  719. * temporary location, but you would get the address wrong.
  720. *
  721. * So no matter what, it is not possible to emulate an advanced load
  722. * correctly. But is that really critical?
  723. *
  724. * We will always convert ld.a into a normal load with ALAT invalidated. This
  725. * will enable the compiler to do optimizations where certain code paths after ld.a
  726. * are not required to have ld.c/chk.a, e.g., code paths with no intervening stores.
  727. *
  728. * If there is a store after the advanced load, one must either do a ld.c.* or
  729. * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
  730. * entry found in ALAT), and that's perfectly ok because:
  731. *
  732. * - ld.c.*, if the entry is not present a normal load is executed
  733. * - chk.a.*, if the entry is not present, execution jumps to recovery code
  734. *
  735. * In either case, the load can be potentially retried in another form.
  736. *
  737. * ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
  738. * up a stale entry later). The register base update MUST also be performed.
  739. */
  740. /*
  741. * when the load has the .acq completer then
  742. * use ordering fence.
  743. */
  744. if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
  745. mb();
  746. /*
  747. * invalidate ALAT entry in case of advanced load
  748. */
  749. if (ld.x6_op == 0x2)
  750. invala_gr(ld.r1);
  751. return 0;
  752. }
  753. static int
  754. emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
  755. {
  756. unsigned long r2;
  757. unsigned int len = 1 << ld.x6_sz;
  758. /*
  759. * if we get to this handler, Nat bits on both r3 and r2 have already
  760. * been checked, so we don't need to do it again
  761. *
  762. * extract the value to be stored
  763. */
  764. getreg(ld.imm, &r2, NULL, regs);
  765. /*
  766. * we rely on the macros in unaligned.h for now i.e.,
  767. * we let the compiler figure out how to read memory gracefully.
  768. *
  769. * We need this switch/case because of the way the inline functions
  770. * work. The code is optimized by the compiler and looks like
  771. * a single switch/case.
  772. */
  773. DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
  774. if (len != 2 && len != 4 && len != 8) {
  775. DPRINT("unknown size: x6=%d\n", ld.x6_sz);
  776. return -1;
  777. }
  778. /* this assumes little-endian byte-order: */
  779. if (copy_to_user((void __user *) ifa, &r2, len))
  780. return -1;
  781. /*
  782. * stX [r3]=r2,imm(9)
  783. *
  784. * NOTE:
  785. * ld.r3 can never be r0, because r0 would not generate an
  786. * unaligned access.
  787. */
  788. if (ld.op == 0x5) {
  789. unsigned long imm;
  790. /*
  791. * form imm9: [12:6] contain the first 7 bits
  792. */
  793. imm = ld.x << 7 | ld.r1;
  794. /*
  795. * sign extend (8bits) if m set
  796. */
  797. if (ld.m) imm |= SIGN_EXT9;
  798. /*
  799. * ifa == r3 (NaT is necessarily cleared)
  800. */
  801. ifa += imm;
  802. DPRINT("imm=%lx r3=%lx\n", imm, ifa);
  803. setreg(ld.r3, ifa, 0, regs);
  804. }
  805. /*
  806. * we don't have alat_invalidate_multiple() so we need
  807. * to do the complete flush :-<<
  808. */
  809. ia64_invala();
  810. /*
  811. * stX.rel: use fence instead of release
  812. */
  813. if (ld.x6_op == 0xd)
  814. mb();
  815. return 0;
  816. }
  817. /*
  818. * floating point operations sizes in bytes
  819. */
  820. static const unsigned char float_fsz[4]={
  821. 10, /* extended precision (e) */
  822. 8, /* integer (8) */
  823. 4, /* single precision (s) */
  824. 8 /* double precision (d) */
  825. };
  826. static inline void
  827. mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
  828. {
  829. ia64_ldfe(6, init);
  830. ia64_stop();
  831. ia64_stf_spill(final, 6);
  832. }
  833. static inline void
  834. mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
  835. {
  836. ia64_ldf8(6, init);
  837. ia64_stop();
  838. ia64_stf_spill(final, 6);
  839. }
  840. static inline void
  841. mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
  842. {
  843. ia64_ldfs(6, init);
  844. ia64_stop();
  845. ia64_stf_spill(final, 6);
  846. }
  847. static inline void
  848. mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
  849. {
  850. ia64_ldfd(6, init);
  851. ia64_stop();
  852. ia64_stf_spill(final, 6);
  853. }
  854. static inline void
  855. float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
  856. {
  857. ia64_ldf_fill(6, init);
  858. ia64_stop();
  859. ia64_stfe(final, 6);
  860. }
  861. static inline void
  862. float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
  863. {
  864. ia64_ldf_fill(6, init);
  865. ia64_stop();
  866. ia64_stf8(final, 6);
  867. }
  868. static inline void
  869. float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
  870. {
  871. ia64_ldf_fill(6, init);
  872. ia64_stop();
  873. ia64_stfs(final, 6);
  874. }
  875. static inline void
  876. float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
  877. {
  878. ia64_ldf_fill(6, init);
  879. ia64_stop();
  880. ia64_stfd(final, 6);
  881. }
  882. static int
  883. emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
  884. {
  885. struct ia64_fpreg fpr_init[2];
  886. struct ia64_fpreg fpr_final[2];
  887. unsigned long len = float_fsz[ld.x6_sz];
  888. /*
  889. * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
  890. * higher priority than unaligned faults.
  891. *
  892. * r0 cannot be found as the base as it would never generate an unaligned
  893. * reference.
  894. */
  895. /*
  896. * make sure we get clean buffers
  897. */
  898. memset(&fpr_init, 0, sizeof(fpr_init));
  899. memset(&fpr_final, 0, sizeof(fpr_final));
  900. /*
  901. * ldfpX.a: we don't try to emulate anything but we must
  902. * invalidate the ALAT entry and execute updates, if any.
  903. */
  904. if (ld.x6_op != 0x2) {
  905. /*
  906. * This assumes little-endian byte-order. Note that there is no "ldfpe"
  907. * instruction:
  908. */
  909. if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
  910. || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
  911. return -1;
  912. DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
  913. DDUMP("frp_init =", &fpr_init, 2*len);
  914. /*
  915. * XXX fixme
  916. * Could optimize inlines by using ldfpX & 2 spills
  917. */
  918. switch( ld.x6_sz ) {
  919. case 0:
  920. mem2float_extended(&fpr_init[0], &fpr_final[0]);
  921. mem2float_extended(&fpr_init[1], &fpr_final[1]);
  922. break;
  923. case 1:
  924. mem2float_integer(&fpr_init[0], &fpr_final[0]);
  925. mem2float_integer(&fpr_init[1], &fpr_final[1]);
  926. break;
  927. case 2:
  928. mem2float_single(&fpr_init[0], &fpr_final[0]);
  929. mem2float_single(&fpr_init[1], &fpr_final[1]);
  930. break;
  931. case 3:
  932. mem2float_double(&fpr_init[0], &fpr_final[0]);
  933. mem2float_double(&fpr_init[1], &fpr_final[1]);
  934. break;
  935. }
  936. DDUMP("fpr_final =", &fpr_final, 2*len);
  937. /*
  938. * XXX fixme
  939. *
  940. * A possible optimization would be to drop fpr_final and directly
  941. * use the storage from the saved context i.e., the actual final
  942. * destination (pt_regs, switch_stack or thread structure).
  943. */
  944. setfpreg(ld.r1, &fpr_final[0], regs);
  945. setfpreg(ld.imm, &fpr_final[1], regs);
  946. }
  947. /*
  948. * Check for updates: only immediate updates are available for this
  949. * instruction.
  950. */
  951. if (ld.m) {
  952. /*
  953. * the immediate is implicit given the ldsz of the operation:
  954. * single: 8 (2x4) and for all others it's 16 (2x8)
  955. */
  956. ifa += len<<1;
  957. /*
  958. * IMPORTANT:
  959. * the fact that we force the NaT of r3 to zero is ONLY valid
  960. * as long as we don't come here with a ldfpX.s.
  961. * For this reason we keep this sanity check
  962. */
  963. if (ld.x6_op == 1 || ld.x6_op == 3)
  964. printk(KERN_ERR "%s: register update on speculative load pair, error\n",
  965. __func__);
  966. setreg(ld.r3, ifa, 0, regs);
  967. }
  968. /*
  969. * Invalidate ALAT entries, if any, for both registers.
  970. */
  971. if (ld.x6_op == 0x2) {
  972. invala_fr(ld.r1);
  973. invala_fr(ld.imm);
  974. }
  975. return 0;
  976. }
  977. static int
  978. emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
  979. {
  980. struct ia64_fpreg fpr_init;
  981. struct ia64_fpreg fpr_final;
  982. unsigned long len = float_fsz[ld.x6_sz];
  983. /*
  984. * fr0 & fr1 don't need to be checked because Illegal Instruction
  985. * faults have higher priority than unaligned faults.
  986. *
  987. * r0 cannot be found as the base as it would never generate an
  988. * unaligned reference.
  989. */
  990. /*
  991. * make sure we get clean buffers
  992. */
  993. memset(&fpr_init,0, sizeof(fpr_init));
  994. memset(&fpr_final,0, sizeof(fpr_final));
  995. /*
  996. * For ldfX.a we don't try to emulate anything, but we must
  997. * invalidate the ALAT entry.
  998. * See comments in ldX for descriptions on how the various loads are handled.
  999. */
  1000. if (ld.x6_op != 0x2) {
  1001. if (copy_from_user(&fpr_init, (void __user *) ifa, len))
  1002. return -1;
  1003. DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
  1004. DDUMP("fpr_init =", &fpr_init, len);
  1005. /*
  1006. * we only do something for x6_op={0,8,9}
  1007. */
  1008. switch( ld.x6_sz ) {
  1009. case 0:
  1010. mem2float_extended(&fpr_init, &fpr_final);
  1011. break;
  1012. case 1:
  1013. mem2float_integer(&fpr_init, &fpr_final);
  1014. break;
  1015. case 2:
  1016. mem2float_single(&fpr_init, &fpr_final);
  1017. break;
  1018. case 3:
  1019. mem2float_double(&fpr_init, &fpr_final);
  1020. break;
  1021. }
  1022. DDUMP("fpr_final =", &fpr_final, len);
  1023. /*
  1024. * XXX fixme
  1025. *
  1026. * A possible optimization would be to drop fpr_final and directly
  1027. * use the storage from the saved context i.e., the actual final
  1028. * destination (pt_regs, switch_stack or thread structure).
  1029. */
  1030. setfpreg(ld.r1, &fpr_final, regs);
  1031. }
  1032. /*
  1033. * check for updates on any loads
  1034. */
  1035. if (ld.op == 0x7 || ld.m)
  1036. emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
  1037. /*
  1038. * invalidate ALAT entry in case of advanced floating point loads
  1039. */
  1040. if (ld.x6_op == 0x2)
  1041. invala_fr(ld.r1);
  1042. return 0;
  1043. }
  1044. static int
  1045. emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
  1046. {
  1047. struct ia64_fpreg fpr_init;
  1048. struct ia64_fpreg fpr_final;
  1049. unsigned long len = float_fsz[ld.x6_sz];
  1050. /*
  1051. * make sure we get clean buffers
  1052. */
  1053. memset(&fpr_init,0, sizeof(fpr_init));
  1054. memset(&fpr_final,0, sizeof(fpr_final));
  1055. /*
  1056. * if we get to this handler, Nat bits on both r3 and r2 have already
  1057. * been checked, so we don't need to do it again
  1058. *
  1059. * extract the value to be stored
  1060. */
  1061. getfpreg(ld.imm, &fpr_init, regs);
  1062. /*
  1063. * during this step, we extract the spilled registers from the saved
  1064. * context i.e., we refill. Then we store (no spill) to temporary
  1065. * aligned location
  1066. */
  1067. switch( ld.x6_sz ) {
  1068. case 0:
  1069. float2mem_extended(&fpr_init, &fpr_final);
  1070. break;
  1071. case 1:
  1072. float2mem_integer(&fpr_init, &fpr_final);
  1073. break;
  1074. case 2:
  1075. float2mem_single(&fpr_init, &fpr_final);
  1076. break;
  1077. case 3:
  1078. float2mem_double(&fpr_init, &fpr_final);
  1079. break;
  1080. }
  1081. DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
  1082. DDUMP("fpr_init =", &fpr_init, len);
  1083. DDUMP("fpr_final =", &fpr_final, len);
  1084. if (copy_to_user((void __user *) ifa, &fpr_final, len))
  1085. return -1;
  1086. /*
  1087. * stfX [r3]=r2,imm(9)
  1088. *
  1089. * NOTE:
  1090. * ld.r3 can never be r0, because r0 would not generate an
  1091. * unaligned access.
  1092. */
  1093. if (ld.op == 0x7) {
  1094. unsigned long imm;
  1095. /*
  1096. * form imm9: [12:6] contain the first 7 bits
  1097. */
  1098. imm = ld.x << 7 | ld.r1;
  1099. /*
  1100. * sign extend (8bits) if m set
  1101. */
  1102. if (ld.m)
  1103. imm |= SIGN_EXT9;
  1104. /*
  1105. * ifa == r3 (NaT is necessarily cleared)
  1106. */
  1107. ifa += imm;
  1108. DPRINT("imm=%lx r3=%lx\n", imm, ifa);
  1109. setreg(ld.r3, ifa, 0, regs);
  1110. }
  1111. /*
  1112. * we don't have alat_invalidate_multiple() so we need
  1113. * to do the complete flush :-<<
  1114. */
  1115. ia64_invala();
  1116. return 0;
  1117. }
  1118. /*
  1119. * Make sure we log the unaligned access, so that user/sysadmin can notice it and
  1120. * eventually fix the program. However, we don't want to do that for every access so we
  1121. * pace it with jiffies. This isn't really MP-safe, but it doesn't really have to be
  1122. * either...
  1123. */
  1124. static int
  1125. within_logging_rate_limit (void)
  1126. {
  1127. static unsigned long count, last_time;
  1128. if (time_after(jiffies, last_time + 5 * HZ))
  1129. count = 0;
  1130. if (count < 5) {
  1131. last_time = jiffies;
  1132. count++;
  1133. return 1;
  1134. }
  1135. return 0;
  1136. }
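/*
 * Net effect of within_logging_rate_limit(): at most five messages are let
 * through, and the counter is reset only once 5*HZ jiffies have elapsed
 * since the last message that was logged.
 */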
  1137. void
  1138. ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
  1139. {
  1140. struct ia64_psr *ipsr = ia64_psr(regs);
  1141. mm_segment_t old_fs = get_fs();
  1142. unsigned long bundle[2];
  1143. unsigned long opcode;
  1144. struct siginfo si;
  1145. const struct exception_table_entry *eh = NULL;
  1146. union {
  1147. unsigned long l;
  1148. load_store_t insn;
  1149. } u;
  1150. int ret = -1;
  1151. if (ia64_psr(regs)->be) {
  1152. /* we don't support big-endian accesses */
  1153. if (die_if_kernel("big-endian unaligned accesses are not supported", regs, 0))
  1154. return;
  1155. goto force_sigbus;
  1156. }
  1157. /*
  1158. * Treat kernel accesses for which there is an exception handler entry the same as
  1159. * user-level unaligned accesses. Otherwise, a clever program could trick this
  1160. * handler into reading arbitrary kernel addresses...
  1161. */
  1162. if (!user_mode(regs))
  1163. eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
  1164. if (user_mode(regs) || eh) {
  1165. if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
  1166. goto force_sigbus;
  1167. if (!no_unaligned_warning &&
  1168. !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
  1169. within_logging_rate_limit())
  1170. {
  1171. char buf[200]; /* comm[] is at most 16 bytes... */
  1172. size_t len;
  1173. len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
  1174. "ip=0x%016lx\n\r", current->comm,
  1175. task_pid_nr(current),
  1176. ifa, regs->cr_iip + ipsr->ri);
  1177. /*
  1178. * Don't call tty_write_message() if we're in the kernel; we might
  1179. * be holding locks...
  1180. */
  1181. if (user_mode(regs))
  1182. tty_write_message(current->signal->tty, buf);
  1183. buf[len-1] = '\0'; /* drop '\r' */
  1184. /* watch for command names containing %s */
  1185. printk(KERN_WARNING "%s", buf);
  1186. } else {
  1187. if (no_unaligned_warning && !noprint_warning) {
  1188. noprint_warning = 1;
  1189. printk(KERN_WARNING "%s(%d) encountered an "
  1190. "unaligned exception which required\n"
  1191. "kernel assistance, which degrades "
  1192. "the performance of the application.\n"
  1193. "Unaligned exception warnings have "
  1194. "been disabled by the system "
  1195. "administrator\n"
  1196. "echo 0 > /proc/sys/kernel/ignore-"
  1197. "unaligned-usertrap to re-enable\n",
  1198. current->comm, task_pid_nr(current));
  1199. }
  1200. }
  1201. } else {
  1202. if (within_logging_rate_limit())
  1203. printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
  1204. ifa, regs->cr_iip + ipsr->ri);
  1205. set_fs(KERNEL_DS);
  1206. }
  1207. DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
  1208. regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
  1209. if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
  1210. goto failure;
  1211. /*
  1212. * extract the instruction from the bundle given the slot number
  1213. */
  1214. switch (ipsr->ri) {
  1215. case 0: u.l = (bundle[0] >> 5); break;
  1216. case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
  1217. case 2: u.l = (bundle[1] >> 23); break;
  1218. }
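/*
 * The shifts above follow the bundle layout: a 128-bit bundle holds a
 * 5-bit template plus three 41-bit slots, at bits [45:5], [86:46]
 * (straddling the two 64-bit words) and [127:87] respectively.
 */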
  1219. opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
  1220. DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
  1221. "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
  1222. u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
  1223. /*
  1224. * IMPORTANT:
  1225. * Notice that the switch statement DOES not cover all possible instructions
  1226. * that DO generate unaligned references. This is done on purpose because for some
  1227. * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
  1228. * is WRONG to try and emulate. Here is a list of instructions we don't emulate, i.e.,
  1229. * the program will get a signal and die:
  1230. *
  1231. * load/store:
  1232. * - ldX.spill
  1233. * - stX.spill
  1234. * Reason: RNATs are based on addresses
  1235. * - ld16
  1236. * - st16
  1237. * Reason: ld16 and st16 are supposed to occur in a single
  1238. * memory op
  1239. *
  1240. * synchronization:
  1241. * - cmpxchg
  1242. * - fetchadd
  1243. * - xchg
  1244. * Reason: ATOMIC operations cannot be emulated properly using multiple
  1245. * instructions.
  1246. *
  1247. * speculative loads:
  1248. * - ldX.sZ
  1249. * Reason: side effects, code must be ready to deal with failure so simpler
  1250. * to let the load fail.
  1251. * ---------------------------------------------------------------------------------
  1252. * XXX fixme
  1253. *
  1254. * I would like to get rid of this switch case and do something
  1255. * more elegant.
  1256. */
  1257. switch (opcode) {
  1258. case LDS_OP:
  1259. case LDSA_OP:
  1260. if (u.insn.x)
  1261. /* oops, really a semaphore op (cmpxchg, etc) */
  1262. goto failure;
  1263. /* no break */
  1264. case LDS_IMM_OP:
  1265. case LDSA_IMM_OP:
  1266. case LDFS_OP:
  1267. case LDFSA_OP:
  1268. case LDFS_IMM_OP:
  1269. /*
  1270. * The instruction will be retried with deferred exceptions turned on, and
  1271. * we should get the NaT bit installed
  1272. *
  1273. * IMPORTANT: When PSR_ED is set, the register & immediate update forms
  1274. * are actually executed even though the operation failed. So we don't
  1275. * need to take care of this.
  1276. */
  1277. DPRINT("forcing PSR_ED\n");
  1278. regs->cr_ipsr |= IA64_PSR_ED;
  1279. goto done;
  1280. case LD_OP:
  1281. case LDA_OP:
  1282. case LDBIAS_OP:
  1283. case LDACQ_OP:
  1284. case LDCCLR_OP:
  1285. case LDCNC_OP:
  1286. case LDCCLRACQ_OP:
  1287. if (u.insn.x)
  1288. /* oops, really a semaphore op (cmpxchg, etc) */
  1289. goto failure;
  1290. /* no break */
  1291. case LD_IMM_OP:
  1292. case LDA_IMM_OP:
  1293. case LDBIAS_IMM_OP:
  1294. case LDACQ_IMM_OP:
  1295. case LDCCLR_IMM_OP:
  1296. case LDCNC_IMM_OP:
  1297. case LDCCLRACQ_IMM_OP:
  1298. ret = emulate_load_int(ifa, u.insn, regs);
  1299. break;
  1300. case ST_OP:
  1301. case STREL_OP:
  1302. if (u.insn.x)
  1303. /* oops, really a semaphore op (cmpxchg, etc) */
  1304. goto failure;
  1305. /* no break */
  1306. case ST_IMM_OP:
  1307. case STREL_IMM_OP:
  1308. ret = emulate_store_int(ifa, u.insn, regs);
  1309. break;
  1310. case LDF_OP:
  1311. case LDFA_OP:
  1312. case LDFCCLR_OP:
  1313. case LDFCNC_OP:
  1314. if (u.insn.x)
  1315. ret = emulate_load_floatpair(ifa, u.insn, regs);
  1316. else
  1317. ret = emulate_load_float(ifa, u.insn, regs);
  1318. break;
  1319. case LDF_IMM_OP:
  1320. case LDFA_IMM_OP:
  1321. case LDFCCLR_IMM_OP:
  1322. case LDFCNC_IMM_OP:
  1323. ret = emulate_load_float(ifa, u.insn, regs);
  1324. break;
  1325. case STF_OP:
  1326. case STF_IMM_OP:
  1327. ret = emulate_store_float(ifa, u.insn, regs);
  1328. break;
  1329. default:
  1330. goto failure;
  1331. }
  1332. DPRINT("ret=%d\n", ret);
  1333. if (ret)
  1334. goto failure;
  1335. if (ipsr->ri == 2)
  1336. /*
  1337. * given today's architecture this case is not likely to happen because a
  1338. * memory access instruction (M) can never be in the last slot of a
  1339. * bundle. But let's keep it for now.
  1340. */
  1341. regs->cr_iip += 16;
  1342. ipsr->ri = (ipsr->ri + 1) & 0x3;
  1343. DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
  1344. done:
  1345. set_fs(old_fs); /* restore original address limit */
  1346. return;
  1347. failure:
  1348. /* something went wrong... */
  1349. if (!user_mode(regs)) {
  1350. if (eh) {
  1351. ia64_handle_exception(regs, eh);
  1352. goto done;
  1353. }
  1354. if (die_if_kernel("error during unaligned kernel access\n", regs, ret))
  1355. return;
  1356. /* NOT_REACHED */
  1357. }
  1358. force_sigbus:
  1359. si.si_signo = SIGBUS;
  1360. si.si_errno = 0;
  1361. si.si_code = BUS_ADRALN;
  1362. si.si_addr = (void __user *) ifa;
  1363. si.si_flags = 0;
  1364. si.si_isr = 0;
  1365. si.si_imm = 0;
  1366. force_sig_info(SIGBUS, &si, current);
  1367. goto done;
  1368. }