#!/bin/awk -f
# gen-insn-attr-x86.awk: Instruction attribute table generator
# Written by Masami Hiramatsu <mhiramat@redhat.com>
#
# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
  # Sanity-check the awk implementation that is running this script.
  # Returns "" when the needed features work, otherwise an error message.
  function check_awk_implement(    hexzero) {
      # match() must understand POSIX bracket character classes
      if (match("abc", "[[:lower:]]+") == 0)
          return "Your awk doesn't support charactor-class."

      # "%x" must render zero as a bare "0"
      hexzero = sprintf("%x", 0)
      if (hexzero != "0")
          return "Your awk has a printf-format problem."

      return ""
  }
  # Reset the per-table working state: the base table, the three
  # last-prefix tables, and the ids/name of the table being parsed.
  function clear_vars() {
      # split("", arr) leaves arr as an empty array
      split("", table)
      split("", lptable1)
      split("", lptable2)
      split("", lptable3)
      tname = ""
      # escape id, group id and AVX id all restart at "none"
      eid = gid = aid = -1
  }
  BEGIN {
      # Refuse to run on an awk that fails the implementation check
      awkchecked = check_awk_implement()
      if (awkchecked != "") {
          print "Error: " awkchecked > "/dev/stderr"
          print "Please try to use gawk." > "/dev/stderr"
          exit 1
      }

      # Header of the generated file
      print "/* x86 opcode map generated from x86-opcode-map.txt */"
      print "/* Do not change this code. */\n"

      # Next free group / escape ids and the AVX table count
      ggid = 1
      geid = 1
      gaid = 0

      # Name arrays filled by EndTable and printed by END
      split("", etable)
      split("", gtable)
      split("", atable)

      max_lprefix = 4

      setup_patterns()
      setup_imm_flags()
      setup_prefix_nums()

      clear_vars()
  }

  # Regular expressions used to classify the tokens of an opcode line.
  function setup_patterns() {
      opnd_expr = "^[[:alpha:]/]"
      ext_expr = "^\\("
      sep_expr = "^\\|$"
      group_expr = "^Grp[[:alnum:]]+"

      imm_expr = "^[IJAO][[:lower:]]"
      modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])"

      force64_expr = "\\([df]64\\)"
      rex_expr = "^REX(\\.[XRWB]+)*"
      fpu_expr = "^ESC" # TODO

      lprefix1_expr = "\\(66\\)"
      lprefix2_expr = "\\(F3\\)"
      lprefix3_expr = "\\(F2\\)"

      vexok_expr = "\\(VEX\\)"
      vexonly_expr = "\\(oVEX\\)"
      prefix_expr = "\\(Prefix\\)"
  }

  # Map immediate/offset operand names to their attribute expression.
  function setup_imm_flags() {
      imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
      imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
      imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
      imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
      imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
      imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
      imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
      imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
      imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
      imm_flag["Ob"] = "INAT_MOFFSET"
      imm_flag["Ov"] = "INAT_MOFFSET"
  }

  # Map prefix names, as written in the opcode map, to INAT_PFX_* names.
  function setup_prefix_nums() {
      prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
      prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
      prefix_num["REPNE"] = "INAT_PFX_REPNE"
      prefix_num["REP/REPE"] = "INAT_PFX_REPE"
      prefix_num["LOCK"] = "INAT_PFX_LOCK"
      prefix_num["SEG=CS"] = "INAT_PFX_CS"
      prefix_num["SEG=DS"] = "INAT_PFX_DS"
      prefix_num["SEG=ES"] = "INAT_PFX_ES"
      prefix_num["SEG=FS"] = "INAT_PFX_FS"
      prefix_num["SEG=GS"] = "INAT_PFX_GS"
      prefix_num["SEG=SS"] = "INAT_PFX_SS"
      prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2"
      prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3"
  }
  # Report a problem in the opcode map on stderr, tagged with the current
  # input record number, and stop with exit status 1.
  function semantic_error(msg) {
      msg = "Semantic error at " NR ": " msg
      print msg > "/dev/stderr"
      exit 1
  }
  # Print a message on standard output with a "DEBUG: " tag.
  function debug(msg) {
      msg = "DEBUG: " msg
      print msg
  }
  # Return the number of elements stored in arr.
  function array_size(arr,    key, total) {
      total = 0
      for (key in arr)
          total += 1
      return total
  }
  # "Table:" line: echo it into the output as a C comment. A table name
  # that is still set means the previous table was never closed.
  /^Table:/ {
      print "/* " $0 " */"
      if (length(tname) > 0) {
          semantic_error("Hit Table: before EndTable:.");
      }
  }
  # "Referrer:" line: names the escape opcode that leads to this table.
  # A bare "Referrer:" (NF == 1) leaves eid and tname untouched.
  /^Referrer:/ {
      if (NF != 1) {
          # escape opcode table: the name is every field after the keyword
          # joined without blanks, the same way the opcode rule builds the
          # key when it registers an escape
          ref = ""
          for (i = 2; i <= NF; i++)
              ref = ref $i
          # Reading escape[ref] for an unregistered name would yield an
          # empty id and silently name the table inat_escape_table_0.
          if (!(ref in escape))
              semantic_error("No escape: " ref)
          eid = escape[ref]
          tname = sprintf("inat_escape_table_%d", eid)
      }
  }
  # "AVXcode:" line: optionally carries the AVX table id in $2. It is
  # also the point where a table with neither an AVX id nor an escape id
  # is named as the primary table.
  /^AVXcode:/ {
      if (NF != 1) {
          # AVX/escape opcode table
          aid = $2
          # gaid stays one past the largest AVX id seen
          if (aid >= gaid)
              gaid = aid + 1
          # AVX only opcode table: no name was assigned by a referrer
          if (tname == "")
              tname = "inat_avx_table_" sprintf("%d", $2)
      }
      # primary opcode table
      if (eid == -1 && aid == -1)
          tname = "inat_primary_table"
  }
  # "GrpTable:" line: echo it as a C comment and select the group table
  # for the group named in $2, which must have been registered already.
  /^GrpTable:/ {
      print "/* " $0 " */"
      if (!($2 in group)) {
          semantic_error("No group: " $2 )
      }
      gid = group[$2]
      tname = sprintf("inat_group_table_%d", gid)
  }
  # Print one attribute table as a C array with designated initializers.
  #   tbl  - attribute strings keyed by the formatted index
  #   name - C declarator placed after "const insn_attr_t "
  #   fmt  - sprintf format that turns 0..n-1 into the keys of tbl
  #   n    - number of indexes to scan
  # i and id are locals, so the caller's globals of the same name (i is
  # used as a field index by the pattern rules) are not overwritten.
  function print_table(tbl,name,fmt,n,    i,id)
  {
      print "const insn_attr_t " name " = {"
      for (i = 0; i < n; i++) {
          id = sprintf(fmt, i)
          # Test membership first: a bare tbl[id] reference would create
          # an empty element in the caller's array for every unused slot.
          if ((id in tbl) && tbl[id])
              print " [" id "] = " tbl[id] ","
      }
      print "};"
  }
  # "EndTable" line: print the base table and the three last-prefix
  # tables collected for the current table, then reset the working state.
  /^EndTable/ {
      emit_table(table, 0)
      emit_table(lptable1, 1)
      emit_table(lptable2, 2)
      emit_table(lptable3, 3)
      print ""
      clear_vars()
  }

  # Print tbl if it holds any entry and record its C name under
  # last-prefix slot lp (0 is the base table; 1..3 get a "_<lp>" suffix).
  function emit_table(tbl, lp,    cname) {
      if (array_size(tbl) == 0)
          return
      cname = (lp == 0) ? tname : tname "_" lp
      if (gid != -1) {
          # print group tables
          print_table(tbl, cname "[INAT_GROUP_TABLE_SIZE]", "0x%x", 8)
          gtable[gid,lp] = cname
          return
      }
      # print primary/escaped tables
      print_table(tbl, cname "[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256)
      etable[eid,lp] = cname
      if (aid >= 0)
          atable[aid,lp] = cname
  }
  # Join two attribute expressions with " | ". When either side is
  # empty the other one is returned unchanged.
  function add_flags(old,new) {
      if (!old)
          return new
      if (!new)
          return old
      return old " | " new
  }
  # convert operands to flags.
  #   opnd - operand names indexed 1..n, as filled in by split()
  # Returns the immediate-operand and ModRM attribute expressions joined
  # by " | ", or "" when no operand contributes either.
  function convert_operands(opnd,       i,j,imm,mod)
  {
      imm = ""
      mod = ""
      # Walk the operands in the order they were written. "for (i in opnd)"
      # visits keys in an unspecified order, which made the
      # second-immediate check below depend on the awk in use.
      for (j = 1; (j in opnd); j++) {
          i = opnd[j]
          if (match(i, imm_expr) == 1) {
              if (!imm_flag[i])
                  semantic_error("Unknown imm opnd: " i)
              if (imm) {
                  # only a byte immediate is accepted after a first one
                  if (i != "Ib")
                      semantic_error("Second IMM error")
                  imm = add_flags(imm, "INAT_SCNDIMM")
              } else
                  imm = imm_flag[i]
          } else if (match(i, modrm_expr))
              mod = "INAT_MODRM"
      }
      return add_flags(imm, mod)
  }
  # Opcode line: a hex index followed by ":" and one or more entries.
  # Each entry is an opcode name, optionally followed by a comma-separated
  # operand token and a parenthesised extension token; entries are
  # separated by a "|" token. The resulting attribute expression is
  # stored under the index in table or in one of the last-prefix tables.
  # (The ":" needs no backslash: "\:" is not a defined regexp escape.)
  /^[0-9a-f]+:/ {
      if (NR == 1)
          next
      # get index
      idx = "0x" substr($1, 1, index($1,":") - 1)
      if (idx in table)
          semantic_error("Redefine " idx " in " tname)

      # check if escaped opcode
      if ("escape" == $2) {
          if ($3 != "#")
              semantic_error("No escaped name")
          # the escape name is every field after "#", joined without blanks
          ref = ""
          for (i = 4; i <= NF; i++)
              ref = ref $i
          if (ref in escape)
              semantic_error("Redefine escape (" ref ")")
          escape[ref] = geid
          geid++
          table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
          next
      }

      variant = null
      # converts
      i = 2
      while (i <= NF) {
          opcode = $(i++)
          delete opnds
          ext = null
          flags = null
          opnd = null
          # parse one opcode
          if (match($i, opnd_expr)) {
              opnd = $i
              split($(i++), opnds, ",")
              flags = convert_operands(opnds)
          }
          if (match($i, ext_expr))
              ext = $(i++)
          if (match($i, sep_expr))
              i++
          else if (i < NF)
              semantic_error($i " is not a separator")

          # check if group opcode
          if (match(opcode, group_expr)) {
              # a group gets its id the first time its name is seen
              if (!(opcode in group)) {
                  group[opcode] = ggid
                  ggid++
              }
              flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
          }
          # check force(or default) 64bit
          if (match(ext, force64_expr))
              flags = add_flags(flags, "INAT_FORCE64")

          # check REX prefix
          if (match(opcode, rex_expr))
              flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

          # check coprocessor escape : TODO
          if (match(opcode, fpu_expr))
              flags = add_flags(flags, "INAT_MODRM")

          # check VEX only code
          if (match(ext, vexonly_expr))
              flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")

          # check VEX-capable code, marked (VEX) rather than (oVEX)
          if (match(ext, vexok_expr))
              flags = add_flags(flags, "INAT_VEXOK")

          # check prefixes
          if (match(ext, prefix_expr)) {
              if (!prefix_num[opcode])
                  semantic_error("Unknown prefix: " opcode)
              flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
          }
          # an entry that produced no attribute is not stored
          if (length(flags) == 0)
              continue
          # check if last prefix
          if (match(ext, lprefix1_expr)) {
              lptable1[idx] = add_flags(lptable1[idx],flags)
              variant = "INAT_VARIANT"
          } else if (match(ext, lprefix2_expr)) {
              lptable2[idx] = add_flags(lptable2[idx],flags)
              variant = "INAT_VARIANT"
          } else if (match(ext, lprefix3_expr)) {
              lptable3[idx] = add_flags(lptable3[idx],flags)
              variant = "INAT_VARIANT"
          } else {
              table[idx] = add_flags(table[idx],flags)
          }
      }
      # flag the base entry when any last-prefix variant was stored
      if (variant)
          table[idx] = add_flags(table[idx],variant)
  }
  # Print the three lookup arrays that map an escape / group / AVX id and
  # a last-prefix slot to the table printed for it. The arrays are
  # declared as const pointers to const attributes
  # ("const insn_attr_t * const"); "const insn_attr_t const *" only
  # repeats the same qualifier and leaves the pointers writable.
  END {
      # nothing was generated if the awk sanity check failed in BEGIN
      if (awkchecked != "")
          exit 1
      # print escape opcode map's array
      print "/* Escape opcode map array */"
      print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
            "[INAT_LSTPFX_MAX + 1] = {"
      for (i = 0; i < geid; i++)
          for (j = 0; j < max_lprefix; j++)
              if (etable[i,j])
                  print " ["i"]["j"] = "etable[i,j]","
      print "};\n"
      # print group opcode map's array
      print "/* Group opcode map array */"
      print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
            "[INAT_LSTPFX_MAX + 1] = {"
      for (i = 0; i < ggid; i++)
          for (j = 0; j < max_lprefix; j++)
              if (gtable[i,j])
                  print " ["i"]["j"] = "gtable[i,j]","
      print "};\n"
      # print AVX opcode map's array
      print "/* AVX opcode map array */"
      print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
            "[INAT_LSTPFX_MAX + 1] = {"
      for (i = 0; i < gaid; i++)
          for (j = 0; j < max_lprefix; j++)
              if (atable[i,j])
                  print " ["i"]["j"] = "atable[i,j]","
      print "};"
  }
  347. }