/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers.
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers
 *   are not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>

/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
#include <linux/elf-em.h>
#define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_64BIT	0x80000000
#define __AUDIT_ARCH_LE		0x40000000
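/*
 * Worked out for illustration, assuming EM_X86_64 is 62 (0x3e) as defined
 * in <linux/elf-em.h>:
 *	0x0000003e | 0x80000000 | 0x40000000 == 0xc000003e
 * which is the arch value that ends up in audit records.
 */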
	.code64

#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
	retq
END(mcount)

ENTRY(ftrace_caller)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

.globl ftrace_call
ftrace_call:
	call ftrace_stub

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
	jmp ftrace_stub
#endif

.globl ftrace_stub
ftrace_stub:
	retq
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
	cmpl $0, function_trace_stop
	jne  ftrace_stub

	cmpq $ftrace_stub, ftrace_trace_function
	jnz trace

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	cmpq $ftrace_stub, ftrace_graph_return
	jnz ftrace_graph_caller
#endif

.globl ftrace_stub
ftrace_stub:
	retq

trace:
	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

	call *ftrace_trace_function

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
	cmpl $0, function_trace_stop
	jne ftrace_stub

	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	leaq 8(%rbp), %rdi
	movq 0x38(%rsp), %rsi
	call prepare_ftrace_return

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp
	retq
END(ftrace_graph_caller)

.globl return_to_handler
return_to_handler:
	subq $80, %rsp

	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)
	movq %r10, 56(%rsp)
	movq %r11, 64(%rsp)

	call ftrace_return_to_handler

	movq %rax, 72(%rsp)
	movq 64(%rsp), %r11
	movq 56(%rsp), %r10
	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $72, %rsp
	retq
#endif

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
	swapgs
	sysretq
#endif /* CONFIG_PARAVIRT */

.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
	jnc  1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
.endm
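/*
 * Typical pairing, as used by ptregscall_common and stub_execve below; a
 * minimal sketch, assuming %rax holds the C handler and %r11 is free as
 * the scratch register:
 *
 *	FIXUP_TOP_OF_STACK %r11
 *	call *%rax
 *	RESTORE_TOP_OF_STACK %r11
 */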
.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq $__KERNEL_DS /* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq %rax /* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq $(1<<9) /* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq $__KERNEL_CS /* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq \child_rip /* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq %rax /* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
.endm

.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
.endm

.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
.endm

/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	push kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET 8
	popf				# reset kernel eflags
	CFI_ADJUST_CFA_OFFSET -8
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call
	testl $_TIF_IA32,TI_flags(%rcx)
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
END(ret_from_fork)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15, rbp, rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack
 *	frame and report it properly in ps. Unfortunately we haven't.
 *
 * When the user can change the frame, always force IRET. That is because
 * it deals with non-canonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
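/*
 * A minimal user-space sketch of the convention above (illustration only,
 * not part of this file); the value 39 for __NR_getpid is an assumption
 * taken from the x86-64 syscall table:
 *
 *	movl	$39, %eax	# system call number in rax (__NR_getpid)
 *	syscall			# rcx <- return rip, r11 <- rflags
 *				# result comes back in rax, no arguments needed
 */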
ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	SWAPGS_UNSAFE_STACK
	/*
	 * A hypervisor implementation might want to use a label
	 * after the swapgs, so that it can do the swapgs
	 * for the guest and jump here on syscall.
	 */
ENTRY(system_call_after_swapgs)

	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_ARGS 8,1
	movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
	jnz tracesys
system_call_fastpath:
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	LOCKDEP_SYS_EXIT
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz  sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER	rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
	movq	%gs:pda_oldrsp, %rsp
	USERGS_SYSRET64

	CFI_RESTORE_STATE
	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
#ifdef CONFIG_AUDITSYSCALL
	bt $TIF_SYSCALL_AUDIT,%edx
	jc sysret_audit
#endif
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
	movl $_TIF_WORK_MASK,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

#ifdef CONFIG_AUDITSYSCALL
	/*
	 * Fast path for syscall audit without full syscall trace.
	 * We just call audit_syscall_entry() directly, and then
	 * jump back to the normal fast path.
	 */
auditsys:
	movq %r10,%r9			/* 6th arg: 4th syscall arg */
	movq %rdx,%r8			/* 5th arg: 3rd syscall arg */
	movq %rsi,%rcx			/* 4th arg: 2nd syscall arg */
	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
	movq %rax,%rsi			/* 2nd arg: syscall number */
	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
	call audit_syscall_entry
	LOAD_ARGS 0			/* reload call-clobbered registers */
	jmp system_call_fastpath

	/*
	 * Return fast path for syscall audit. Call audit_syscall_exit()
	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
	 * masked off.
	 */
sysret_audit:
	movq %rax,%rsi		/* second arg, syscall return value */
	cmpq $0,%rax		/* is it < 0? */
	setl %al		/* 1 if so, 0 if not */
	movzbl %al,%edi		/* zero-extend that into %edi */
	inc %edi	/* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
	call audit_syscall_exit
	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
	jmp sysret_check
#endif /* CONFIG_AUDITSYSCALL */
	/* Do syscall tracing */
tracesys:
#ifdef CONFIG_AUDITSYSCALL
	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jz auditsys
#endif
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * We don't reload %rax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	LOAD_ARGS ARGOFFSET, 1
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja   int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
	movq %r10,%rcx			/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
	.globl int_ret_from_sys_call
	.globl int_with_check
int_ret_from_sys_call:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	LOCKDEP_SYS_EXIT_IRQ
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz  int_careful
	andl $~TS_COMPAT,TI_status(%rcx)
	jmp  retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc int_very_careful
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	/* Check for syscall exit trace */
	testl $_TIF_WORK_SYSCALL_EXIT,%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
	jmp int_restore_rest

int_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_WORK_MASK,%edi
int_restore_rest:
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(system_call)
/*
 * Certain special system calls need to save a complete full stack frame.
 */
.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
END(\label)
.endm

	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	movq %rsp, %rcx
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * initial frame state for interrupts and exceptions
 */
.macro _frame ref
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/*
 * initial frame state for exceptions with error code (and interrupts with
 * vector already pushed)
 */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */
/* 0(%rsp): interrupt number */
.macro interrupt func
	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
	pushq %rbp
	/*
	 * Save rbp twice: One is for marking the stack frame, as usual, and the
	 * other, to fill pt_regs properly. This is because bx comes right
	 * before the last saved register in that structure, and not bp. If the
	 * base pointer were in the place bx is today, this would not be needed.
	 */
	movq %rbp, -8(%rsp)
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET		rbp, 0
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
	testl $3,CS(%rdi)
	je 1f
	SWAPGS
	/* irqcount is used to check if a CPU is already on an interrupt
	   stack or not. While this is essentially redundant with preempt_count
	   it is a little cheaper to use a separate counter in the PDA
	   (short of moving irq_enter into assembly, which would be too
	   much work) */
1:	incl	%gs:pda_irqcount
	cmoveq %gs:pda_irqstackptr,%rsp
	push %rbp			# backlink for old unwinder
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	call \func
.endm

ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	decl %gs:pda_irqcount
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  retint_careful

retint_swapgs:		/* return to user-space */
	/*
	 * The iretq could re-enable interrupts:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_IRETQ
	SWAPGS
	jmp restore_args

retint_restore_args:	/* return to kernel space */
	DISABLE_INTERRUPTS(CLBR_ANY)
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0

irq_return:
	INTERRUPT_RETURN

	.section __ex_table, "a"
	.quad irq_return, bad_iret
	.previous

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iretq

	.section __ex_table,"a"
	.quad native_iret, bad_iret
	.previous
#endif

	.section .fixup,"ax"
bad_iret:
	/*
	 * The iret traps when the %cs or %ss being restored is bogus.
	 * We've lost the original trap vector and error code.
	 * #GPF is the most likely one to get for an invalid selector.
	 * So pretend we completed the iret and took the #GPF in user mode.
	 *
	 * We are now running with the kernel GS after exception recovery.
	 * But error_entry expects us to have user GS to match the user %cs,
	 * so swap back.
	 */
	pushq $0

	SWAPGS
	jmp general_protection

	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt   $TIF_NEED_RESCHED,%edx
	jnc  retint_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET	8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp retint_check

retint_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz    retint_swapgs
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	jmp retint_with_reschedule

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	threadinfo. interrupts off. */
ENTRY(retint_kernel)
	cmpl $0,TI_preempt_count(%rcx)
	jnz  retint_restore_args
	bt   $TIF_NEED_RESCHED,TI_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)
/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
	INTR_FRAME
	pushq $~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
ENTRY(call_function_single_interrupt)
	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
END(call_function_single_interrupt)
ENTRY(irq_move_cleanup_interrupt)
	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
#endif

ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(uv_bau_message_intr1)
	apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)

/*
 * Exception entry points.
 */
.macro zeroentry sym
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
.endm

.macro errorentry sym
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
.endm

	/* error code is on the stack already */
	/* handle NMI like exceptions that can happen everywhere */
.macro paranoidentry sym, ist=0, irqtrace=1
	SAVE_ALL
	cld
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js    1f
	SWAPGS
	xorl  %ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
.endm

/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard flags at once, atomically)
 */
.macro paranoidexit trace=1
	/* ebx:	no swapgs flag */
paranoid_exit\trace:
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore\trace
	testl $3,CS(%rsp)
	jnz paranoid_userspace\trace
paranoid_swapgs\trace:
	.if \trace
	TRACE_IRQS_IRETQ 0
	.endif
	SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
	RESTORE_ALL 8
	jmp irq_return
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs\trace
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule\trace
	movl %ebx,%edx			/* arg3: thread flags */
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
paranoid_schedule\trace:
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
	CFI_ENDPROC
.endm

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
KPROBE_ENTRY(error_entry)
	_frame RDI
	CFI_REL_OFFSET rax,0
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	CFI_REGISTER	rax,rsi
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je error_kernelspace
error_swapgs:
	SWAPGS
error_sti:
	TRACE_IRQS_OFF
	movq %rdi,RDI(%rsp)
	CFI_REL_OFFSET	rdi,RDI
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne  retint_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz  retint_careful
	jmp  retint_swapgs
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq irq_return(%rip),%rcx
	cmpq %rcx,RIP(%rsp)
	je error_swapgs
	movl %ecx,%ecx	/* zero extend */
	cmpq %rcx,RIP(%rsp)
	je error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je error_swapgs
	jmp error_sti
KPROBE_END(error_entry)

	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(native_load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
	SWAPGS
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	SWAPGS
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(native_load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	SWAPGS		/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp  2b
	.previous

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
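/*
 * A minimal sketch of a C caller (hypothetical, for illustration only),
 * assuming the prototype above and the CLONE_* flags from <linux/sched.h>;
 * my_thread_fn is not a real symbol in this tree:
 *
 *	static int my_thread_fn(void *arg) { ... do_exit(0); }
 *	pid = kernel_thread(my_thread_fn, NULL, CLONE_FS | CLONE_FILES);
 */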
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth it to check for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not to reschedule the child before returning; this avoids the need
	 * of hacks, for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)

child_rip:
	pushq $0		# fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	mov %eax, %edi
	call do_exit
	CFI_ENDPROC
ENDPROC(child_rip)

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
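/*
 * A minimal sketch of an in-kernel caller (hypothetical, for illustration
 * only), assuming the C prototype above; the path is just an example:
 *
 *	static char *argv[] = { "/sbin/init", NULL };
 *	static char *envp[] = { NULL };
 *	ret = kernel_execve("/sbin/init", argv, envp);
 */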
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	movq %rsp,%rcx
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_execve)

KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry do_device_not_available
END(device_not_available)

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	paranoidexit
KPROBE_END(debug)

	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
	paranoidexit 0
#else
	jmp paranoid_exit1
	CFI_ENDPROC
#endif
KPROBE_END(nmi)

KPROBE_ENTRY(int3)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit1
	CFI_ENDPROC
KPROBE_END(int3)

ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(bounds)
	zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit1
	CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit1
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit1
	CFI_ENDPROC
END(machine_check)
#endif
	/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	push %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET rbp,0
	mov  %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl %gs:pda_irqcount
	cmove %gs:pda_irqstackptr,%rsp
	push %rbp			# backlink for old unwinder
	call __do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)

KPROBE_ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
	sysret
	CFI_ENDPROC
ENDPROC(ignore_sysret)

#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
	zeroentry xen_do_hypervisor_callback
END(xen_hypervisor_callback)

/*
# A note on the "critical region" in our callback handler.
# We want to avoid stacking callback handlers due to events occurring
# during handling of the last event. To do this, we keep events disabled
# until we've done all processing. HOWEVER, we must enable events before
# popping the stack frame (can't be done atomically) and so it would still
# be possible to get enough handler activations to overflow the stack.
# Although unlikely, bugs of that kind are hard to track down, so we'd
# like to avoid the possibility.
# So, on entry to the handler we detect whether we interrupted an
# existing activation in its critical region -- if so, we pop the current
# activation and restart the handler using the previous one.
*/
ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
	CFI_STARTPROC
/* Since we don't modify %rdi, xen_evtchn_do_upcall(struct *pt_regs) will
   see the correct pointer to the pt_regs */
	movq %rdi, %rsp		# we don't return, adjust the stack frame
	CFI_ENDPROC
	CFI_DEFAULT_STACK
11:	incl %gs:pda_irqcount
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	cmovzq %gs:pda_irqstackptr,%rsp
	pushq %rbp			# backlink for old unwinder
	call xen_evtchn_do_upcall
	popq %rsp
	CFI_DEF_CFA_REGISTER rsp
	decl %gs:pda_irqcount
	jmp  error_exit
	CFI_ENDPROC
END(xen_do_hypervisor_callback)

/*
# Hypervisor uses this for application faults while it executes.
# We get here for two reasons:
#  1. Fault while reloading DS, ES, FS or GS
#  2. Fault while executing IRET
# Category 1 we do not need to fix up as Xen has already reloaded all segment
# registers that could be reloaded and zeroed the others.
# Category 2 we fix up by killing the current process. We cannot use the
# normal Linux return path in this case because if we use the IRET hypercall
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
# We distinguish between categories by comparing each saved segment register
# with its current contents: any discrepancy means we are in category 1.
*/
ENTRY(xen_failsafe_callback)
	framesz = (RIP-0x30)	/* workaround buggy gas */
	_frame framesz
	CFI_REL_OFFSET rcx, 0
	CFI_REL_OFFSET r11, 8
	movw %ds,%cx
	cmpw %cx,0x10(%rsp)
	CFI_REMEMBER_STATE
	jne 1f
	movw %es,%cx
	cmpw %cx,0x18(%rsp)
	jne 1f
	movw %fs,%cx
	cmpw %cx,0x20(%rsp)
	jne 1f
	movw %gs,%cx
	cmpw %cx,0x28(%rsp)
	jne 1f
	/* All segments match their saved values => Category 2 (Bad IRET). */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rcx
	CFI_ADJUST_CFA_OFFSET 8
	jmp general_protection
	CFI_RESTORE_STATE
1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	SAVE_ALL
	jmp error_exit
	CFI_ENDPROC
END(xen_failsafe_callback)

#endif /* CONFIG_XEN */