/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 * Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers are
 *   not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt	$9,EFLAGS-\offset(%rsp)	/* interrupts off? */
	jnc	1f
	TRACE_IRQS_ON
1:
#endif
.endm
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
.endm
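
/*
 * Note on usage (see ptregscall_common and stub_execve below):
 * FIXUP_TOP_OF_STACK runs before %rsp is handed to C code as a
 * struct pt_regs pointer, and RESTORE_TOP_OF_STACK runs afterwards,
 * so the frame the C code saw stays consistent with what the SYSRET
 * fast path later restores from it.
 */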
.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl	%eax, %eax
	pushq	%rax		/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
.endm

.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
.endm

.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi: prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	push	kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET 8		/* a 64-bit push moves %rsp by 8 */
	popf				# reset kernel eflags
	CFI_ADJUST_CFA_OFFSET -8
	call	schedule_tail
	GET_THREAD_INFO(%rcx)
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz	rff_trace
rff_action:
	RESTORE_REST
	testl	$3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je	int_ret_from_sys_call
	testl	$_TIF_IA32,threadinfo_flags(%rcx)
	jnz	int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp	ret_from_sys_call
rff_trace:
	movq	%rsp,%rdi
	call	syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp	rff_action
	CFI_ENDPROC
END(ret_from_fork)
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack
 *	frame and report it properly in ps. Unfortunately we don't have one.
 *
 * When user space can change the frame, always force IRET. That is because
 * it deals with non-canonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */
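
/*
 * Rough user-space view of the convention above (illustration only;
 * "buf" and "len" are placeholders, not symbols used by this file):
 *
 *	movq	$1,%rax		# __NR_write
 *	movq	$1,%rdi		# fd
 *	leaq	buf(%rip),%rsi	# buf
 *	movq	$len,%rdx	# count
 *	syscall			# CPU loads %rcx <- return RIP, %r11 <- rflags
 */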
ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	swapgs
	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	sti
	SAVE_ARGS 8,1
	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq	%rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	jnz	tracesys
	cmpq	$__NR_syscall_max,%rax
	ja	badsys
	movq	%r10,%rcx
	call	*sys_call_table(,%rax,8)	# XXX: rip relative
	movq	%rax,RAX-ARGOFFSET(%rsp)

/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl	$_TIF_ALLWORK_MASK,%edi
	/* edi: flagmask */
sysret_check:
	LOCKDEP_SYS_EXIT
	GET_THREAD_INFO(%rcx)
	cli
	TRACE_IRQS_OFF
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx
	jnz	sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq	RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER	rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
	movq	%gs:pda_oldrsp,%rsp
	swapgs
	sysretq

	CFI_RESTORE_STATE
	/* Handle reschedules */
	/* edx: work, edi: workmask */
sysret_careful:
	bt	$TIF_NEED_RESCHED,%edx
	jnc	sysret_signal
	TRACE_IRQS_ON
	sti
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET 8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp	sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	sti
	testl	$(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
	jz	1f

	/* Really a signal */
	/* edx: work flags (arg3) */
	leaq	do_notify_resume(%rip),%rax
	leaq	-ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl	%esi,%esi		# oldset -> arg2
	call	ptregscall_common
1:	movl	$_TIF_NEED_RESCHED,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	cli
	TRACE_IRQS_OFF
	jmp	int_with_check

badsys:
	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp	ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	SAVE_REST
	movq	$-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq	%rsp,%rdi
	call	syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq	$__NR_syscall_max,%rax
	movq	$-ENOSYS,%rcx
	cmova	%rcx,%rax
	ja	1f
	movq	%r10,%rcx	/* fixup for C */
	call	*sys_call_table(,%rax,8)
1:	movq	%rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
	.globl int_ret_from_sys_call
int_ret_from_sys_call:
	cli
	TRACE_IRQS_OFF
	testl	$3,CS-ARGOFFSET(%rsp)
	je	retint_restore_args
	movl	$_TIF_ALLWORK_MASK,%edi
	/* edi: mask to check */
int_with_check:
	LOCKDEP_SYS_EXIT_IRQ
	GET_THREAD_INFO(%rcx)
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx
	jnz	int_careful
	andl	$~TS_COMPAT,threadinfo_status(%rcx)
	jmp	retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx: work, edi: workmask */
int_careful:
	bt	$TIF_NEED_RESCHED,%edx
	jnc	int_very_careful
	TRACE_IRQS_ON
	sti
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET 8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	TRACE_IRQS_OFF
	jmp	int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz	int_signal
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq	8(%rsp),%rdi	# &ptregs -> arg1
	call	syscall_trace_leave
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl	$~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	jmp	int_restore_rest

int_signal:
	testl	$(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
	jz	1f
	movq	%rsp,%rdi	# &ptregs -> arg1
	xorl	%esi,%esi	# oldset -> arg2
	call	do_notify_resume
1:	movl	$_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	jmp	int_with_check
	CFI_ENDPROC
END(system_call)
/*
 * Certain special system calls that need to save a complete full stack frame.
 */
.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
END(\label)
.endm
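
/*
 * Each PTREGSCALL invocation below expands to a tiny stub; for example
 * "PTREGSCALL stub_clone, sys_clone, %r8" becomes roughly
 *
 *	stub_clone:
 *		leaq	sys_clone(%rip),%rax
 *		leaq	-ARGOFFSET+8(%rsp),%r8
 *		jmp	ptregscall_common
 *
 * i.e. the C handler goes in %rax and a pt_regs pointer in the named
 * argument register, and ptregscall_common completes the frame.
 */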
	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq	%r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER	rip, r11
	SAVE_REST
	movq	%r11, %r15
	CFI_REGISTER	rip, r15
	FIXUP_TOP_OF_STACK %r11
	call	*%rax
	RESTORE_TOP_OF_STACK %r11
	movq	%r15, %r11
	CFI_REGISTER	rip, r11
	RESTORE_REST
	pushq	%r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)
ENTRY(stub_execve)
	CFI_STARTPROC
	popq	%r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER	rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call	sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq	%rax,RAX(%rsp)
	RESTORE_REST
	jmp	int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq	$8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq	%rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call	sys_rt_sigreturn
	movq	%rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp	int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)
/*
 * initial frame state for interrupts and exceptions
 */
.macro _frame ref
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
.macro interrupt func
	cld
	SAVE_ARGS
	leaq	-ARGOFFSET(%rsp),%rdi	# arg1 for handler
	pushq	%rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET		rbp, 0
	movq	%rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
	testl	$3,CS(%rdi)
	je	1f
	swapgs
	/* irqcount is used to check if a CPU is already on an interrupt
	   stack or not. While this is essentially redundant with preempt_count
	   it is a little cheaper to use a separate counter in the PDA
	   (short of moving irq_enter into assembly, which would be too
	   much work) */
1:	incl	%gs:pda_irqcount
	cmoveq	%gs:pda_irqstackptr,%rsp
	push	%rbp			# backlink for old unwinder
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	call	\func
.endm
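
/*
 * Note: the value at 0(%rsp) mentioned above is pushed by the caller of
 * this macro - see the apicinterrupt macro below, which pushes $~(\num)
 * before invoking "interrupt"; common_interrupt is likewise reached with
 * the vector already on the stack, which is why it uses XCPT_FRAME.
 */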
ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	cli
	TRACE_IRQS_OFF
	decl	%gs:pda_irqcount
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl	$3,CS-ARGOFFSET(%rsp)
	je	retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl	$_TIF_WORK_MASK,%edi
retint_check:
	LOCKDEP_SYS_EXIT_IRQ
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx
	CFI_REMEMBER_STATE
	jnz	retint_careful

retint_swapgs:		/* return to user-space */
	/*
	 * The iretq could re-enable interrupts:
	 */
	cli
	TRACE_IRQS_IRETQ
	swapgs
	jmp	restore_args

retint_restore_args:	/* return to kernel space */
	cli
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq	$11,%rdi	/* SIGSEGV */
	TRACE_IRQS_ON
	sti
	jmp	do_exit
	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt	$TIF_NEED_RESCHED,%edx
	jnc	retint_signal
	TRACE_IRQS_ON
	sti
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET	8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	cli
	TRACE_IRQS_OFF
	jmp	retint_check

retint_signal:
	testl	$(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
	jz	retint_swapgs
	TRACE_IRQS_ON
	sti
	SAVE_REST
	movq	$-1,ORIG_RAX(%rsp)
	xorl	%esi,%esi		# oldset
	movq	%rsp,%rdi		# &pt_regs
	call	do_notify_resume
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	movl	$_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp	retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx: threadinfo. interrupts off. */
ENTRY(retint_kernel)
	cmpl	$0,threadinfo_preempt_count(%rcx)
	jnz	retint_restore_args
	bt	$TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc	retint_restore_args
	bt	$9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc	retint_restore_args
	call	preempt_schedule_irq
	jmp	exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)
/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
	INTR_FRAME
	pushq	$~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp	ret_from_intr
	CFI_ENDPROC
.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)

ENTRY(irq_move_cleanup_interrupt)
	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
#endif

ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
/*
 * Exception entry points.
 */
.macro zeroentry sym
	INTR_FRAME
	pushq	$0		/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq	%rax		/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq	\sym(%rip),%rax
	jmp	error_entry
	CFI_ENDPROC
.endm

.macro errorentry sym
	XCPT_FRAME
	pushq	%rax
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq	\sym(%rip),%rax
	jmp	error_entry
	CFI_ENDPROC
.endm
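
/*
 * Note: zeroentry is for exceptions where the CPU pushes no error code, so
 * it pushes a 0 by hand to keep the frame layout identical; errorentry
 * relies on the hardware-pushed error code already being on the stack.
 * Both then jump to error_entry with the C handler in %rax.
 */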
/* error code is on the stack already */
/* handle NMI-like exceptions that can happen everywhere */
.macro paranoidentry sym, ist=0, irqtrace=1
	SAVE_ALL
	cld
	movl	$1,%ebx
	movl	$MSR_GS_BASE,%ecx
	rdmsr
	testl	%edx,%edx
	js	1f
	swapgs
	xorl	%ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq	%rsp,%rdi
	movq	ORIG_RAX(%rsp),%rsi
	movq	$-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call	\sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
.endm
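
/*
 * Note: paranoidentry leaves a flag in %ebx for the matching exit path:
 * %ebx == 1 means the kernel GS base was already active and no swapgs was
 * executed, %ebx == 0 means swapgs was executed and must be undone on the
 * way out. The paranoid exit path below tests exactly this.
 */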
/*
 * "Paranoid" exit path from exception stack.
 * Paranoid because this is used by NMIs and cannot take
 * any kernel state for granted.
 * We don't do kernel preemption checks here, because only
 * NMI should be common and it does not enable IRQs and
 * cannot get reschedule ticks.
 *
 * "trace" is 0 for the NMI handler only, because irq-tracing
 * is fundamentally NMI-unsafe. (we cannot change the soft and
 * hard IRQ flags atomically)
 */
.macro paranoidexit trace=1
	/* ebx: no swapgs flag */
paranoid_exit\trace:
	testl	%ebx,%ebx			/* swapgs needed? */
	jnz	paranoid_restore\trace
	testl	$3,CS(%rsp)
	jnz	paranoid_userspace\trace
paranoid_swapgs\trace:
	.if \trace
	TRACE_IRQS_IRETQ 0
	.endif
	swapgs
paranoid_restore\trace:
	RESTORE_ALL 8
	iretq
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl	threadinfo_flags(%rcx),%ebx
	andl	$_TIF_WORK_MASK,%ebx
	jz	paranoid_swapgs\trace
	movq	%rsp,%rdi			/* &pt_regs */
	call	sync_regs
	movq	%rax,%rsp			/* switch stack for scheduling */
	testl	$_TIF_NEED_RESCHED,%ebx
	jnz	paranoid_schedule\trace
	movl	%ebx,%edx			/* arg3: thread flags */
	.if \trace
	TRACE_IRQS_ON
	.endif
	sti
	xorl	%esi,%esi			/* arg2: oldset */
	movq	%rsp,%rdi			/* arg1: &pt_regs */
	call	do_notify_resume
	cli
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp	paranoid_userspace\trace
paranoid_schedule\trace:
	.if \trace
	TRACE_IRQS_ON
	.endif
	sti
	call	schedule
	cli
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp	paranoid_userspace\trace
	CFI_ENDPROC
.endm
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
KPROBE_ENTRY(error_entry)
	_frame RDI
	CFI_REL_OFFSET rax,0
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq	$14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq	%rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq	14*8(%rsp),%rsi	/* load rax from rdi slot */
	CFI_REGISTER	rax,rsi
	movq	%rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq	%rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq	%rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq	%r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq	%r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq	%r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq	%r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq	%rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq	%rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq	%r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq	%r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq	%r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq	%r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl	%ebx,%ebx
	testl	$3,CS(%rsp)
	je	error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq	%rdi,RDI(%rsp)
	CFI_REL_OFFSET	rdi,RDI
	movq	%rsp,%rdi
	movq	ORIG_RAX(%rsp),%rsi	/* get error code */
	movq	$-1,ORIG_RAX(%rsp)
	call	*%rax
	/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl	%ebx,%eax
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl	%eax,%eax
	jne	retint_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl	threadinfo_flags(%rcx),%edx
	movl	$_TIF_WORK_MASK,%edi
	andl	%edi,%edx
	jnz	retint_careful
	jmp	retint_swapgs
	CFI_ENDPROC

error_kernelspace:
	incl	%ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq	iret_label(%rip),%rbp
	cmpq	%rbp,RIP(%rsp)
	je	error_swapgs
	movl	%ebp,%ebp	/* zero extend */
	cmpq	%rbp,RIP(%rsp)
	je	error_swapgs
	cmpq	$gs_change,RIP(%rsp)
	je	error_swapgs
	jmp	error_sti
KPROBE_END(error_entry)
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl	%edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl	%eax,%eax
	movl	%eax,%gs
	jmp	2b
	.previous
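
/*
 * Note: the __ex_table entry above pairs the potentially faulting
 * "movl %edi,%gs" at gs_change with the bad_gs fixup, so loading a bad
 * selector falls back to a null %gs instead of an unhandled fault.
 */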
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq	%rdx,%rdi
	orq	kernel_thread_flags(%rip),%rdi
	movq	$-1, %rsi
	movq	%rsp, %rdx

	xorl	%r8d,%r8d
	xorl	%r9d,%r9d

	# clone now
	call	do_fork
	movq	%rax,RAX(%rsp)
	xorl	%edi,%edi

	/*
	 * It isn't worth checking for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not to reschedule the child before returning; this avoids the need
	 * for hacks, for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)
child_rip:
	pushq	$0		# fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq	%rdi, %rax
	movq	%rsi, %rdi
	call	*%rax
	# exit
	mov	%eax, %edi
	call	do_exit
	CFI_ENDPROC
ENDPROC(child_rip)
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call	sys_execve
	movq	%rax, RAX(%rsp)
	RESTORE_REST
	testq	%rax,%rax
	je	int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_execve)
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry math_state_restore
END(device_not_available)

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq	$0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	paranoidexit
KPROBE_END(debug)

	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	pushq	$-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
	paranoidexit 0
#else
	jmp	paranoid_exit1
	CFI_ENDPROC
#endif
KPROBE_END(nmi)

KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq	$0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp	paranoid_exit1
	CFI_ENDPROC
KPROBE_END(int3)

ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(bounds)
	zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

ENTRY(reserved)
	zeroentry do_reserved
END(reserved)

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp	paranoid_exit1
	CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp	paranoid_exit1
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq	$0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp	paranoid_exit1
	CFI_ENDPROC
END(machine_check)
#endif
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	push	%rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET rbp,0
	mov	%rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl	%gs:pda_irqcount
	cmove	%gs:pda_irqstackptr,%rsp
	push	%rbp			# backlink for old unwinder
	call	__do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
	decl	%gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)
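
/*
 * Note: ignore_sysret below simply fails the call with -ENOSYS and returns
 * with sysret; it is presumably installed as the SYSCALL target for cases
 * the kernel does not want to service (e.g. 32-bit SYSCALL when IA32
 * emulation is disabled) - see the MSR setup code elsewhere.
 */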
KPROBE_ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov	$-ENOSYS,%eax
	sysret
	CFI_ENDPROC
ENDPROC(ignore_sysret)