/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like partial stack frame, but all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
/*
 * C code is not supposed to know about the undefined top of stack. Every
 * time a C function with a pt_regs argument is called from the SYSCALL
 * based fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp: at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm
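
/*
 * SYSCALL itself clobbers %rcx (return RIP) and %r11 (RFLAGS), so the fast
 * path never had real RCX/EFLAGS values to save. FIXUP_TOP_OF_STACK
 * therefore marks RCX as unrecoverable (-1) and copies the eflags that
 * landed in the R11 slot into the EFLAGS slot, giving C code a
 * self-consistent pt_regs.
 */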
	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm
	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl	%eax, %eax
	pushq	%rax		/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm
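
/*
 * UNFAKE_STACK_FRAME pops exactly the six quadwords FAKE_STACK_FRAME
 * pushed: ss, rsp, eflags, cs, rip and orig_rax, hence $8*6.
 */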
	.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
	.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax	system call number
 * rdi	arg0
 * rcx	return address for syscall/sysret, C arg3
 * rsi	arg1
 * rdx	arg2
 * r10	arg3	(--> moved to rcx for C)
 * r8	arg4
 * r9	arg5
 * r11	eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * EM64T CPUs have somewhat weird error reporting for non-canonical RIPs in
 * SYSRET. We can't handle any exceptions there because the exception handler
 * would end up running on the user stack, which is unsafe. To avoid problems,
 * any code that might end up with user-touched pt_regs should return
 * using int_ret_from_sys_call.
 *
 * XXX	if we had a free scratch register we could save the RSP into the
 *	stack frame and report it properly in ps. Unfortunately we have none.
 */
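
/*
 * As an illustration of the ABI above: a write(fd, buf, count) call arrives
 * here with %rax = __NR_write, %rdi = fd, %rsi = buf, %rdx = count, while
 * %rcx holds the user return RIP and %r11 the user RFLAGS (both stored by
 * the SYSCALL instruction itself).
 */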
ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,0
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	swapgs
	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	sti
	SAVE_ARGS 8,1
	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq	%rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)

/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  sysret_careful
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
	movq	%gs:pda_oldrsp,%rsp
	swapgs
	sysretq
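
/*
 * SYSRET reloads RIP from %rcx and RFLAGS from %r11: %rcx was loaded with
 * the saved RIP above, %r11 is restored from the slot where the user's
 * eflags were kept, and the user stack pointer comes straight from
 * pda_oldrsp, so no iret frame is needed on this path.
 */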
	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	sti
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    1f

	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	/* Stack frame might have been changed. The IRET path does
	   some additional checks to handle this. */
	jmp int_with_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call
	/* Do syscall tracing */
tracesys:
	CFI_RESTORE_STATE
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja  1f
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
1:	SAVE_REST
	movq %rsp,%rdi
	call syscall_trace_leave
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	/* Stack frame might have been changed. Use the more careful IRET path. */
	jmp int_ret_from_sys_call
	CFI_ENDPROC
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET	r8,R8-ARGOFFSET
	CFI_REL_OFFSET	r9,R9-ARGOFFSET
	CFI_REL_OFFSET	r10,R10-ARGOFFSET
	CFI_REL_OFFSET	r11,R11-ARGOFFSET
	cli
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz  int_careful
	andl $~TS_COMPAT,threadinfo_status(%rcx)
	jmp  retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc int_very_careful
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	jmp int_with_check
	CFI_ENDPROC
/*
 * Certain special system calls need to save a complete full stack frame.
 */
	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
	.endm
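
/*
 * Each stub thus arrives at ptregscall_common with the real handler in
 * %rax and the handler's pt_regs argument register (\arg) pointing at the
 * partial frame; the common tail saves the remaining registers and runs
 * FIXUP_TOP_OF_STACK before the indirect call.
 */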
	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX
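
/*
 * The \ref parameter is the pt_regs offset of the slot %rsp points at on
 * entry, so the CFA works out to SS+8-\ref above the current %rsp. With
 * only the hardware frame pushed, %rsp sits at the RIP slot (INTR_FRAME);
 * once an error code or vector is on the stack it sits one slot lower, at
 * ORIG_RAX (XCPT_FRAME).
 */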
/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the
 * fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
	.macro interrupt func
	cld
#ifdef CONFIG_DEBUG_INFO
	SAVE_ALL
	movq %rsp,%rdi
	/*
	 * Set up a stack frame pointer. This allows gdb to trace
	 * back to the original stack.
	 */
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
#else
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
#endif
	testl $3,CS(%rdi)
	je 1f
	swapgs
1:	incl %gs:pda_irqcount		# RED-PEN should check preempt count
	movq %gs:pda_irqstackptr,%rax
	cmoveq %rax,%rsp		/*todo This needs CFI annotation! */
	pushq %rdi			# save old stack
#ifndef CONFIG_DEBUG_INFO
	CFI_ADJUST_CFA_OFFSET 8
#endif
	call \func
	.endm
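
/*
 * Note on the stack switch: pda_irqcount is kept at -1 while off the IRQ
 * stack, so the incl reaches zero exactly on the first nesting level and
 * the cmoveq switches %rsp to pda_irqstackptr only then; nested interrupts
 * keep running on the IRQ stack they are already on.
 */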
ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	popq %rdi
#ifndef CONFIG_DEBUG_INFO
	CFI_ADJUST_CFA_OFFSET -8
#endif
	cli
	decl %gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
	movq RBP(%rdi),%rbp
	CFI_DEF_CFA_REGISTER rsp
#endif
	leaq ARGOFFSET(%rdi),%rsp	/*todo This needs CFI annotation! */
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz retint_careful
retint_swapgs:
	swapgs
retint_restore_args:
	cli
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq $-9999,%rdi	/* better code? */
	sti
	jmp do_exit
	.previous
	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc retint_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	GET_THREAD_INFO(%rcx)
	cli
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz retint_swapgs
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	cli
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx: threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz retint_restore_args
	bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc retint_restore_args
	bt $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif
	CFI_ENDPROC
/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq $\num-256
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm
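
/*
 * The vector is pushed biased by -256 so that the value stored in
 * orig_rax is always negative; that keeps interrupt frames
 * distinguishable from syscall frames, where orig_rax holds a
 * non-negative syscall number.
 */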
ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif
/*
 * Exception entry points.
 */
	.macro zeroentry sym
	INTR_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	.macro errorentry sym
	XCPT_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm
	/* error code is on the stack already */
	/* handle NMI-like exceptions that can happen everywhere */
	.macro paranoidentry sym, ist=0
	SAVE_ALL
	cld
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js 1f
	swapgs
	xorl %ebx,%ebx
1:
	.if \ist
	movq %gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
	.endm
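
/*
 * Two points worth spelling out here. The rdmsr of MSR_GS_BASE tests the
 * sign of the upper half (%edx): a kernel GS base is a negative canonical
 * address, so a set sign bit means GS is already the kernel's and no
 * swapgs is needed (%ebx records the decision for the exit path). For IST
 * entries, the TSS ist pointer is temporarily moved down by
 * EXCEPTION_STKSZ around the handler call, so that a recursive exception
 * of the same kind gets a fresh stack instead of clobbering this one.
 */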
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	_frame RDI
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	cli
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel
	movl threadinfo_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful
	swapgs
	RESTORE_ARGS 0,8,0
	jmp iret_label
	CFI_ENDPROC
error_kernelspace:
	incl %ebx
	/*
	 * There are two places in the kernel that can potentially fault with
	 * usergs. Handle them here. The exception handlers after iret run
	 * with kernel gs again, so don't set the user space flag.
	 * B-stepping K8s sometimes report a truncated RIP for IRET exceptions
	 * returning to compat mode. Check for these here too.
	 */
	leaq iret_label(%rip),%rbp
	cmpq %rbp,RIP(%rsp)
	je error_swapgs
	movl %ebp,%ebp	/* zero extend */
	cmpq %rbp,RIP(%rsp)
	je error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je error_swapgs
	jmp error_sti
	/* Reload gs selector with exception handling */
	/* edi: new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs		/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp 2b
	.previous
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here, so internally to the
	 * x86_64 port you can rely on kernel_thread() not to reschedule the
	 * child before returning; this avoids the need for hacks, for example
	 * to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorl %edi, %edi
	call do_exit
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all
 * state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
	.previous .text

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	zeroentry math_state_restore

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	jmp paranoid_exit
	CFI_ENDPROC
	.previous .text
	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi
	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 */
	/* ebx: no swapgs flag */
paranoid_exit:
	testl %ebx,%ebx		/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)
	jnz paranoid_userspace
paranoid_swapgs:
	swapgs
paranoid_restore:
	RESTORE_ALL 8
	iretq
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs
	movq %rsp,%rdi		/* &pt_regs */
	call sync_regs
	movq %rax,%rsp		/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx		/* arg3: thread flags */
	sti
	xorl %esi,%esi		/* arg2: oldset */
	movq %rsp,%rdi		/* arg1: &pt_regs */
	call do_notify_resume
	cli
	jmp paranoid_userspace
paranoid_schedule:
	sti
	call schedule
	cli
	jmp paranoid_userspace
	CFI_ENDPROC
	.previous .text
KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit
	CFI_ENDPROC
	.previous .text

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit
	CFI_ENDPROC

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit
	CFI_ENDPROC

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
	.previous .text

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit
	CFI_ENDPROC
#endif
ENTRY(call_softirq)
	CFI_STARTPROC
	movq %gs:pda_irqstackptr,%rax
	movq %rsp,%rdx
	CFI_DEF_CFA_REGISTER rdx
	incl %gs:pda_irqcount
	cmove %rax,%rsp
	pushq %rdx
	/*todo CFI_DEF_CFA_EXPRESSION ...*/
	call __do_softirq
	popq %rsp
	CFI_DEF_CFA_REGISTER rsp
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
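
/*
 * Same first-level test as in the interrupt macro: the incl/cmove pair
 * switches to the IRQ stack only when pda_irqcount was -1 (i.e. we were
 * not already on it); popq %rsp then restores the original stack pointer
 * saved in %rdx.
 */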