/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens after every
 * interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals, or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like the partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
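
/*
 * For reference, the architecture-defined interrupt frame is laid out
 * on the kernel stack as (highest address first):
 *	SS, RSP, EFLAGS, CS, RIP
 */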
#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
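
/*
 * Example users: the PTREGSCALL stubs and stub_execve below call C
 * functions (sys_clone, sys_execve, ...) that look at the full pt_regs,
 * so they bracket the call with FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK.
 */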
/* %rsp: at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm

	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl	%eax, %eax
	pushq	%rax		/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm
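
/* FAKE_STACK_FRAME pushes six words (ss, rsp, eflags, cs, rip, orig rax),
   hence the 6*8 bytes dropped by UNFAKE_STACK_FRAME. */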
	.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
	.endm

/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	call	schedule_tail
	GET_THREAD_INFO(%rcx)
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz	rff_trace
rff_action:
	RESTORE_REST
	testl	$3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je	int_ret_from_sys_call
	testl	$_TIF_IA32,threadinfo_flags(%rcx)
	jnz	int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp	ret_from_sys_call
rff_trace:
	movq	%rsp,%rdi
	call	syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp	rff_action
	CFI_ENDPROC

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the
 *	stack frame and report it properly in ps. Unfortunately we don't
 *	have one.
 */
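
/*
 * Example: a user-space write(fd, buf, count) arrives here as
 *	rax = __NR_write, rdi = fd, rsi = buf, rdx = count
 * with the user return address in rcx and the user eflags in r11,
 * both placed there by the SYSCALL instruction itself.
 */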
ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,0
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	swapgs
	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	sti
	SAVE_ARGS 8,1
	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq	%rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jnz	tracesys
	cmpq	$__NR_syscall_max,%rax
	ja	badsys
	movq	%r10,%rcx
	call	*sys_call_table(,%rax,8)	# XXX: rip relative
	movq	%rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path).
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl	$_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx
	CFI_REMEMBER_STATE
	jnz	sysret_careful
	movq	RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER	rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
	movq	%gs:pda_oldrsp,%rsp
	swapgs
	sysretq

	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	CFI_RESTORE_STATE
	bt	$TIF_NEED_RESCHED,%edx
	jnc	sysret_signal
	sti
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET 8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp	sysret_check

	/* Handle a signal */
sysret_signal:
	sti
	testl	$(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz	1f
	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq	do_notify_resume(%rip),%rax
	leaq	-ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl	%esi,%esi		# oldset -> arg2
	call	ptregscall_common
1:	movl	$_TIF_NEED_RESCHED,%edi
	jmp	sysret_check

badsys:
	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp	ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	CFI_RESTORE_STATE
	SAVE_REST
	movq	$-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq	%rsp,%rdi
	call	syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed them */
	RESTORE_REST
	cmpq	$__NR_syscall_max,%rax
	ja	1f
	movq	%r10,%rcx	/* fixup for C */
	call	*sys_call_table(,%rax,8)
	movq	%rax,RAX-ARGOFFSET(%rsp)
1:	SAVE_REST
	movq	%rsp,%rdi
	call	syscall_trace_leave
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	jmp	ret_from_sys_call
	CFI_ENDPROC

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET	r8,R8-ARGOFFSET
	CFI_REL_OFFSET	r9,R9-ARGOFFSET
	CFI_REL_OFFSET	r10,R10-ARGOFFSET
	CFI_REL_OFFSET	r11,R11-ARGOFFSET
	cli
	testl	$3,CS-ARGOFFSET(%rsp)
	je	retint_restore_args
	movl	$_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx
	jnz	int_careful
	andl	$~TS_COMPAT,threadinfo_status(%rcx)
	jmp	retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt	$TIF_NEED_RESCHED,%edx
	jnc	int_very_careful
	sti
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET 8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	jmp	int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz	int_signal
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq	8(%rsp),%rdi	# &ptregs -> arg1
	call	syscall_trace_leave
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl	$~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	jmp	int_restore_rest

int_signal:
	testl	$(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz	1f
	movq	%rsp,%rdi	# &ptregs -> arg1
	xorl	%esi,%esi	# oldset -> arg2
	call	do_notify_resume
1:	movl	$_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	jmp	int_with_check
	CFI_ENDPROC

/*
 * Certain special system calls need to save a complete full stack frame.
 */
	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
	.endm

	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi
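
/*
 * The third macro argument is the register holding the pt_regs argument
 * slot of the C prototype: the first argument for sys_fork (%rdi), the
 * fifth for sys_clone (%r8), and so on.
 */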
ENTRY(ptregscall_common)
	popq	%r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER	rip, r11
	SAVE_REST
	movq	%r11, %r15
	CFI_REGISTER	rip, r15
	FIXUP_TOP_OF_STACK %r11
	call	*%rax
	RESTORE_TOP_OF_STACK %r11
	movq	%r15, %r11
	CFI_REGISTER	rip, r11
	RESTORE_REST
	pushq	%r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC

ENTRY(stub_execve)
	CFI_STARTPROC
	popq	%r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER	rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call	sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq	%rax,RAX(%rsp)
	RESTORE_REST
	jmp	int_ret_from_sys_call
	CFI_ENDPROC

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq	$8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq	%rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call	sys_rt_sigreturn
	movq	%rax,RAX(%rsp)	# fixme, this could be done at a higher layer
	RESTORE_REST
	jmp	int_ret_from_sys_call
	CFI_ENDPROC

/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX
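
/*
 * \ref is the pt_regs offset that %rsp currently points at: RIP right
 * after the hardware frame, ORIG_RAX once an error code or vector has
 * been pushed on top of it.
 */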

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers on the
 * fast path.
 *
 * Entry runs with interrupts off.
 */
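
/*
 * "Callee clobbered" here means the caller-saved registers of the C ABI
 * (rax, rcx, rdx, rsi, rdi, r8-r11): SAVE_ARGS stores exactly those,
 * which is all a C handler may clobber.
 */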
/* 0(%rsp): interrupt number */
	.macro interrupt func
	cld
#ifdef CONFIG_DEBUG_INFO
	SAVE_ALL
	movq	%rsp,%rdi
	/*
	 * Set up a stack frame pointer. This allows gdb to trace
	 * back to the original stack.
	 */
	movq	%rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
#else
	SAVE_ARGS
	leaq	-ARGOFFSET(%rsp),%rdi	# arg1 for handler
#endif
	testl	$3,CS(%rdi)
	je	1f
	swapgs
1:	incl	%gs:pda_irqcount	# RED-PEN should check preempt count
	movq	%gs:pda_irqstackptr,%rax
	cmoveq	%rax,%rsp		/*todo This needs CFI annotation! */
	pushq	%rdi			# save old stack
#ifndef CONFIG_DEBUG_INFO
	CFI_ADJUST_CFA_OFFSET	8
#endif
	call	\func
	.endm
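
/*
 * Note on the stack switch in the macro above: pda_irqcount is
 * initialized to -1, so the incl sets ZF only for the outermost
 * interrupt; the cmoveq then moves %rsp onto the per-CPU IRQ stack
 * exactly once, and nested interrupts stay on it.
 */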
ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	popq	%rdi
#ifndef CONFIG_DEBUG_INFO
	CFI_ADJUST_CFA_OFFSET	-8
#endif
	cli
	decl	%gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
	movq	RBP(%rdi),%rbp
	CFI_DEF_CFA_REGISTER	rsp
#endif
	leaq	ARGOFFSET(%rdi),%rsp	/*todo This needs CFI annotation! */
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl	$3,CS-ARGOFFSET(%rsp)
	je	retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame.
	 * %rcx: thread info. Interrupts are off.
	 */
retint_with_reschedule:
	movl	$_TIF_WORK_MASK,%edi
retint_check:
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx
	CFI_REMEMBER_STATE
	jnz	retint_careful
retint_swapgs:
	swapgs
retint_restore_args:
	cli
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq	$11,%rdi	/* SIGSEGV */
	sti
	jmp	do_exit
	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt	$TIF_NEED_RESCHED,%edx
	jnc	retint_signal
	sti
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET	8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	cli
	jmp	retint_check

retint_signal:
	testl	$(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz	retint_swapgs
	sti
	SAVE_REST
	movq	$-1,ORIG_RAX(%rsp)
	xorl	%esi,%esi		# oldset
	movq	%rsp,%rdi		# &pt_regs
	call	do_notify_resume
	RESTORE_REST
	cli
	movl	$_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp	retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption. */
	/* rcx:	threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl	$0,threadinfo_preempt_count(%rcx)
	jnz	retint_restore_args
	bt	$TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc	retint_restore_args
	bt	$9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc	retint_restore_args
	call	preempt_schedule_irq
	jmp	exit_intr
#endif
	CFI_ENDPROC

/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq	$\num-256
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp	ret_from_intr
	CFI_ENDPROC
	.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif

/*
 * Exception entry points.
 */
	.macro zeroentry sym
	INTR_FRAME
	pushq	$0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq	%rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	leaq	\sym(%rip),%rax
	jmp	error_entry
	CFI_ENDPROC
	.endm

	.macro errorentry sym
	XCPT_FRAME
	pushq	%rax
	CFI_ADJUST_CFA_OFFSET 8
	leaq	\sym(%rip),%rax
	jmp	error_entry
	CFI_ENDPROC
	.endm

	/* error code is on the stack already */
	/* handle NMI-like exceptions that can happen everywhere */
	.macro paranoidentry sym, ist=0
	SAVE_ALL
	cld
	movl	$1,%ebx
	movl	$MSR_GS_BASE,%ecx
	rdmsr
	testl	%edx,%edx
	js	1f
	swapgs
	xorl	%ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq	%rsp,%rdi
	movq	ORIG_RAX(%rsp),%rsi
	movq	$-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call	\sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
	.endm
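
/*
 * The rdmsr sign test above exploits the fact that the kernel GS base
 * is a kernel (negative) address: if the high half of MSR_GS_BASE in
 * %edx has the sign bit set, we already run with the kernel gs;
 * otherwise swapgs is executed and %ebx is cleared to request the
 * matching swapgs on exit.
 */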
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	_frame RDI
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq	$14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq	%rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq	14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq	%rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq	%rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq	%rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq	%r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq	%r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq	%r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq	%r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq	%rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq	%rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq	%r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq	%r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq	%r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq	%r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl	%ebx,%ebx
	testl	$3,CS(%rsp)
	je	error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq	%rdi,RDI(%rsp)
	movq	%rsp,%rdi
	movq	ORIG_RAX(%rsp),%rsi	/* get error code */
	movq	$-1,ORIG_RAX(%rsp)
	call	*%rax
	/* ebx:	no-swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl	%ebx,%eax
	RESTORE_REST
	cli
	GET_THREAD_INFO(%rcx)
	testl	%eax,%eax
	jne	retint_kernel
	movl	threadinfo_flags(%rcx),%edx
	movl	$_TIF_WORK_MASK,%edi
	andl	%edi,%edx
	jnz	retint_careful
	swapgs
	RESTORE_ARGS 0,8,0
	jmp	iret_label
	CFI_ENDPROC

error_kernelspace:
	incl	%ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after iret run with
	   kernel gs again, so don't set the user space flag. B-stepping K8s
	   sometimes report a truncated RIP for IRET exceptions returning to
	   compat mode. Check for these here too. */
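	/* The second compare below matches a RIP truncated to the low
	   32 bits of iret_label: movl %ebp,%ebp zero-extends the label
	   address for that comparison. */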
	leaq	iret_label(%rip),%rbp
	cmpq	%rbp,RIP(%rsp)
	je	error_swapgs
	movl	%ebp,%ebp	/* zero extend */
	cmpq	%rbp,RIP(%rsp)
	je	error_swapgs
	cmpq	$gs_change,RIP(%rsp)
	je	error_swapgs
	jmp	error_sti

	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl	%edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl	%eax,%eax
	movl	%eax,%gs
	jmp	2b
	.previous

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
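
/*
 * Illustrative call from C (worker_fn is a placeholder name):
 *
 *	pid = kernel_thread(worker_fn, NULL, CLONE_FS | CLONE_FILES);
 *
 * The fake frame built below makes the child start at child_rip with
 * fn/arg still in rdi/rsi.
 */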
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq	%rdx,%rdi
	orq	kernel_thread_flags(%rip),%rdi
	movq	$-1, %rsi
	movq	%rsp, %rdx

	xorl	%r8d,%r8d
	xorl	%r9d,%r9d

	# clone now
	call	do_fork
	movq	%rax,RAX(%rsp)
	xorl	%edi,%edi

	/*
	 * It isn't worth checking for a reschedule here, so internally to
	 * the x86_64 port you can rely on kernel_thread() not rescheduling
	 * the child before returning. This avoids the need for hacks, for
	 * example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC

child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq	%rdi, %rax
	movq	%rsi, %rdi
	call	*%rax
	# exit
	xorl	%edi, %edi
	call	do_exit

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all
 * state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call	sys_execve
	movq	%rax, RAX(%rsp)
	RESTORE_REST
	testq	%rax,%rax
	je	int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC

KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
	.previous .text

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	zeroentry math_state_restore

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq	$0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	jmp	paranoid_exit
	CFI_ENDPROC
	.previous .text

	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	pushq	$-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi
	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * the NMI case should be common, and it does not enable IRQs
	 * and cannot get reschedule ticks.
	 */
	/* ebx:	no-swapgs flag */
paranoid_exit:
	testl	%ebx,%ebx	/* swapgs needed? */
	jnz	paranoid_restore
	testl	$3,CS(%rsp)
	jnz	paranoid_userspace
paranoid_swapgs:
	swapgs
paranoid_restore:
	RESTORE_ALL 8
	iretq
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl	threadinfo_flags(%rcx),%ebx
	andl	$_TIF_WORK_MASK,%ebx
	jz	paranoid_swapgs
	movq	%rsp,%rdi	/* &pt_regs */
	call	sync_regs
	movq	%rax,%rsp	/* switch stack for scheduling */
	testl	$_TIF_NEED_RESCHED,%ebx
	jnz	paranoid_schedule
	movl	%ebx,%edx	/* arg3: thread flags */
	sti
	xorl	%esi,%esi	/* arg2: oldset */
	movq	%rsp,%rdi	/* arg1: &pt_regs */
	call	do_notify_resume
	cli
	jmp	paranoid_userspace
paranoid_schedule:
	sti
	call	schedule
	cli
	jmp	paranoid_userspace
	CFI_ENDPROC
	.previous .text

KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq	$0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp	paranoid_exit
	CFI_ENDPROC
	.previous .text

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp	paranoid_exit
	CFI_ENDPROC

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp	paranoid_exit
	CFI_ENDPROC

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
	.previous .text

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq	$0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp	paranoid_exit
	CFI_ENDPROC
#endif

ENTRY(call_softirq)
	CFI_STARTPROC
	movq	%gs:pda_irqstackptr,%rax
	movq	%rsp,%rdx
	CFI_DEF_CFA_REGISTER	rdx
	incl	%gs:pda_irqcount
	cmove	%rax,%rsp
	pushq	%rdx
	/*todo CFI_DEF_CFA_EXPRESSION ...*/
	call	__do_softirq
	popq	%rsp
	CFI_DEF_CFA_REGISTER	rsp
	decl	%gs:pda_irqcount
	ret
	CFI_ENDPROC