/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like partial stack frame, but all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>

	.code64
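/*
 * Without CONFIG_PREEMPT a return to kernel mode never has to
 * reschedule, so the retint_kernel path below collapses to a plain
 * register restore.
 */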
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
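/*
 * Background for the two macros below: after SYSCALL the user RIP is in
 * %rcx and the user rflags in %r11, so the partial frame keeps rflags in
 * the R11 slot and the user %rsp in %gs:pda_oldrsp. FIXUP_TOP_OF_STACK
 * moves them into the real SS/RSP/EFLAGS/CS/RIP slots; RCX is set to -1
 * to mark that the user %rcx (clobbered by SYSCALL) is not available.
 */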
/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
.endm

.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl	%eax, %eax
	pushq	%rax		/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
.endm

.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
.endm
.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack
 *	frame and report it properly in ps. Unfortunately we don't have one.
 *
 * When the user can change the frame, always force IRET: it copes with
 * non-canonical addresses better. SYSRET has trouble with them due to
 * bugs in both AMD and Intel CPUs.
 */
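/*
 * For illustration only (not part of this file): a user space write(2)
 * following this convention would look roughly like the sketch below,
 * where msg and len are placeholders:
 *
 *	movq	$1,%rax			# __NR_write
 *	movq	$1,%rdi			# fd = stdout
 *	leaq	msg(%rip),%rsi		# buf
 *	movq	$len,%rdx		# count
 *	syscall				# rcx <- return RIP, r11 <- rflags
 *
 * The return value (or -errno) comes back in %rax.
 */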
ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,0
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	swapgs
	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	sti
	SAVE_ARGS 8,1
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX:	rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  sysret_careful
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER rflags,r11*/
	movq	%gs:pda_oldrsp,%rsp
	swapgs
	sysretq

	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	sti
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    1f

	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	cli
	jmp int_with_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	CFI_RESTORE_STATE
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja  1f
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
1:	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */
	jmp int_ret_from_sys_call
	CFI_ENDPROC
/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_STARTPROC	simple
	CFI_DEF_CFA	rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET	ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET	rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET	rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET	cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	CFI_REL_OFFSET	rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET	rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET	rax,RAX-ARGOFFSET
	CFI_REL_OFFSET	rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET	rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET	r8,R8-ARGOFFSET
	CFI_REL_OFFSET	r9,R9-ARGOFFSET
	CFI_REL_OFFSET	r10,R10-ARGOFFSET
	CFI_REL_OFFSET	r11,R11-ARGOFFSET
	cli
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz   int_careful
	andl  $~TS_COMPAT,threadinfo_status(%rcx)
	jmp   retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc  int_very_careful
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	jmp int_with_check
	CFI_ENDPROC
/*
 * Certain special system calls that need to save a full stack frame.
 */
.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
.endm

	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
/*
 * initial frame state for interrupts and exceptions
 */
.macro _frame ref
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */
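/*
 * Note on the stack switch in the macro below: the incl/cmoveq pair
 * moves %rsp to the per-CPU interrupt stack only when the incremented
 * pda_irqcount becomes zero, i.e. at the outermost nesting level;
 * nested interrupts keep running on the stack they arrived on.
 */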
/* 0(%rsp): interrupt number */
.macro interrupt func
	cld
#ifdef CONFIG_DEBUG_INFO
	SAVE_ALL
	movq %rsp,%rdi
	/*
	 * Setup a stack frame pointer.  This allows gdb to trace
	 * back to the original stack.
	 */
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
#else
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
#endif
	testl $3,CS(%rdi)
	je 1f
	swapgs
1:	incl	%gs:pda_irqcount	# RED-PEN should check preempt count
	movq %gs:pda_irqstackptr,%rax
	cmoveq %rax,%rsp		/*todo This needs CFI annotation! */
	pushq %rdi			# save old stack
#ifndef CONFIG_DEBUG_INFO
	CFI_ADJUST_CFA_OFFSET	8
#endif
	call \func
.endm
ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	popq  %rdi
#ifndef CONFIG_DEBUG_INFO
	CFI_ADJUST_CFA_OFFSET	-8
#endif
	cli
	decl %gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
	movq RBP(%rdi),%rbp
	CFI_DEF_CFA_REGISTER	rsp
#endif
	leaq ARGOFFSET(%rdi),%rsp	/*todo This needs CFI annotation! */
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  retint_careful
retint_swapgs:
	swapgs
retint_restore_args:
	cli
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq $11,%rdi	/* SIGSEGV */
	sti
	jmp do_exit
	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt    $TIF_NEED_RESCHED,%edx
	jnc   retint_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET	8
	call  schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	cli
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz    retint_swapgs
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	cli
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz  retint_restore_args
	bt  $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif
	CFI_ENDPROC

/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
	INTR_FRAME
	pushq $\num-256
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif
/*
 * Exception entry points.
 */
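/*
 * zeroentry is for exceptions where the CPU pushes no error code; a 0
 * is pushed in its place so the frame layout stays uniform.
 * errorentry is for exceptions where the CPU has already pushed one.
 */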
.macro zeroentry sym
	INTR_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
.endm

.macro errorentry sym
	XCPT_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
.endm
	/* error code is on the stack already */
	/* handle NMI-like exceptions that can happen everywhere */
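	/*
	 * %ebx records whether swapgs is needed on exit: the MSR_GS_BASE
	 * read below is negative (sign bit set, a kernel address) if the
	 * kernel GS base is already active, in which case the swapgs is
	 * skipped and %ebx stays 1 ("no swapgs needed").
	 */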
.macro paranoidentry sym, ist=0
	SAVE_ALL
	cld
	movl $1,%ebx
	movl  $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js    1f
	swapgs
	xorl  %ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
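	/*
	 * For IST vectors the TSS entry is moved down by one exception
	 * stack size around the handler call, so a recursive exception
	 * of the same kind gets a fresh stack area instead of clobbering
	 * the one in use.
	 */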
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
.endm
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	_frame RDI
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq  $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je  error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	cli
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne  retint_kernel
	movl threadinfo_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz  retint_careful
	swapgs
	RESTORE_ARGS 0,8,0
	jmp iret_label
	CFI_ENDPROC
error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after iret run
	   with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq iret_label(%rip),%rbp
	cmpq %rbp,RIP(%rsp)
	je   error_swapgs
	movl %ebp,%ebp	/* zero extend */
	cmpq %rbp,RIP(%rsp)
	je   error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je   error_swapgs
	jmp error_sti
	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp  2b
	.previous
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
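/*
 * Flow sketch: the fake frame below carries child_rip as the saved RIP,
 * so after do_fork the child leaves the scheduler through ret_from_fork
 * and eventually resumes at child_rip, which calls fn(arg) and exits.
 */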
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi
	/*
	 * It isn't worth checking for a reschedule here, so internally to
	 * the x86_64 port you can rely on kernel_thread() not rescheduling
	 * the child before returning. This avoids the need for hacks, for
	 * example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC

child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorl %edi, %edi
	call do_exit
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all
 * state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
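/*
 * On success sys_execve returns 0 and the saved frame describes the
 * freshly loaded program, so the exit must go through the IRET path;
 * on failure we return to the caller like a normal function.
 */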
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
	.previous .text

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	zeroentry math_state_restore

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	jmp paranoid_exit
	CFI_ENDPROC
	.previous .text

	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi
	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 */
	/* ebx: no swapgs flag */
paranoid_exit:
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)
	jnz   paranoid_userspace
paranoid_swapgs:
	swapgs
paranoid_restore:
	RESTORE_ALL 8
	iretq
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	sti
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	cli
	jmp paranoid_userspace
paranoid_schedule:
	sti
	call schedule
	cli
	jmp paranoid_userspace
	CFI_ENDPROC
	.previous .text

KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit
	CFI_ENDPROC
	.previous .text

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit
	CFI_ENDPROC

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit
	CFI_ENDPROC

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
	.previous .text

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit
	CFI_ENDPROC
#endif
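/*
 * Run __do_softirq on the per-CPU interrupt stack, reusing the
 * pda_irqcount protocol from the interrupt macro above: the stack is
 * only switched when we are not already on it.
 */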
ENTRY(call_softirq)
	CFI_STARTPROC
	movq %gs:pda_irqstackptr,%rax
	movq %rsp,%rdx
	CFI_DEF_CFA_REGISTER	rdx
	incl %gs:pda_irqcount
	cmove %rax,%rsp
	pushq %rdx
	/*todo CFI_DEF_CFA_EXPRESSION ...*/
	call __do_softirq
	popq %rsp
	CFI_DEF_CFA_REGISTER	rsp
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC