/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like the partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

/* %rsp: at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
	movq %gs:pda_oldrsp,\tmp
	movq \tmp,RSP(%rsp)
	movq $__USER_DS,SS(%rsp)
	movq $__USER_CS,CS(%rsp)
	movq $-1,RCX(%rsp)
	movq R11(%rsp),\tmp	/* get eflags */
	movq \tmp,EFLAGS(%rsp)
.endm
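
/*
 * Why the fixup is needed: the SYSCALL instruction saves the user RIP
 * in %rcx and the user RFLAGS in %r11 and pushes nothing, so the
 * SS/CS/RCX/EFLAGS pt_regs slots are garbage until filled in here.
 * RCX is set to -1 because the user's %rcx value was clobbered.
 */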
.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq RSP-\offset(%rsp),\tmp
	movq \tmp,%gs:pda_oldrsp
	movq EFLAGS-\offset(%rsp),\tmp
	movq \tmp,R11-\offset(%rsp)
.endm

.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq %rax		/* ss */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET ss,0*/
	pushq %rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rsp,0
	pushq $(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET rflags,0*/
	pushq $__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET cs,0*/
	pushq \child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip,0
	pushq %rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET 8
.endm
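
/*
 * The six quadwords pushed above (ss, rsp, eflags, cs, rip, orig_rax)
 * mimic the hardware interrupt frame plus the orig_rax slot, so the
 * generic SAVE/RESTORE macros treat a fake frame exactly like a real
 * one; UNFAKE_STACK_FRAME below simply drops those 6*8 bytes again.
 */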
.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET -(6*8)
.endm

.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET r15,R15
	CFI_REL_OFFSET r14,R14
	CFI_REL_OFFSET r13,R13
	CFI_REL_OFFSET r12,R12
	CFI_REL_OFFSET rbp,RBP
	CFI_REL_OFFSET rbx,RBX
	CFI_REL_OFFSET r11,R11
	CFI_REL_OFFSET r10,R10
	CFI_REL_OFFSET r9,R9
	CFI_REL_OFFSET r8,R8
	CFI_REL_OFFSET rax,RAX
	CFI_REL_OFFSET rcx,RCX
	CFI_REL_OFFSET rdx,RDX
	CFI_REL_OFFSET rsi,RSI
	CFI_REL_OFFSET rdi,RDI
	CFI_REL_OFFSET rip,RIP
	/*CFI_REL_OFFSET cs,CS*/
	/*CFI_REL_OFFSET rflags,EFLAGS*/
	CFI_REL_OFFSET rsp,RSP
	/*CFI_REL_OFFSET ss,SS*/
.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi: prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX if we had a free scratch register we could save the RSP into the stack
 * frame and report it properly in ps. Unfortunately we don't have one.
 */
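
/*
 * Example (not from this file): for write(fd, buf, count) the C
 * library loads rax=__NR_write, rdi=fd, rsi=buf, rdx=count and
 * executes SYSCALL, which lands here with the user RIP in %rcx and
 * the user RFLAGS in %r11.
 */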
ENTRY(system_call)
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,0
	CFI_REGISTER rip,rcx
	/*CFI_REGISTER rflags,r11*/
	swapgs
	movq %rsp,%gs:pda_oldrsp
	movq %gs:pda_kernelstack,%rsp
	sti
	SAVE_ARGS 8,1
	movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi: flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz sysret_careful
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER rflags,r11*/
	movq %gs:pda_oldrsp,%rsp
	swapgs
	sysretq
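	/*
	 * SYSRET undoes SYSCALL: it reloads RIP from %rcx (filled from
	 * the saved RIP slot above) and RFLAGS from %r11, which is why
	 * RESTORE_ARGS was told to leave %rcx alone.
	 */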

	/* Handle reschedules */
	/* edx: work, edi: workmask */
sysret_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	sti
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz 1f
	/* Really a signal */
	/* edx: work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	jmp sysret_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	CFI_RESTORE_STATE
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
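	/*
	 * e.g. an strace-style tracer stopped in PTRACE_SYSCALL may have
	 * rewritten the argument registers or %rax itself, so the values
	 * saved before syscall_trace_enter() cannot be trusted.
	 */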
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja 1f
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
1:	SAVE_REST
	movq %rsp,%rdi
	call syscall_trace_leave
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	jmp ret_from_sys_call
	CFI_ENDPROC

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	CFI_REL_OFFSET rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET rax,RAX-ARGOFFSET
	CFI_REL_OFFSET rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET r8,R8-ARGOFFSET
	CFI_REL_OFFSET r9,R9-ARGOFFSET
	CFI_REL_OFFSET r10,R10-ARGOFFSET
	CFI_REL_OFFSET r11,R11-ARGOFFSET
	cli
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi: mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz int_careful
	andl $~TS_COMPAT,threadinfo_status(%rcx)
	jmp retint_swapgs

	/* Either a reschedule, a signal or syscall exit tracking is needed. */
	/* First do a reschedule test. */
	/* edx: work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc int_very_careful
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	jmp int_with_check
	CFI_ENDPROC

/*
 * Certain special system calls need to save a complete full stack frame.
 */
.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq \func(%rip),%rax
	leaq -ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp ptregscall_common
.endm
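
/*
 * \arg names the register that carries &pt_regs into the C handler:
 * sys_clone() takes it as its fifth argument (%r8), sys_fork() and
 * sys_vfork() as their first (%rdi), and so on for the stubs below.
 */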
	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	GET_THREAD_INFO(%rcx)
	bt $TIF_IA32,threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jc exec_32bit
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
exec_32bit:
	CFI_RESTORE_STATE
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC

/*
 * initial frame state for interrupts and exceptions
 */
.macro _frame ref
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
.macro interrupt func
	cld
#ifdef CONFIG_DEBUG_INFO
	SAVE_ALL
	movq %rsp,%rdi
	/*
	 * Set up a stack frame pointer. This allows gdb to trace
	 * back to the original stack.
	 */
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
#else
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
#endif
	testl $3,CS(%rdi)
	je 1f
	swapgs
1:	incl %gs:pda_irqcount	# RED-PEN should check preempt count
	movq %gs:pda_irqstackptr,%rax
	cmoveq %rax,%rsp	/*todo This needs CFI annotation! */
	pushq %rdi		# save old stack
	CFI_ADJUST_CFA_OFFSET 8
	call \func
.endm
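
/*
 * Stack-switch logic: pda_irqcount sits at -1 while off the IRQ stack,
 * so the incl above yields zero (ZF set) only for the outermost
 * interrupt and the cmoveq then moves pda_irqstackptr into %rsp;
 * nested interrupts see a non-zero count and stay on the IRQ stack.
 */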

ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	decl %gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
	movq RBP(%rdi),%rbp
	CFI_DEF_CFA_REGISTER rsp
#endif
	leaq ARGOFFSET(%rdi),%rsp	/*todo This needs CFI annotation! */
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz retint_careful
retint_swapgs:
	swapgs
retint_restore_args:
	cli
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
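
/*
 * The __ex_table entry above tells the fault handler: if the iretq at
 * iret_label itself faults (for instance on a bad user segment), resume
 * at bad_iret below instead of treating it as a kernel oops.
 */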
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq $-9999,%rdi	/* better code? */
	jmp do_exit
	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc retint_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	GET_THREAD_INFO(%rcx)
	cli
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz retint_swapgs
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	cli
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx: threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz retint_restore_args
	bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc retint_restore_args
	bt $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif
	CFI_ENDPROC

/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
	INTR_FRAME
	pushq $\num-256
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif

/*
 * Exception entry points.
 */
.macro zeroentry sym
	INTR_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
.endm

.macro errorentry sym
	XCPT_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
.endm
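
/*
 * zeroentry serves exceptions for which the CPU pushes no error code
 * (we push a 0 ourselves); errorentry serves those where the hardware
 * does push one (#PF, #GP, #TS, ...). Either way error_entry sees the
 * same stack layout: error code, then the saved %rax.
 */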

/* error code is on the stack already */
/* handle NMI-like exceptions that can happen anywhere */
#ifndef DEBUG_IST
# define DEBUG_IST 0
#endif
.macro paranoidentry sym, ist=0
	SAVE_ALL
	cld
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js 1f
	swapgs
	xorl %ebx,%ebx
1:
	.if \ist
	movq %gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
.endm
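
/*
 * The rdmsr sign test above works because the kernel's GS base is
 * always a kernel address with the top bit set: if the high half of
 * MSR_GS_BASE (%edx) is already negative we entered with kernel gs
 * and must not swapgs; %ebx records the decision for the exit path.
 */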

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	_frame RDI
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET (14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx: no-swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	cli
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel
	movl threadinfo_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful
	swapgs
	RESTORE_ARGS 0,8,0
	jmp iret_label
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
	/*
	 * There are two places in the kernel that can potentially fault with
	 * usergs. Handle them here. The exception handlers after iret run with
	 * kernel gs again, so don't set the user space flag.
	 * B stepping K8s sometimes report a truncated RIP for IRET exceptions
	 * returning to compat mode. Check for these here too.
	 */
	leaq iret_label(%rip),%rbp
	cmpq %rbp,RIP(%rsp)
	je error_swapgs
	movl %ebp,%ebp	/* zero extend */
	cmpq %rbp,RIP(%rsp)
	je error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je error_swapgs
	jmp error_sti

	/* Reload gs selector with exception handling */
	/* edi: new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous

	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp 2b
	.previous
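
/*
 * If the mov to %gs at gs_change faults on a bad selector, the
 * exception table routes us to bad_gs, which loads the null selector
 * instead and rejoins the code at 2:, so load_gs_index() cannot oops
 * on bogus input.
 */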

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here, so within the
	 * x86_64 port you can rely on kernel_thread() not rescheduling
	 * the child before returning; this avoids the need for hacks,
	 * for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC

child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorl %edi, %edi
	call do_exit
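
/*
 * How the child gets here: the fake frame built in kernel_thread()
 * has CS=__KERNEL_CS and rip=child_rip, so ret_from_fork takes the
 * kernel branch and "returns" via iret straight to child_rip, with fn
 * still in %rdi and arg in %rsi from the parent's SAVE_ALL.
 */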

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC

KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
	.previous .text

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	zeroentry math_state_restore

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_IST
	jmp paranoid_exit
	CFI_ENDPROC
	.previous .text

	/* runs on exception stack */
ENTRY(nmi)
	INTR_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi
	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because it is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only the
	 * NMI case should be common, and NMIs do not enable IRQs and
	 * cannot get reschedule ticks.
	 */
	/* ebx: no-swapgs flag */
paranoid_exit:
	testl %ebx,%ebx		/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)
	jnz paranoid_userspace
paranoid_swapgs:
	swapgs
paranoid_restore:
	RESTORE_ALL 8
	iretq
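	/*
	 * Returning to user space from an exception (IST) stack:
	 * sync_regs() below copies the pt_regs onto the task's normal
	 * kernel stack when needed and returns the pointer to use, so
	 * schedule() and do_notify_resume() run on a stack that survives
	 * further exceptions.
	 */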
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs
	movq %rsp,%rdi		/* &pt_regs */
	call sync_regs
	movq %rax,%rsp		/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx		/* arg3: thread flags */
	sti
	xorl %esi,%esi		/* arg2: oldset */
	movq %rsp,%rdi		/* arg1: &pt_regs */
	call do_notify_resume
	cli
	jmp paranoid_userspace
paranoid_schedule:
	sti
	call schedule
	cli
	jmp paranoid_userspace
	CFI_ENDPROC

KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_IST
	jmp paranoid_exit
	CFI_ENDPROC
	.previous .text

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit
	CFI_ENDPROC

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit
	CFI_ENDPROC

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
	.previous .text

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit
	CFI_ENDPROC
#endif

ENTRY(call_softirq)
	CFI_STARTPROC
	movq %gs:pda_irqstackptr,%rax
	movq %rsp,%rdx
	CFI_DEF_CFA_REGISTER rdx
	incl %gs:pda_irqcount
	cmove %rax,%rsp
	pushq %rdx
	/*todo CFI_DEF_CFA_EXPRESSION ...*/
	call __do_softirq
	popq %rsp
	CFI_DEF_CFA_REGISTER rsp
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
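
/*
 * call_softirq mirrors the stack switch in the interrupt macro: the
 * old %rsp is pushed on the IRQ stack and popped straight back into
 * %rsp after __do_softirq(), undoing the switch in a single pop.
 */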