/*
 *  linux/arch/x86_64/entry.S
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 *
 *  $Id$
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals, or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: like the partial stack frame, but with all registers saved.
 *
 * TODO:
 * - schedule it carefully for the final hardware.
 */
#define ASSEMBLY 1
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>

	.code64

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path, FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */
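/*
 * (Clarifying note, inferred from the code below: after SYSCALL the user
 * rsp lives only in %gs:pda_oldrsp and the user rip/rflags live in
 * rcx/r11, so the RSP/SS/CS/EFLAGS/RCX slots of the partial pt_regs are
 * stale. FIXUP_TOP_OF_STACK fills them in so C code sees a sane frame;
 * RESTORE_TOP_OF_STACK copies any changes back for the SYSRET path.)
 */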
/* %rsp:at FRAMEEND */
.macro FIXUP_TOP_OF_STACK tmp
	movq %gs:pda_oldrsp,\tmp
	movq \tmp,RSP(%rsp)
	movq $__USER_DS,SS(%rsp)
	movq $__USER_CS,CS(%rsp)
	movq $-1,RCX(%rsp)
	movq R11(%rsp),\tmp	/* get eflags */
	movq \tmp,EFLAGS(%rsp)
.endm

.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq RSP-\offset(%rsp),\tmp
	movq \tmp,%gs:pda_oldrsp
	movq EFLAGS-\offset(%rsp),\tmp
	movq \tmp,R11-\offset(%rsp)
.endm

.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq %rax		/* ss */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET ss,0*/
	pushq %rax		/* rsp */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rsp,0
	pushq $(1<<9)		/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET rflags,0*/
	pushq $__KERNEL_CS	/* cs */
	CFI_ADJUST_CFA_OFFSET 8
	/*CFI_REL_OFFSET cs,0*/
	pushq \child_rip	/* rip */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip,0
	pushq %rax		/* orig rax */
	CFI_ADJUST_CFA_OFFSET 8
.endm
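/*
 * The fake frame built above is six quadwords -- ss, rsp, eflags, cs,
 * rip, orig_rax -- with everything but eflags/cs/rip zeroed, which is
 * why UNFAKE_STACK_FRAME below drops exactly 6*8 bytes.
 */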
.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET -(6*8)
.endm

.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET r15,R15
	CFI_REL_OFFSET r14,R14
	CFI_REL_OFFSET r13,R13
	CFI_REL_OFFSET r12,R12
	CFI_REL_OFFSET rbp,RBP
	CFI_REL_OFFSET rbx,RBX
	CFI_REL_OFFSET r11,R11
	CFI_REL_OFFSET r10,R10
	CFI_REL_OFFSET r9,R9
	CFI_REL_OFFSET r8,R8
	CFI_REL_OFFSET rax,RAX
	CFI_REL_OFFSET rcx,RCX
	CFI_REL_OFFSET rdx,RDX
	CFI_REL_OFFSET rsi,RSI
	CFI_REL_OFFSET rdi,RDI
	CFI_REL_OFFSET rip,RIP
	/*CFI_REL_OFFSET cs,CS*/
	/*CFI_REL_OFFSET rflags,EFLAGS*/
	CFI_REL_OFFSET rsp,RSP
	/*CFI_REL_OFFSET ss,SS*/
.endm

/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je int_ret_from_sys_call
	testl $_TIF_IA32,threadinfo_flags(%rcx)
	jnz int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax	system call number
 * rdi	arg0
 * rcx	return address for syscall/sysret, C arg3
 * rsi	arg1
 * rdx	arg2
 * r10	arg3	(--> moved to rcx for C)
 * r8	arg4
 * r9	arg5
 * r11	eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack frame
 *	and report it properly in ps. Unfortunately we don't have one.
 */
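/*
 * As an illustration of the convention above (not part of the kernel
 * itself), a user-space write(2) call would be set up roughly as:
 *
 *	movq $__NR_write,%rax	# system call number
 *	movq $1,%rdi		# arg0: fd (stdout)
 *	leaq buf(%rip),%rsi	# arg1: buf (hypothetical symbol)
 *	movq $count,%rdx	# arg2: count (hypothetical)
 *	syscall			# hw: rcx <- rip, r11 <- rflags
 *
 * so on entry here rax holds the syscall number and rcx/r11 hold the
 * user rip/rflags that SYSRET will restore.
 */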
ENTRY(system_call)
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,0
	CFI_REGISTER rip,rcx
	/*CFI_REGISTER rflags,r11*/
	swapgs
	movq %rsp,%gs:pda_oldrsp
	movq %gs:pda_kernelstack,%rsp
	sti
	SAVE_ARGS 8,1
	movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
	.globl ret_from_sys_call
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz sysret_careful
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER rflags,r11*/
	movq %gs:pda_oldrsp,%rsp
	swapgs
	sysretq

	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	sti
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz 1f

	/* Really a signal */
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
1:	movl $_TIF_NEED_RESCHED,%edi
	jmp sysret_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	CFI_RESTORE_STATE
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja 1f
	movq %r10,%rcx		/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
1:	SAVE_REST
	movq %rsp,%rdi
	call syscall_trace_leave
	RESTORE_TOP_OF_STACK %rbx
	RESTORE_REST
	jmp ret_from_sys_call
	CFI_ENDPROC

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
ENTRY(int_ret_from_sys_call)
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-ARGOFFSET
	/*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
	CFI_REL_OFFSET rsp,RSP-ARGOFFSET
	/*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
	/*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	CFI_REL_OFFSET rdx,RDX-ARGOFFSET
	CFI_REL_OFFSET rcx,RCX-ARGOFFSET
	CFI_REL_OFFSET rax,RAX-ARGOFFSET
	CFI_REL_OFFSET rdi,RDI-ARGOFFSET
	CFI_REL_OFFSET rsi,RSI-ARGOFFSET
	CFI_REL_OFFSET r8,R8-ARGOFFSET
	CFI_REL_OFFSET r9,R9-ARGOFFSET
	CFI_REL_OFFSET r10,R10-ARGOFFSET
	CFI_REL_OFFSET r11,R11-ARGOFFSET
	cli
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	jnz int_careful
	andl $~TS_COMPAT,threadinfo_status(%rcx)
	jmp retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc int_very_careful
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	cli
	jmp int_restore_rest

int_signal:
	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	jmp int_with_check
	CFI_ENDPROC
/*
 * Certain special system calls that need to save a full stack frame.
 */
.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq \func(%rip),%rax
	leaq -ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp ptregscall_common
.endm
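/*
 * For example, "PTREGSCALL stub_clone, sys_clone, %r8" below expands to
 * a stub_clone that loads the address of sys_clone into rax, points the
 * chosen argument register (%r8, i.e. C arg4) at the pt_regs frame, and
 * jumps to ptregscall_common, which builds the full frame and calls the
 * handler through rax.
 */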
	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call sys_execve
	GET_THREAD_INFO(%rcx)
	bt $TIF_IA32,threadinfo_flags(%rcx)
	CFI_REMEMBER_STATE
	jc exec_32bit
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret

exec_32bit:
	CFI_RESTORE_STATE
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET -8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC

/*
 * initial frame state for interrupts and exceptions
 */
.macro _frame ref
	CFI_STARTPROC simple
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX
/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */
/* 0(%rsp): interrupt number */
.macro interrupt func
	cld
#ifdef CONFIG_DEBUG_INFO
	SAVE_ALL
	movq %rsp,%rdi
	/*
	 * Set up a stack frame pointer. This allows gdb to trace
	 * back to the original stack.
	 */
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
#else
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
#endif
	testl $3,CS(%rdi)
	je 1f
	swapgs
1:	incl %gs:pda_irqcount		# RED-PEN should check preempt count
	movq %gs:pda_irqstackptr,%rax
	cmoveq %rax,%rsp		/*todo This needs CFI annotation! */
	pushq %rdi			# save old stack
#ifndef CONFIG_DEBUG_INFO
	CFI_ADJUST_CFA_OFFSET 8
#endif
	call \func
.endm
ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	popq %rdi
#ifndef CONFIG_DEBUG_INFO
	CFI_ADJUST_CFA_OFFSET -8
#endif
	cli
	decl %gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
	movq RBP(%rdi),%rbp
	CFI_DEF_CFA_REGISTER rsp
#endif
	leaq ARGOFFSET(%rdi),%rsp	/*todo This needs CFI annotation! */
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	movl threadinfo_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz retint_careful
retint_swapgs:
	swapgs
retint_restore_args:
	cli
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq $-9999,%rdi	/* better code? */
	jmp do_exit
	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt $TIF_NEED_RESCHED,%edx
	jnc retint_signal
	sti
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	GET_THREAD_INFO(%rcx)
	cli
	jmp retint_check

retint_signal:
	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz retint_swapgs
	sti
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	cli
	movl $_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	threadinfo. interrupts off. */
	.p2align
retint_kernel:
	cmpl $0,threadinfo_preempt_count(%rcx)
	jnz retint_restore_args
	bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc retint_restore_args
	bt $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif
	CFI_ENDPROC

/*
 * APIC interrupts.
 */
.macro apicinterrupt num,func
	INTR_FRAME
	pushq $\num-256
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
.endm

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt

.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif

#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
#endif

/*
 * Exception entry points.
 */
.macro zeroentry sym
	INTR_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
.endm

.macro errorentry sym
	XCPT_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	leaq \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
.endm
	/* error code is on the stack already */
	/* handle NMI-like exceptions that can happen everywhere */
.macro paranoidentry sym, ist=0
	SAVE_ALL
	cld
	movl $1,%ebx
	movl $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js 1f
	swapgs
	xorl %ebx,%ebx
1:
	.if \ist
	movq %gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
.endm
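/*
 * Note on the ebx convention (taken from the code above): paranoidentry
 * leaves ebx == 1 if the exception arrived with the kernel gs base
 * already active (no swapgs was done), and ebx == 0 if it had to swapgs
 * itself; paranoid_exit below uses this flag to decide whether to swap
 * back before returning.
 */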
/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
ENTRY(error_entry)
	_frame RDI
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET (14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq %rdi,RDI(%rsp)
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	cli
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne retint_kernel
	movl threadinfo_flags(%rcx),%edx
	movl $_TIF_WORK_MASK,%edi
	andl %edi,%edx
	jnz retint_careful
	swapgs
	RESTORE_ARGS 0,8,0
	jmp iret_label
	CFI_ENDPROC
error_kernelspace:
	incl %ebx
	/*
	 * There are two places in the kernel that can potentially fault with
	 * usergs. Handle them here. The exception handlers after
	 * iret run with kernel gs again, so don't set the user space flag.
	 * B-stepping K8s sometimes report a truncated RIP for IRET
	 * exceptions returning to compat mode. Check for these here too.
	 */
	leaq iret_label(%rip),%rbp
	cmpq %rbp,RIP(%rsp)
	je error_swapgs
	movl %ebp,%ebp		/* zero extend */
	cmpq %rbp,RIP(%rsp)
	je error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je error_swapgs
	jmp error_sti
	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl %edi,%gs
2:	mfence			/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp 2b
	.previous
/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
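/*
 * A typical caller (illustrative only; fn/arg and the flag set are
 * hypothetical) would do
 *	kernel_thread(fn, arg, CLONE_FS | CLONE_FILES | SIGCHLD);
 * the child then starts at child_rip below with fn in rdi and arg in
 * rsi, exactly as they were at the time of the call.
 */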
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi
	/*
	 * It isn't worth checking for a reschedule here, so internally to
	 * the x86_64 port you can rely on kernel_thread() not rescheduling
	 * the child before returning; this avoids the need for hacks, for
	 * example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
child_rip:
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	xorl %edi, %edi
	call do_exit
/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 */
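/*
 * Illustrative use from C code elsewhere in the kernel: starting init
 * at boot amounts to a call along the lines of
 *	execve("/sbin/init", argv_init, envp_init);
 * which, as the code below shows, only returns to the caller (with an
 * error code in rax) if the exec failed.
 */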
ENTRY(execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC

KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
	.previous .text

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error

ENTRY(device_not_available)
	zeroentry math_state_restore

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	jmp paranoid_exit
	CFI_ENDPROC
	.previous .text
	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi
	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * the NMI path should be common, and it does not enable IRQs
	 * and cannot get reschedule ticks.
	 */
	/* ebx:	no swapgs flag */
paranoid_exit:
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore
	testl $3,CS(%rsp)
	jnz paranoid_userspace
paranoid_swapgs:
	swapgs
paranoid_restore:
	RESTORE_ALL 8
	iretq
paranoid_userspace:
	GET_THREAD_INFO(%rcx)
	movl threadinfo_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule
	movl %ebx,%edx			/* arg3: thread flags */
	sti
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	cli
	jmp paranoid_userspace
paranoid_schedule:
	sti
	call schedule
	cli
	jmp paranoid_userspace
	CFI_ENDPROC
	.previous .text
KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit
	CFI_ENDPROC
	.previous .text

ENTRY(overflow)
	zeroentry do_overflow

ENTRY(bounds)
	zeroentry do_bounds

ENTRY(invalid_op)
	zeroentry do_invalid_op

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun

ENTRY(reserved)
	zeroentry do_reserved

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit
	CFI_ENDPROC

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS

ENTRY(segment_not_present)
	errorentry do_segment_not_present

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit
	CFI_ENDPROC

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
	.previous .text

ENTRY(alignment_check)
	errorentry do_alignment_check

ENTRY(divide_error)
	zeroentry do_divide_error

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit
	CFI_ENDPROC
#endif

ENTRY(call_softirq)
	CFI_STARTPROC
	movq %gs:pda_irqstackptr,%rax
	movq %rsp,%rdx
	CFI_DEF_CFA_REGISTER rdx
	incl %gs:pda_irqcount
	cmove %rax,%rsp
	pushq %rdx
	/*todo CFI_DEF_CFA_EXPRESSION ...*/
	call __do_softirq
	popq %rsp
	CFI_DEF_CFA_REGISTER rsp
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC